[llvm] 3bffb1c - [AMDGPU] Use single cache policy operand
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 15 13:01:40 PDT 2021
Author: Stanislav Mekhanoshin
Date: 2021-03-15T13:00:59-07:00
New Revision: 3bffb1cd0ef63858bcc88a6ef39d66c27a872df8
URL: https://github.com/llvm/llvm-project/commit/3bffb1cd0ef63858bcc88a6ef39d66c27a872df8
DIFF: https://github.com/llvm/llvm-project/commit/3bffb1cd0ef63858bcc88a6ef39d66c27a872df8.diff
LOG: [AMDGPU] Use single cache policy operand
Replace the individual GLC, SLC, and DLC operands with a single cache_policy
bitmask operand. This reduces the number of operands in MIR and, I hope, the
amount of code as well. These operands are mostly 0 anyway.
An additional advantage is that the parser will now accept these flags in any
order, unlike before.
Differential Revision: https://reviews.llvm.org/D96469
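For reference, a minimal sketch of the combined bitmask. This is a
reconstruction, not the verbatim header: the AMDGPU::CPol names appear
throughout the diff below, and the bit positions follow the removed
parseCachePolicy() helper; the real definitions are assumed to live in
SIDefines.h, which is listed among the modified files.

namespace AMDGPU {
namespace CPol {
enum CPol : unsigned {
  GLC = 1 << 0, // 0x1  - globally coherent
  SLC = 1 << 1, // 0x2  - system-level coherent
  DLC = 1 << 2, // 0x4  - device-level coherent (GFX10+ only)
  SCC = 1 << 4, // 0x10 - scope coherent (gfx90a only)
  ALL = GLC | SLC | DLC | SCC
};
} // namespace CPol
} // namespace AMDGPU

// With a single operand, policy checks and updates become plain mask
// operations, e.g. forcing GLC for returning atomics as the selector does:
unsigned setGLC(unsigned CPolVal) { return CPolVal | AMDGPU::CPol::GLC; }
bool hasSLC(unsigned CPolVal) { return (CPolVal & AMDGPU::CPol::SLC) != 0; }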
Added:
llvm/test/MC/AMDGPU/cpol-err.s
Modified:
llvm/lib/Target/AMDGPU/AMDGPUGISel.td
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
llvm/lib/Target/AMDGPU/BUFInstructions.td
llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
llvm/lib/Target/AMDGPU/FLATInstructions.td
llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
llvm/lib/Target/AMDGPU/MIMGInstructions.td
llvm/lib/Target/AMDGPU/SIDefines.h
llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/SIInstrFormats.td
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.td
llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
llvm/lib/Target/AMDGPU/SMInstructions.td
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-global.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-implicit-def.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.s96.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.s96.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll
llvm/test/CodeGen/AMDGPU/SRSRC-GIT-clobber-check.mir
llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx908.mir
llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx90a.mir
llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll
llvm/test/CodeGen/AMDGPU/branch-relaxation-debug-info.mir
llvm/test/CodeGen/AMDGPU/break-smem-soft-clauses.mir
llvm/test/CodeGen/AMDGPU/break-vmem-soft-clauses.mir
llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll
llvm/test/CodeGen/AMDGPU/bundle-latency.mir
llvm/test/CodeGen/AMDGPU/call-waw-waitcnt.mir
llvm/test/CodeGen/AMDGPU/clamp-omod-special-case.mir
llvm/test/CodeGen/AMDGPU/cluster-flat-loads-postra.mir
llvm/test/CodeGen/AMDGPU/cluster-flat-loads.mir
llvm/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir
llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-copymi-not-live.mir
llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir
llvm/test/CodeGen/AMDGPU/coalescer-subreg-join.mir
llvm/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir
llvm/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir
llvm/test/CodeGen/AMDGPU/collapse-endcf2.mir
llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
llvm/test/CodeGen/AMDGPU/couldnt-join-subrange-3.mir
llvm/test/CodeGen/AMDGPU/dbg-value-ends-sched-region.mir
llvm/test/CodeGen/AMDGPU/dce-disjoint-intervals.mir
llvm/test/CodeGen/AMDGPU/dead-lane.mir
llvm/test/CodeGen/AMDGPU/dead_copy.mir
llvm/test/CodeGen/AMDGPU/debug-value-scheduler-crash.mir
llvm/test/CodeGen/AMDGPU/endpgm-dce.mir
llvm/test/CodeGen/AMDGPU/expand-si-indirect.mir
llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
llvm/test/CodeGen/AMDGPU/fastregalloc-self-loop-heuristic.mir
llvm/test/CodeGen/AMDGPU/flat-load-clustering.mir
llvm/test/CodeGen/AMDGPU/flat-scratch-fold-fi.mir
llvm/test/CodeGen/AMDGPU/fold-fi-mubuf.mir
llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
llvm/test/CodeGen/AMDGPU/fold-imm-f16-f32.mir
llvm/test/CodeGen/AMDGPU/fold-immediate-output-mods.mir
llvm/test/CodeGen/AMDGPU/fold-multiple.mir
llvm/test/CodeGen/AMDGPU/fold-sgpr-copy.mir
llvm/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir
llvm/test/CodeGen/AMDGPU/hard-clauses.mir
llvm/test/CodeGen/AMDGPU/hazard-buffer-store-v-interp.mir
llvm/test/CodeGen/AMDGPU/hazard-hidden-bundle.mir
llvm/test/CodeGen/AMDGPU/hazard-in-bundle.mir
llvm/test/CodeGen/AMDGPU/hazard-inlineasm.mir
llvm/test/CodeGen/AMDGPU/hazard-kill.mir
llvm/test/CodeGen/AMDGPU/hazard-pass-ordering.mir
llvm/test/CodeGen/AMDGPU/hazard-recognizer-meta-insts.mir
llvm/test/CodeGen/AMDGPU/i1_copy_phi_with_phi_incoming_value.mir
llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll
llvm/test/CodeGen/AMDGPU/insert-skips-flat-vmem-ds.mir
llvm/test/CodeGen/AMDGPU/insert-waitcnts-exp.mir
llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir
llvm/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir
llvm/test/CodeGen/AMDGPU/lds-branch-vmem-hazard.mir
llvm/test/CodeGen/AMDGPU/limit-coalesce.mir
llvm/test/CodeGen/AMDGPU/limit-soft-clause-reg-pressure.mir
llvm/test/CodeGen/AMDGPU/lower-control-flow-other-terminators.mir
llvm/test/CodeGen/AMDGPU/mai-hazards-gfx90a.mir
llvm/test/CodeGen/AMDGPU/mai-hazards.mir
llvm/test/CodeGen/AMDGPU/mcp-overlap-after-propagation.mir
llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir
llvm/test/CodeGen/AMDGPU/memory-legalizer-invalid-addrspace.mir
llvm/test/CodeGen/AMDGPU/memory-legalizer-local.mir
llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir
llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir
llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir
llvm/test/CodeGen/AMDGPU/memory-legalizer-region.mir
llvm/test/CodeGen/AMDGPU/memory_clause.mir
llvm/test/CodeGen/AMDGPU/merge-image-load-gfx10.mir
llvm/test/CodeGen/AMDGPU/merge-image-load.mir
llvm/test/CodeGen/AMDGPU/merge-image-sample-gfx10.mir
llvm/test/CodeGen/AMDGPU/merge-image-sample.mir
llvm/test/CodeGen/AMDGPU/merge-load-store.mir
llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir
llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir
llvm/test/CodeGen/AMDGPU/nsa-reassign.mir
llvm/test/CodeGen/AMDGPU/nsa-vmem-hazard.mir
llvm/test/CodeGen/AMDGPU/opt-sgpr-to-vgpr-copy.mir
llvm/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir
llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir
llvm/test/CodeGen/AMDGPU/pei-build-spill.mir
llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir
llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir
llvm/test/CodeGen/AMDGPU/phi-elimination-end-cf.mir
llvm/test/CodeGen/AMDGPU/post-ra-sched-kill-bundle-use-inst.mir
llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir
llvm/test/CodeGen/AMDGPU/power-sched-no-instr-sunit.mir
llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm-gfx10.mir
llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir
llvm/test/CodeGen/AMDGPU/readlane_exec0.mir
llvm/test/CodeGen/AMDGPU/regbank-reassign.mir
llvm/test/CodeGen/AMDGPU/regcoal-subrange-join-seg.mir
llvm/test/CodeGen/AMDGPU/regcoal-subrange-join.mir
llvm/test/CodeGen/AMDGPU/regcoalesce-dbg.mir
llvm/test/CodeGen/AMDGPU/regcoalescing-remove-partial-redundancy-assert.mir
llvm/test/CodeGen/AMDGPU/rename-independent-subregs-mac-operands.mir
llvm/test/CodeGen/AMDGPU/reserved-reg-in-clause.mir
llvm/test/CodeGen/AMDGPU/scalar-store-cache-flush.mir
llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir
llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir
llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir
llvm/test/CodeGen/AMDGPU/schedule-barrier-fpmode.mir
llvm/test/CodeGen/AMDGPU/schedule-barrier.mir
llvm/test/CodeGen/AMDGPU/schedule-regpressure.mir
llvm/test/CodeGen/AMDGPU/scheduler-handle-move-bundle.mir
llvm/test/CodeGen/AMDGPU/sdwa-gfx9.mir
llvm/test/CodeGen/AMDGPU/sdwa-ops.mir
llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-gfx10.mir
llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir
llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir
llvm/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir
llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir
llvm/test/CodeGen/AMDGPU/sgpr-spill-wrong-stack-id.mir
llvm/test/CodeGen/AMDGPU/shrink-carry.mir
llvm/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir
llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir
llvm/test/CodeGen/AMDGPU/si-lower-control-flow.mir
llvm/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir
llvm/test/CodeGen/AMDGPU/smem-no-clause-coalesced.mir
llvm/test/CodeGen/AMDGPU/smem-war-hazard.mir
llvm/test/CodeGen/AMDGPU/smrd-fold-offset.mir
llvm/test/CodeGen/AMDGPU/soft-clause-dbg-value.mir
llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir
llvm/test/CodeGen/AMDGPU/spill-agpr.mir
llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir
llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir
llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir
llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir
llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll
llvm/test/CodeGen/AMDGPU/stack-slot-color-sgpr-vgpr-spills.mir
llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir
llvm/test/CodeGen/AMDGPU/subvector-test.mir
llvm/test/CodeGen/AMDGPU/syncscopes.ll
llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll
llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir
llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir
llvm/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir
llvm/test/CodeGen/AMDGPU/verify-gfx90a-aligned-vgprs.mir
llvm/test/CodeGen/AMDGPU/vgpr-spill.mir
llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir
llvm/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir
llvm/test/CodeGen/AMDGPU/vmem-vcc-hazard.mir
llvm/test/CodeGen/AMDGPU/waitcnt-agpr.mir
llvm/test/CodeGen/AMDGPU/waitcnt-back-edge-loop.mir
llvm/test/CodeGen/AMDGPU/waitcnt-loop-irreducible.mir
llvm/test/CodeGen/AMDGPU/waitcnt-loop-single-basic-block.mir
llvm/test/CodeGen/AMDGPU/waitcnt-meta-instructions.mir
llvm/test/CodeGen/AMDGPU/waitcnt-no-redundant.mir
llvm/test/CodeGen/AMDGPU/waitcnt-overflow.mir
llvm/test/CodeGen/AMDGPU/waitcnt-preexisting.mir
llvm/test/CodeGen/AMDGPU/waitcnt-vmem-waw.mir
llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.mir
llvm/test/CodeGen/AMDGPU/waitcnt.mir
llvm/test/CodeGen/AMDGPU/wqm.mir
llvm/test/CodeGen/MIR/AMDGPU/custom-pseudo-source-values.ll
llvm/test/CodeGen/MIR/AMDGPU/load-store-opt-dlc.mir
llvm/test/CodeGen/MIR/AMDGPU/load-store-opt-scc.mir
llvm/test/CodeGen/MIR/AMDGPU/mir-canon-multi.mir
llvm/test/CodeGen/MIR/AMDGPU/mircanon-memoperands.mir
llvm/test/CodeGen/MIR/AMDGPU/parse-order-reserved-regs.mir
llvm/test/CodeGen/MIR/AMDGPU/stack-id-assert.mir
llvm/test/CodeGen/MIR/AMDGPU/syncscopes.mir
llvm/test/CodeGen/MIR/AMDGPU/target-index-operands.mir
llvm/test/MC/AMDGPU/atomic-fadd-insts.s
llvm/test/MC/AMDGPU/flat-gfx10.s
llvm/test/MC/AMDGPU/flat-gfx9.s
llvm/test/MC/AMDGPU/gfx90a_asm_features.s
llvm/test/MC/AMDGPU/gfx90a_err.s
llvm/test/MC/AMDGPU/mubuf-gfx10.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 38f3bb900c84..1a5f7aafbb43 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -328,20 +328,14 @@ def gi_bitcast_fpimm_to_i32 : GICustomOperandRenderer<"renderBitcastImm">,
def gi_IMMPopCount : GICustomOperandRenderer<"renderPopcntImm">,
GISDNodeXFormEquiv<IMMPopCount>;
-def gi_extract_glc : GICustomOperandRenderer<"renderExtractGLC">,
- GISDNodeXFormEquiv<extract_glc>;
-
-def gi_extract_slc : GICustomOperandRenderer<"renderExtractSLC">,
- GISDNodeXFormEquiv<extract_slc>;
-
-def gi_extract_dlc : GICustomOperandRenderer<"renderExtractDLC">,
- GISDNodeXFormEquiv<extract_dlc>;
+def gi_extract_cpol : GICustomOperandRenderer<"renderExtractCPol">,
+ GISDNodeXFormEquiv<extract_cpol>;
def gi_extract_swz : GICustomOperandRenderer<"renderExtractSWZ">,
GISDNodeXFormEquiv<extract_swz>;
-def gi_extract_sccb : GICustomOperandRenderer<"renderExtractSCCB">,
- GISDNodeXFormEquiv<extract_sccb>;
+def gi_set_glc : GICustomOperandRenderer<"renderSetGLC">,
+ GISDNodeXFormEquiv<set_glc>;
def gi_frameindex_to_targetframeindex : GICustomOperandRenderer<"renderFrameIndex">,
GISDNodeXFormEquiv<frameindex_to_targetframeindex>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 619a8912a81e..28a21a1270ff 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -188,16 +188,13 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
SDValue &Offset1, unsigned Size) const;
bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
SDValue &SOffset, SDValue &Offset, SDValue &Offen,
- SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
- SDValue &TFE, SDValue &DLC, SDValue &SWZ,
- SDValue &SCCB) const;
+ SDValue &Idxen, SDValue &Addr64, SDValue &CPol, SDValue &TFE,
+ SDValue &SWZ) const;
bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
- SDValue &SOffset, SDValue &Offset, SDValue &GLC,
- SDValue &SLC, SDValue &TFE, SDValue &DLC,
- SDValue &SWZ, SDValue &SCCB) const;
- bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
- SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
- SDValue &SLC) const;
+ SDValue &SOffset, SDValue &Offset, SDValue &CPol,
+ SDValue &TFE, SDValue &SWZ) const;
+ bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
+ SDValue &SOffset, SDValue &Offset) const;
bool SelectMUBUFScratchOffen(SDNode *Parent,
SDValue Addr, SDValue &RSrc, SDValue &VAddr,
SDValue &SOffset, SDValue &ImmOffset) const;
@@ -206,11 +203,8 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
SDValue &Offset) const;
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
- SDValue &Offset, SDValue &GLC, SDValue &SLC,
- SDValue &TFE, SDValue &DLC, SDValue &SWZ,
- SDValue &SCCB) const;
- bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
- SDValue &Offset, SDValue &SLC) const;
+ SDValue &Offset, SDValue &CPol, SDValue &TFE,
+ SDValue &SWZ) const;
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
SDValue &Offset) const;
@@ -1393,13 +1387,11 @@ bool AMDGPUDAGToDAGISel::SelectDSReadWrite2(SDValue Addr, SDValue &Base,
return true;
}
-bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
- SDValue &VAddr, SDValue &SOffset,
- SDValue &Offset, SDValue &Offen,
- SDValue &Idxen, SDValue &Addr64,
- SDValue &GLC, SDValue &SLC,
- SDValue &TFE, SDValue &DLC,
- SDValue &SWZ, SDValue &SCCB) const {
+bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr,
+ SDValue &SOffset, SDValue &Offset,
+ SDValue &Offen, SDValue &Idxen,
+ SDValue &Addr64, SDValue &CPol,
+ SDValue &TFE, SDValue &SWZ) const {
// Subtarget prefers to use flat instruction
// FIXME: This should be a pattern predicate and not reach here
if (Subtarget->useFlatForGlobal())
@@ -1407,14 +1399,10 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
SDLoc DL(Addr);
- if (!GLC.getNode())
- GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
- if (!SLC.getNode())
- SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
+ if (!CPol)
+ CPol = CurDAG->getTargetConstant(0, DL, MVT::i32);
TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
- DLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
SWZ = CurDAG->getTargetConstant(0, DL, MVT::i1);
- SCCB = CurDAG->getTargetConstant(0, DL, MVT::i1);
Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
@@ -1492,10 +1480,8 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
SDValue &VAddr, SDValue &SOffset,
- SDValue &Offset, SDValue &GLC,
- SDValue &SLC, SDValue &TFE,
- SDValue &DLC, SDValue &SWZ,
- SDValue &SCCB) const {
+ SDValue &Offset, SDValue &CPol,
+ SDValue &TFE, SDValue &SWZ) const {
SDValue Ptr, Offen, Idxen, Addr64;
// addr64 bit was removed for volcanic islands.
@@ -1504,7 +1490,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
return false;
if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
- GLC, SLC, TFE, DLC, SWZ, SCCB))
+ CPol, TFE, SWZ))
return false;
ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
@@ -1523,12 +1509,10 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
SDValue &VAddr, SDValue &SOffset,
- SDValue &Offset,
- SDValue &SLC) const {
- SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
- SDValue GLC, TFE, DLC, SWZ, SCCB;
+ SDValue &Offset) const {
+ SDValue CPol, TFE, SWZ;
- return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE, DLC, SWZ, SCCB);
+ return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, CPol, TFE, SWZ);
}
static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
@@ -1650,15 +1634,14 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
SDValue &SOffset, SDValue &Offset,
- SDValue &GLC, SDValue &SLC,
- SDValue &TFE, SDValue &DLC,
- SDValue &SWZ, SDValue &SCCB) const {
+ SDValue &CPol, SDValue &TFE,
+ SDValue &SWZ) const {
SDValue Ptr, VAddr, Offen, Idxen, Addr64;
const SIInstrInfo *TII =
static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
- GLC, SLC, TFE, DLC, SWZ, SCCB))
+ CPol, TFE, SWZ))
return false;
if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
@@ -1680,16 +1663,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
SDValue &Soffset, SDValue &Offset
) const {
- SDValue GLC, SLC, TFE, DLC, SWZ, SCCB;
-
- return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC, SWZ, SCCB);
-}
-bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
- SDValue &Soffset, SDValue &Offset,
- SDValue &SLC) const {
- SDValue GLC, TFE, DLC, SWZ, SCCB;
+ SDValue CPol, TFE, SWZ;
- return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC, SWZ, SCCB);
+ return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, CPol, TFE, SWZ);
}
// Find a load or store from corresponding pattern root.
@@ -2383,35 +2359,32 @@ void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
MachineSDNode *CmpSwap = nullptr;
if (Subtarget->hasAddr64()) {
- SDValue SRsrc, VAddr, SOffset, Offset, SLC;
+ SDValue SRsrc, VAddr, SOffset, Offset;
- if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
+ if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset)) {
unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
SDValue CmpVal = Mem->getOperand(2);
- SDValue GLC = CurDAG->getTargetConstant(1, SL, MVT::i1);
+ SDValue CPol = CurDAG->getTargetConstant(AMDGPU::CPol::GLC, SL, MVT::i32);
// XXX - Do we care about glue operands?
- SDValue Ops[] = {
- CmpVal, VAddr, SRsrc, SOffset, Offset, GLC, SLC, Mem->getChain()
- };
+ SDValue Ops[] = {CmpVal, VAddr, SRsrc, SOffset, Offset, CPol,
+ Mem->getChain()};
CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
}
}
if (!CmpSwap) {
- SDValue SRsrc, SOffset, Offset, SLC;
- if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
+ SDValue SRsrc, SOffset, Offset;
+ if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset)) {
unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;
SDValue CmpVal = Mem->getOperand(2);
- SDValue GLC = CurDAG->getTargetConstant(1, SL, MVT::i1);
- SDValue Ops[] = {
- CmpVal, SRsrc, SOffset, Offset, GLC, SLC, Mem->getChain()
- };
+ SDValue CPol = CurDAG->getTargetConstant(AMDGPU::CPol::GLC, SL, MVT::i32);
+ SDValue Ops[] = {CmpVal, SRsrc, SOffset, Offset, CPol, Mem->getChain()};
CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 8c587b5e67f9..e2e0321d9f35 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1460,28 +1460,6 @@ static bool parseTexFail(uint64_t TexFailCtrl, bool &TFE, bool &LWE,
return TexFailCtrl == 0;
}
-static bool parseCachePolicy(uint64_t Value,
- bool *GLC, bool *SLC, bool *DLC, bool *SCC) {
- if (GLC) {
- *GLC = (Value & 0x1) ? 1 : 0;
- Value &= ~(uint64_t)0x1;
- }
- if (SLC) {
- *SLC = (Value & 0x2) ? 1 : 0;
- Value &= ~(uint64_t)0x2;
- }
- if (DLC) {
- *DLC = (Value & 0x4) ? 1 : 0;
- Value &= ~(uint64_t)0x4;
- }
- if (SCC) {
- *SCC = (Value & 0x10) ? 1 : 0;
- Value &= ~(uint64_t)0x10;
- }
-
- return Value == 0;
-}
-
bool AMDGPUInstructionSelector::selectImageIntrinsic(
MachineInstr &MI, const AMDGPU::ImageDimIntrinsicInfo *Intr) const {
MachineBasicBlock *MBB = MI.getParent();
@@ -1607,22 +1585,11 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
// TODO: Check this in verifier.
assert((!IsTexFail || DMaskLanes >= 1) && "should have legalized this");
- bool GLC = false;
- bool SLC = false;
- bool DLC = false;
- bool SCC = false;
- if (BaseOpcode->Atomic) {
- GLC = true; // TODO no-return optimization
- if (!parseCachePolicy(
- MI.getOperand(ArgOffset + Intr->CachePolicyIndex).getImm(), nullptr,
- &SLC, IsGFX10Plus ? &DLC : nullptr, &SCC))
- return false;
- } else {
- if (!parseCachePolicy(
- MI.getOperand(ArgOffset + Intr->CachePolicyIndex).getImm(), &GLC,
- &SLC, IsGFX10Plus ? &DLC : nullptr, &SCC))
- return false;
- }
+ unsigned CPol = MI.getOperand(ArgOffset + Intr->CachePolicyIndex).getImm();
+ if (BaseOpcode->Atomic)
+ CPol |= AMDGPU::CPol::GLC; // TODO no-return optimization
+ if (CPol & ~AMDGPU::CPol::ALL)
+ return false;
int NumVAddrRegs = 0;
int NumVAddrDwords = 0;
@@ -1708,13 +1675,8 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
if (IsGFX10Plus)
MIB.addImm(DimInfo->Encoding);
MIB.addImm(Unorm);
- if (IsGFX10Plus)
- MIB.addImm(DLC);
- else
- MIB.addImm(SCC);
- MIB.addImm(GLC);
- MIB.addImm(SLC);
+ MIB.addImm(CPol);
MIB.addImm(IsA16 && // a16 or r128
STI.hasFeature(AMDGPU::FeatureR128A16) ? -1 : 0);
if (IsGFX10Plus)
@@ -2407,8 +2369,7 @@ bool AMDGPUInstructionSelector::selectG_AMDGPU_ATOMIC_CMPXCHG(
MIB.addImm(0);
MIB.addImm(Offset);
- MIB.addImm(1); // glc
- MIB.addImm(0); // slc
+ MIB.addImm(AMDGPU::CPol::GLC);
MIB.cloneMemRefs(MI);
BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), DstReg)
@@ -2991,7 +2952,7 @@ bool AMDGPUInstructionSelector::selectAMDGPU_BUFFER_ATOMIC_FADD(
I.add(MI.getOperand(2)); // rsrc
I.add(SOffset);
I.addImm(Offset);
- renderExtractSLC(I, MI, 7);
+ I.addImm(MI.getOperand(7).getImm()); // cpol
I.cloneMemRefs(MI);
MI.eraseFromParent();
@@ -3029,8 +2990,7 @@ bool AMDGPUInstructionSelector::selectGlobalAtomicFaddIntrinsic(
.addReg(Addr.first)
.addReg(Data)
.addImm(Addr.second)
- .addImm(0) // SLC
- .addImm(0) // SSCB
+ .addImm(0) // cpol
.cloneMemRefs(MI);
MI.eraseFromParent();
@@ -4153,12 +4113,9 @@ AMDGPUInstructionSelector::selectMUBUFAddr64(MachineOperand &Root) const {
[=](MachineInstrBuilder &MIB) { // offset
MIB.addImm(Offset);
},
- addZeroImm, // glc
- addZeroImm, // slc
+ addZeroImm, // cpol
addZeroImm, // tfe
- addZeroImm, // dlc
- addZeroImm, // swz
- addZeroImm // scc
+ addZeroImm // swz
}};
}
@@ -4182,12 +4139,9 @@ AMDGPUInstructionSelector::selectMUBUFOffset(MachineOperand &Root) const {
MIB.addImm(0);
},
[=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }, // offset
- addZeroImm, // glc
- addZeroImm, // slc
+ addZeroImm, // cpol
addZeroImm, // tfe
- addZeroImm, // dlc
addZeroImm, // swz
- addZeroImm // scc
}};
}
@@ -4219,7 +4173,9 @@ AMDGPUInstructionSelector::selectMUBUFAddr64Atomic(MachineOperand &Root) const {
[=](MachineInstrBuilder &MIB) { // offset
MIB.addImm(Offset);
},
- addZeroImm // slc
+ [=](MachineInstrBuilder &MIB) {
+ MIB.addImm(AMDGPU::CPol::GLC); // cpol
+ }
}};
}
@@ -4243,7 +4199,7 @@ AMDGPUInstructionSelector::selectMUBUFOffsetAtomic(MachineOperand &Root) const {
MIB.addImm(0);
},
[=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }, // offset
- addZeroImm // slc
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(AMDGPU::CPol::GLC); } // cpol
}};
}
@@ -4333,25 +4289,11 @@ void AMDGPUInstructionSelector::renderTruncTImm(MachineInstrBuilder &MIB,
MIB.addImm(MI.getOperand(OpIdx).getImm());
}
-void AMDGPUInstructionSelector::renderExtractGLC(MachineInstrBuilder &MIB,
- const MachineInstr &MI,
- int OpIdx) const {
- assert(OpIdx >= 0 && "expected to match an immediate operand");
- MIB.addImm(MI.getOperand(OpIdx).getImm() & 1);
-}
-
-void AMDGPUInstructionSelector::renderExtractSLC(MachineInstrBuilder &MIB,
- const MachineInstr &MI,
- int OpIdx) const {
- assert(OpIdx >= 0 && "expected to match an immediate operand");
- MIB.addImm((MI.getOperand(OpIdx).getImm() >> 1) & 1);
-}
-
-void AMDGPUInstructionSelector::renderExtractDLC(MachineInstrBuilder &MIB,
- const MachineInstr &MI,
- int OpIdx) const {
+void AMDGPUInstructionSelector::renderExtractCPol(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx) const {
assert(OpIdx >= 0 && "expected to match an immediate operand");
- MIB.addImm((MI.getOperand(OpIdx).getImm() >> 2) & 1);
+ MIB.addImm(MI.getOperand(OpIdx).getImm() & AMDGPU::CPol::ALL);
}
void AMDGPUInstructionSelector::renderExtractSWZ(MachineInstrBuilder &MIB,
@@ -4361,11 +4303,11 @@ void AMDGPUInstructionSelector::renderExtractSWZ(MachineInstrBuilder &MIB,
MIB.addImm((MI.getOperand(OpIdx).getImm() >> 3) & 1);
}
-void AMDGPUInstructionSelector::renderExtractSCCB(MachineInstrBuilder &MIB,
- const MachineInstr &MI,
- int OpIdx) const {
+void AMDGPUInstructionSelector::renderSetGLC(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx) const {
assert(OpIdx >= 0 && "expected to match an immediate operand");
- MIB.addImm((MI.getOperand(OpIdx).getImm() >> 4) & 1);
+ MIB.addImm(MI.getOperand(OpIdx).getImm() | AMDGPU::CPol::GLC);
}
void AMDGPUInstructionSelector::renderFrameIndex(MachineInstrBuilder &MIB,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index dd0b613dd4c7..878e3a99b6ac 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -286,16 +286,12 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
void renderPopcntImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;
- void renderExtractGLC(MachineInstrBuilder &MIB, const MachineInstr &MI,
- int OpIdx) const;
- void renderExtractSLC(MachineInstrBuilder &MIB, const MachineInstr &MI,
- int OpIdx) const;
- void renderExtractDLC(MachineInstrBuilder &MIB, const MachineInstr &MI,
- int OpIdx) const;
+ void renderExtractCPol(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const;
void renderExtractSWZ(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;
- void renderExtractSCCB(MachineInstrBuilder &MIB, const MachineInstr &MI,
- int OpIdx) const;
+ void renderSetGLC(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const;
void renderFrameIndex(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 477f04e1673d..0f29c97c2a1e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -1702,26 +1702,14 @@ static bool isZero(Register Reg, MachineRegisterInfo &MRI) {
return mi_match(Reg, MRI, m_ICst(C)) && C == 0;
}
-static unsigned extractGLC(unsigned CachePolicy) {
- return CachePolicy & 1;
-}
-
-static unsigned extractSLC(unsigned CachePolicy) {
- return (CachePolicy >> 1) & 1;
-}
-
-static unsigned extractDLC(unsigned CachePolicy) {
- return (CachePolicy >> 2) & 1;
+static unsigned extractCPol(unsigned CachePolicy) {
+ return CachePolicy & AMDGPU::CPol::ALL;
}
static unsigned extractSWZ(unsigned CachePolicy) {
return (CachePolicy >> 3) & 1;
}
-static unsigned extractSCCB(unsigned CachePolicy) {
- return (CachePolicy >> 4) & 1;
-}
-
MachineInstr *
AMDGPURegisterBankInfo::selectStoreIntrinsic(MachineIRBuilder &B,
@@ -1787,12 +1775,9 @@ AMDGPURegisterBankInfo::selectStoreIntrinsic(MachineIRBuilder &B,
MIB.addUse(RSrc)
.addUse(SOffset)
.addImm(ImmOffset)
- .addImm(extractGLC(CachePolicy))
- .addImm(extractSLC(CachePolicy))
+ .addImm(extractCPol(CachePolicy))
.addImm(0) // tfe: FIXME: Remove from inst
- .addImm(extractDLC(CachePolicy))
.addImm(extractSWZ(CachePolicy))
- .addImm(extractSCCB(CachePolicy))
.cloneMemRefs(MI);
// FIXME: We need a way to report failure from applyMappingImpl.
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 7c6732b4579d..5547cd6c1c32 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -114,10 +114,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
ImmTyInstOffset,
ImmTyOffset0,
ImmTyOffset1,
- ImmTyDLC,
- ImmTySCCB,
- ImmTyGLC,
- ImmTySLC,
+ ImmTyCPol,
ImmTySWZ,
ImmTyTFE,
ImmTyD16,
@@ -340,13 +337,10 @@ class AMDGPUOperand : public MCParsedAsmOperand {
bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
bool isGDS() const { return isImmTy(ImmTyGDS); }
bool isLDS() const { return isImmTy(ImmTyLDS); }
- bool isDLC() const { return isImmTy(ImmTyDLC); }
- bool isSCCB() const { return isImmTy(ImmTySCCB); }
- bool isGLC() const { return isImmTy(ImmTyGLC); }
- // "GLC_1" is a MatchClass of the GLC_1 operand with the default and forced
- // value of the GLC operand.
- bool isGLC_1() const { return isImmTy(ImmTyGLC); }
- bool isSLC() const { return isImmTy(ImmTySLC); }
+ bool isCPol() const { return isImmTy(ImmTyCPol); }
+ // "CPol_GLC1" is a MatchClass of the CPOL_GLC1 operand with the default and
+ // forced value of the GLC operand.
+ bool isCPol_GLC1() const { return isImmTy(ImmTyCPol); }
bool isSWZ() const { return isImmTy(ImmTySWZ); }
bool isTFE() const { return isImmTy(ImmTyTFE); }
bool isD16() const { return isImmTy(ImmTyD16); }
@@ -998,10 +992,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
case ImmTyInstOffset: OS << "InstOffset"; break;
case ImmTyOffset0: OS << "Offset0"; break;
case ImmTyOffset1: OS << "Offset1"; break;
- case ImmTyDLC: OS << "DLC"; break;
- case ImmTySCCB: OS << "SCCB"; break;
- case ImmTyGLC: OS << "GLC"; break;
- case ImmTySLC: OS << "SLC"; break;
+ case ImmTyCPol: OS << "CPol"; break;
case ImmTySWZ: OS << "SWZ"; break;
case ImmTyTFE: OS << "TFE"; break;
case ImmTyD16: OS << "D16"; break;
@@ -1183,6 +1174,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
bool ForcedDPP = false;
bool ForcedSDWA = false;
KernelScopeInfo KernelScope;
+ unsigned CPolSeen;
/// @name Auto-generated Match Functions
/// {
@@ -1267,7 +1259,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
unsigned RegWidth);
void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
- bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
+ bool IsAtomic, bool IsLds = false);
void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
bool IsGdsHardcoded);
@@ -1462,6 +1454,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
OperandMatchResultTy
parseNamedBit(StringRef Name, OperandVector &Operands,
AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
+ OperandMatchResultTy parseCPol(OperandVector &Operands);
OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
StringRef &Value,
SMLoc &StringLoc);
@@ -1621,17 +1614,13 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
int64_t parseGPRIdxMacro();
- void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
- void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
- void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
- void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
+ void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
+ void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
+ void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
- AMDGPUOperand::Ptr defaultDLC() const;
- AMDGPUOperand::Ptr defaultSCCB() const;
- AMDGPUOperand::Ptr defaultGLC() const;
- AMDGPUOperand::Ptr defaultGLC_1() const;
- AMDGPUOperand::Ptr defaultSLC() const;
+ AMDGPUOperand::Ptr defaultCPol() const;
+ AMDGPUOperand::Ptr defaultCPol_GLC1() const;
AMDGPUOperand::Ptr defaultSMRDOffset8() const;
AMDGPUOperand::Ptr defaultSMEMOffset() const;
@@ -1653,6 +1642,8 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
+ void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
+
bool parseDimId(unsigned &Encoding);
OperandMatchResultTy parseDim(OperandVector &Operands);
OperandMatchResultTy parseDPP8(OperandVector &Operands);
@@ -4096,15 +4087,44 @@ bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
const OperandVector &Operands,
const SMLoc &IDLoc) {
- int GLCPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
- AMDGPU::OpName::glc1);
- if (GLCPos != -1) {
- // -1 is set by GLC_1 default operand. In all cases "glc" must be present
- // in the asm string, and the default value means it is not present.
- if (Inst.getOperand(GLCPos).getImm() == -1) {
+ int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
+ AMDGPU::OpName::cpol);
+ if (CPolPos == -1)
+ return true;
+
+ unsigned CPol = Inst.getOperand(CPolPos).getImm();
+
+ uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
+ if ((TSFlags & (SIInstrFlags::SMRD)) &&
+ (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
+ Error(IDLoc, "invalid cache policy for SMRD instruction");
+ return false;
+ }
+
+ if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
+ return true;
+
+ if (TSFlags & SIInstrFlags::IsAtomicRet) {
+ if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
Error(IDLoc, "instruction must use glc");
return false;
}
+ } else {
+ if (CPol & CPol::GLC) {
+ SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
+ StringRef CStr(S.getPointer());
+ S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
+ Error(S, "instruction must not use glc");
+ return false;
+ }
+ }
+
+ if (isGFX90A() && (CPol & CPol::SCC) && (TSFlags & SIInstrFlags::FPAtomic)) {
+ SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
+ StringRef CStr(S.getPointer());
+ S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
+ Error(S, "instruction must not use scc");
+ return false;
}
return true;
@@ -5280,6 +5300,7 @@ bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
OperandMode Mode = OperandMode_Default;
if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
Mode = OperandMode_NSA;
+ CPolSeen = 0;
OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
if (Res != MatchOperand_Success) {
@@ -5404,17 +5425,67 @@ AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
Error(S, "a16 modifier is not supported on this GPU");
return MatchOperand_ParseFail;
}
- if (!isGFX10Plus() && ImmTy == AMDGPUOperand::ImmTyDLC) {
+
+ if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
+ ImmTy = AMDGPUOperand::ImmTyR128A16;
+
+ Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
+ return MatchOperand_Success;
+}
+
+OperandMatchResultTy
+AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
+ unsigned CPolOn = 0;
+ unsigned CPolOff = 0;
+ SMLoc S = getLoc();
+
+ if (trySkipId("glc"))
+ CPolOn = AMDGPU::CPol::GLC;
+ else if (trySkipId("noglc"))
+ CPolOff = AMDGPU::CPol::GLC;
+ else if (trySkipId("slc"))
+ CPolOn = AMDGPU::CPol::SLC;
+ else if (trySkipId("noslc"))
+ CPolOff = AMDGPU::CPol::SLC;
+ else if (trySkipId("dlc"))
+ CPolOn = AMDGPU::CPol::DLC;
+ else if (trySkipId("nodlc"))
+ CPolOff = AMDGPU::CPol::DLC;
+ else if (trySkipId("scc"))
+ CPolOn = AMDGPU::CPol::SCC;
+ else if (trySkipId("noscc"))
+ CPolOff = AMDGPU::CPol::SCC;
+ else
+ return MatchOperand_NoMatch;
+
+ if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
Error(S, "dlc modifier is not supported on this GPU");
return MatchOperand_ParseFail;
}
- if (!isGFX90A() && ImmTy == AMDGPUOperand::ImmTySCCB)
+
+ if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
+ Error(S, "scc modifier is not supported on this GPU");
return MatchOperand_ParseFail;
+ }
- if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
- ImmTy = AMDGPUOperand::ImmTyR128A16;
+ if (CPolSeen & (CPolOn | CPolOff)) {
+ Error(S, "duplicate cache policy modifier");
+ return MatchOperand_ParseFail;
+ }
+
+ CPolSeen |= (CPolOn | CPolOff);
+
+ for (unsigned I = 1; I != Operands.size(); ++I) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
+ if (Op.isCPol()) {
+ Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
+ return MatchOperand_Success;
+ }
+ }
+
+ Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
+ AMDGPUOperand::ImmTyCPol));
- Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
return MatchOperand_Success;
}
@@ -6835,36 +6906,43 @@ AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
// mubuf
//===----------------------------------------------------------------------===//
-AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
- return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
+ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
}
-AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSCCB() const {
- return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySCCB);
-}
-
-AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
- return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
-}
-
-AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC_1() const {
- return AMDGPUOperand::CreateImm(this, -1, SMLoc(), AMDGPUOperand::ImmTyGLC);
-}
-
-AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
- return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol_GLC1() const {
+ return AMDGPUOperand::CreateImm(this, CPol::GLC, SMLoc(),
+ AMDGPUOperand::ImmTyCPol);
}
void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
- const OperandVector &Operands,
- bool IsAtomic,
- bool IsAtomicReturn,
- bool IsLds) {
+ const OperandVector &Operands,
+ bool IsAtomic,
+ bool IsLds) {
bool IsLdsOpcode = IsLds;
bool HasLdsModifier = false;
OptionalImmIndexMap OptionalIdx;
- assert(IsAtomicReturn ? IsAtomic : true);
unsigned FirstOperandIdx = 1;
+ bool IsAtomicReturn = false;
+
+ if (IsAtomic) {
+ for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+ if (!Op.isCPol())
+ continue;
+ IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
+ break;
+ }
+
+ if (!IsAtomicReturn) {
+ int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
+ if (NewOpc != -1)
+ Inst.setOpcode(NewOpc);
+ }
+
+ IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
+ SIInstrFlags::IsAtomicRet;
+ }
for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
@@ -6915,19 +6993,12 @@ void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
}
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
- if (!IsAtomic || IsAtomicReturn) {
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC,
- IsAtomicReturn ? -1 : 0);
- }
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
}
-
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySCCB);
}
void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
@@ -6962,12 +7033,9 @@ void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
addOptionalImmOperand(Inst, Operands, OptionalIdx,
AMDGPUOperand::ImmTyOffset);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySCCB);
}
//===----------------------------------------------------------------------===//
@@ -7009,16 +7077,7 @@ void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
if (IsGFX10Plus)
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
-
- if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::sccb) != -1)
- addOptionalImmOperand(Inst, Operands, OptionalIdx,
- AMDGPUOperand::ImmTySCCB);
-
- if (IsGFX10Plus)
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
-
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
@@ -7034,6 +7093,61 @@ void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands)
cvtMIMG(Inst, Operands, true);
}
+void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
+ OptionalImmIndexMap OptionalIdx;
+ bool IsAtomicReturn = false;
+
+ for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+ if (!Op.isCPol())
+ continue;
+ IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
+ break;
+ }
+
+ if (!IsAtomicReturn) {
+ int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
+ if (NewOpc != -1)
+ Inst.setOpcode(NewOpc);
+ }
+
+ IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
+ SIInstrFlags::IsAtomicRet;
+
+ for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+
+ // Add the register arguments
+ if (Op.isReg()) {
+ Op.addRegOperands(Inst, 1);
+ if (IsAtomicReturn && i == 1)
+ Op.addRegOperands(Inst, 1);
+ continue;
+ }
+
+ // Handle the case where soffset is an immediate
+ if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
+ Op.addImmOperands(Inst, 1);
+ continue;
+ }
+
+ // Handle tokens like 'offen' which are sometimes hard-coded into the
+ // asm string. There are no MCInst operands for these.
+ if (Op.isToken()) {
+ continue;
+ }
+ assert(Op.isImm());
+
+ // Handle optional arguments
+ OptionalIdx[Op.getImmTy()] = i;
+ }
+
+ if ((int)Inst.getNumOperands() <=
+ AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
+}
+
void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
const OperandVector &Operands) {
for (unsigned I = 1; I < Operands.size(); ++I) {
@@ -7127,10 +7241,7 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
{"lds", AMDGPUOperand::ImmTyLDS, true, nullptr},
{"offset", AMDGPUOperand::ImmTyOffset, false, nullptr},
{"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
- {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr},
- {"scc", AMDGPUOperand::ImmTySCCB, true, nullptr},
- {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr},
- {"slc", AMDGPUOperand::ImmTySLC, true, nullptr},
+ {"", AMDGPUOperand::ImmTyCPol, false, nullptr},
{"swz", AMDGPUOperand::ImmTySWZ, true, nullptr},
{"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr},
{"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
@@ -7213,6 +7324,8 @@ OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands)
Op.ConvertResult);
} else if (Op.Type == AMDGPUOperand::ImmTyDim) {
res = parseDim(Operands);
+ } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
+ res = parseCPol(Operands);
} else {
res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
}
@@ -8078,8 +8191,6 @@ unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
case MCK_lds:
return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
- case MCK_glc:
- return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
case MCK_idxen:
return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
case MCK_offen:
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index eb57d84c1e19..e7930ba151c1 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -6,16 +6,14 @@
//
//===----------------------------------------------------------------------===//
-def MUBUFAddr32 : ComplexPattern<i64, 9, "SelectMUBUFAddr32">;
-def MUBUFAddr64 : ComplexPattern<i64, 10, "SelectMUBUFAddr64">;
-def MUBUFAddr64Atomic : ComplexPattern<i64, 5, "SelectMUBUFAddr64">;
+def MUBUFAddr64 : ComplexPattern<i64, 7, "SelectMUBUFAddr64">;
+def MUBUFAddr64Atomic : ComplexPattern<i64, 4, "SelectMUBUFAddr64">;
def MUBUFScratchOffen : ComplexPattern<i64, 4, "SelectMUBUFScratchOffen", [], [SDNPWantParent]>;
def MUBUFScratchOffset : ComplexPattern<i64, 3, "SelectMUBUFScratchOffset", [], [SDNPWantParent], 20>;
-def MUBUFOffset : ComplexPattern<i64, 9, "SelectMUBUFOffset">;
-def MUBUFOffsetNoGLC : ComplexPattern<i64, 3, "SelectMUBUFOffset">;
-def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">;
+def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
+def MUBUFOffsetAtomic : ComplexPattern<i64, 3, "SelectMUBUFOffset">;
def BUFAddrKind {
int Offset = 0;
@@ -124,20 +122,17 @@ class MTBUF_Real <MTBUF_Pseudo ps> :
let TSFlags = ps.TSFlags;
bits<12> offset;
- bits<1> glc;
- bits<1> dlc;
+ bits<5> cpol;
bits<7> format;
bits<8> vaddr;
bits<10> vdata;
bits<7> srsrc;
- bits<1> slc;
bits<1> tfe;
bits<8> soffset;
bits<4> dfmt = format{3-0};
bits<3> nfmt = format{6-4};
- bits<1> sccb;
// GFX90A+ only: instruction uses AccVGPR for data
// Bit superceedes tfe.
bits<1> acc = !if(ps.has_vdata, vdata{9}, 0);
@@ -150,17 +145,17 @@ class getMTBUFInsDA<list<RegisterClass> vdataList,
RegisterOperand vdata_op = getLdStRegisterOperand<vdataClass>.ret;
dag InsNoData = !if(!empty(vaddrList),
(ins SReg_128:$srsrc, SCSrc_b32:$soffset,
- offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc, SWZ:$swz, SCCB_0:$sccb),
+ offset:$offset, FORMAT:$format, CPol:$cpol, TFE:$tfe, SWZ:$swz),
(ins vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset,
- offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc, SWZ:$swz, SCCB_0:$sccb)
+ offset:$offset, FORMAT:$format, CPol:$cpol, TFE:$tfe, SWZ:$swz)
);
dag InsData = !if(!empty(vaddrList),
(ins vdata_op:$vdata, SReg_128:$srsrc,
- SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc,
- SLC:$slc, TFE:$tfe, DLC:$dlc, SWZ:$swz, SCCB_0:$sccb),
+ SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, CPol:$cpol,
+ TFE:$tfe, SWZ:$swz),
(ins vdata_op:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc,
- SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc,
- SLC:$slc, TFE:$tfe, DLC:$dlc, SWZ:$swz, SCCB_0:$sccb)
+ SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, CPol:$cpol,
+ TFE:$tfe, SWZ:$swz)
);
dag ret = !if(!empty(vdataList), InsNoData, InsData);
}
@@ -212,7 +207,7 @@ class MTBUF_Load_Pseudo <string opName,
: MTBUF_Pseudo<opName,
(outs getLdStRegisterOperand<vdataClass>.ret:$vdata),
getMTBUFIns<addrKindCopy>.ret,
- " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc$swz$sccb",
+ " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$cpol$tfe$swz",
pattern>,
MTBUF_SetupAddr<addrKindCopy> {
let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
@@ -254,7 +249,7 @@ class MTBUF_Store_Pseudo <string opName,
: MTBUF_Pseudo<opName,
(outs),
getMTBUFIns<addrKindCopy, [vdataClassCopy]>.ret,
- " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc$swz$sccb",
+ " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$cpol$tfe$swz",
pattern>,
MTBUF_SetupAddr<addrKindCopy> {
let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
@@ -357,16 +352,13 @@ class MUBUF_Real <MUBUF_Pseudo ps> :
let UseNamedOperandTable = ps.UseNamedOperandTable;
bits<12> offset;
- bits<1> glc;
- bits<1> dlc;
+ bits<5> cpol;
bits<8> vaddr;
bits<10> vdata;
bits<7> srsrc;
- bits<1> slc;
bits<1> tfe;
bits<8> soffset;
- bits<1> sccb;
// GFX90A+ only: instruction uses AccVGPR for data
// Bit superceedes tfe.
bits<1> acc = !if(ps.has_vdata, vdata{9}, 0);
@@ -410,19 +402,19 @@ class getMUBUFInsDA<list<RegisterClass> vdataList,
RegisterOperand vdata_op = getLdStRegisterOperand<vdataClass>.ret;
dag InsNoData = !if(!empty(vaddrList),
(ins SReg_128:$srsrc, SCSrc_b32:$soffset,
- offset:$offset, GLC:$glc, SLC:$slc),
+ offset:$offset, CPol:$cpol),
(ins vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset,
- offset:$offset, GLC:$glc, SLC:$slc)
+ offset:$offset, CPol:$cpol)
);
dag InsData = !if(!empty(vaddrList),
(ins vdata_op:$vdata, SReg_128:$srsrc,
- SCSrc_b32:$soffset, offset:$offset, GLC:$glc, SLC:$slc),
+ SCSrc_b32:$soffset, offset:$offset, CPol:$cpol),
(ins vdata_op:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc,
- SCSrc_b32:$soffset, offset:$offset, GLC:$glc, SLC:$slc)
+ SCSrc_b32:$soffset, offset:$offset, CPol:$cpol)
);
dag ret = !con(
!if(!empty(vdataList), InsNoData, InsData),
- !if(isLds, (ins DLC:$dlc, SWZ:$swz, SCCB_0:$sccb), (ins TFE:$tfe, DLC:$dlc, SWZ:$swz, SCCB_0:$sccb))
+ !if(isLds, (ins SWZ:$swz), (ins TFE:$tfe, SWZ:$swz))
);
}
@@ -495,8 +487,8 @@ class MUBUF_Load_Pseudo <string opName,
(outs vdata_op:$vdata),
!con(getMUBUFIns<addrKindCopy, [], isLds>.ret,
!if(HasTiedDest, (ins vdata_op:$vdata_in), (ins))),
- " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc$slc" #
- !if(isLds, " lds", "$tfe") # "$dlc$swz$sccb",
+ " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$cpol" #
+ !if(isLds, " lds", "$tfe") # "$swz",
pattern>,
MUBUF_SetupAddr<addrKindCopy> {
let PseudoInstr = opName # !if(isLds, "_lds", "") #
@@ -514,15 +506,15 @@ class MUBUF_Load_Pseudo <string opName,
}
class MUBUF_Offset_Load_Pat <Instruction inst, ValueType load_vt = i32, SDPatternOperator ld = null_frag> : Pat <
- (load_vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))),
- (load_vt (inst v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))
+ (load_vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))),
+ (load_vt (inst v4i32:$srsrc, i32:$soffset, i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))
>;
class MUBUF_Addr64_Load_Pat <Instruction inst,
ValueType load_vt = i32,
SDPatternOperator ld = null_frag> : Pat <
- (load_vt (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))),
- (load_vt (inst i64:$vaddr, v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))
+ (load_vt (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))),
+ (load_vt (inst i64:$vaddr, v4i32:$srsrc, i32:$soffset, i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))
>;
multiclass MUBUF_Pseudo_Load_Pats<string BaseInst, ValueType load_vt = i32, SDPatternOperator ld = null_frag> {
@@ -575,7 +567,7 @@ class MUBUF_Store_Pseudo <string opName,
: MUBUF_Pseudo<opName,
(outs),
getMUBUFIns<addrKindCopy, [getVregSrcForVT<store_vt>.ret]>.ret,
- " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc$swz$sccb",
+ " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$cpol$tfe$swz",
pattern>,
MUBUF_SetupAddr<addrKindCopy> {
let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
@@ -593,12 +585,12 @@ multiclass MUBUF_Pseudo_Stores<string opName,
def _OFFSET : MUBUF_Store_Pseudo <opName, BUFAddrKind.Offset, legal_store_vt,
[(st legal_store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
- i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))]>,
+ i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))]>,
MUBUFAddr64Table<0, NAME>;
def _ADDR64 : MUBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, legal_store_vt,
[(st legal_store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
- i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))]>,
+ i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))]>,
MUBUFAddr64Table<1, NAME>;
def _OFFEN : MUBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, legal_store_vt>;
@@ -616,8 +608,8 @@ multiclass MUBUF_Pseudo_Stores<string opName,
class MUBUF_Pseudo_Store_Lds<string opName>
: MUBUF_Pseudo<opName,
(outs),
- (ins SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, GLC:$glc, SLC:$slc, SWZ:$swz, SCCB_0:$sccb),
- " $srsrc, $soffset$offset lds$glc$slc$swz$sccb"> {
+ (ins SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, CPol:$cpol, SWZ:$swz),
+ " $srsrc, $soffset$offset lds$cpol$swz"> {
let mayLoad = 0;
let mayStore = 1;
let maybeAtomic = 1;
@@ -638,15 +630,15 @@ class getMUBUFAtomicInsDA<RegisterClass vdataClass, bit vdata_in,
dag ret = !if(vdata_in,
!if(!empty(vaddrList),
(ins vdata_op:$vdata_in,
- SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, GLC_1:$glc1, SLC:$slc),
+ SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, CPol_GLC1:$cpol),
(ins vdata_op:$vdata_in, vaddrClass:$vaddr,
- SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, GLC_1:$glc1, SLC:$slc)
+ SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, CPol_GLC1:$cpol)
),
!if(!empty(vaddrList),
(ins vdata_op:$vdata,
- SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, SLC:$slc),
+ SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, CPol_0:$cpol),
(ins vdata_op:$vdata, vaddrClass:$vaddr,
- SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, SLC:$slc)
+ SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, CPol_0:$cpol)
));
}
@@ -689,6 +681,7 @@ class MUBUF_Atomic_Pseudo<string opName,
let has_tfe = 0;
let has_sccb = 0;
let maybeAtomic = 1;
+ let AsmMatchConverter = "cvtMubufAtomic";
}
class MUBUF_AtomicNoRet_Pseudo<string opName, int addrKind,
@@ -700,7 +693,7 @@ class MUBUF_AtomicNoRet_Pseudo<string opName, int addrKind,
: MUBUF_Atomic_Pseudo<opName, addrKindCopy,
(outs),
getMUBUFAtomicIns<addrKindCopy, vdataClassCopy, 0>.ret,
- " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$slc",
+ " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$cpol",
pattern>,
AtomicNoRet<opName # "_" # getAddrName<addrKindCopy>.ret, 0> {
let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
@@ -708,7 +701,6 @@ class MUBUF_AtomicNoRet_Pseudo<string opName, int addrKind,
let dlc_value = 0;
let sccb_value = 0;
let IsAtomicNoRet = 1;
- let AsmMatchConverter = "cvtMubufAtomic";
}
class MUBUF_AtomicRet_Pseudo<string opName, int addrKind,
@@ -721,7 +713,7 @@ class MUBUF_AtomicRet_Pseudo<string opName, int addrKind,
: MUBUF_Atomic_Pseudo<opName, addrKindCopy,
(outs vdata_op:$vdata),
getMUBUFAtomicIns<addrKindCopy, vdataClassCopy, 1>.ret,
- " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc1$slc",
+ " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$cpol",
pattern>,
AtomicNoRet<opName # "_" # getAddrName<addrKindCopy>.ret, 1> {
let PseudoInstr = opName # "_rtn_" # getAddrName<addrKindCopy>.ret;
@@ -731,7 +723,6 @@ class MUBUF_AtomicRet_Pseudo<string opName, int addrKind,
let IsAtomicRet = 1;
let Constraints = "$vdata = $vdata_in";
let DisableEncoding = "$vdata_in";
- let AsmMatchConverter = "cvtMubufAtomicReturn";
}
multiclass MUBUF_Pseudo_Atomics_NO_RTN <string opName,
@@ -766,15 +757,15 @@ multiclass MUBUF_Pseudo_Atomics_RTN <string opName,
let FPAtomic = isFP in
def _OFFSET_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
[(set vdataType:$vdata,
- (atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$slc),
+ (atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset),
vdataType:$vdata_in))]>,
MUBUFAddr64Table <0, NAME # "_RTN">;
let FPAtomic = isFP in
def _ADDR64_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
[(set vdataType:$vdata,
- (atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$slc),
- vdataType:$vdata_in))]>,
+ (atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset),
+ vdataType:$vdata_in))]>,
MUBUFAddr64Table <1, NAME # "_RTN">;
let FPAtomic = isFP in
@@ -1210,24 +1201,21 @@ multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
(vt (st v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
timm:$auxiliary, 0)),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET) SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
- (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
- (extract_swz $auxiliary), (extract_sccb $auxiliary))
+ (extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
>;
def : GCNPat<
(vt (st v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
timm:$auxiliary, 0)),
(!cast<MUBUF_Pseudo>(opcode # _OFFEN) VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
- (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
- (extract_swz $auxiliary), (extract_sccb $auxiliary))
+ (extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
>;
def : GCNPat<
(vt (st v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
timm:$auxiliary, timm)),
(!cast<MUBUF_Pseudo>(opcode # _IDXEN) VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
- (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
- (extract_swz $auxiliary), (extract_sccb $auxiliary))
+ (extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
>;
def : GCNPat<
@@ -1236,8 +1224,7 @@ multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN)
(REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
- (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
- (extract_swz $auxiliary), (extract_sccb $auxiliary))
+ (extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
>;
}
@@ -1296,26 +1283,21 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
(st vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
timm:$auxiliary, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET_exact) getVregSrcForVT<vt>.ret:$vdata, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
- (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
- (extract_swz $auxiliary), (extract_sccb $auxiliary))
+ (extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
>;
def : GCNPat<
(st vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
timm:$auxiliary, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFEN_exact) getVregSrcForVT<vt>.ret:$vdata, VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset,
- (as_i16timm $offset), (extract_glc $auxiliary),
- (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
- (extract_swz $auxiliary), (extract_sccb $auxiliary))
+ (as_i16timm $offset), (extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
>;
def : GCNPat<
(st vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
timm:$auxiliary, timm),
(!cast<MUBUF_Pseudo>(opcode # _IDXEN_exact) getVregSrcForVT<vt>.ret:$vdata, VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset,
- (as_i16timm $offset), (extract_glc $auxiliary),
- (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
- (extract_swz $auxiliary), (extract_sccb $auxiliary))
+ (as_i16timm $offset), (extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
>;
def : GCNPat<
@@ -1324,9 +1306,8 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN_exact)
getVregSrcForVT<vt>.ret:$vdata,
(REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
- SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (extract_glc $auxiliary),
- (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
- (extract_swz $auxiliary), (extract_sccb $auxiliary))
+ SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (extract_cpol $auxiliary),
+ 0, (extract_swz $auxiliary))
>;
}
@@ -1386,7 +1367,7 @@ multiclass BufferAtomicPatterns<SDPatternOperator name, ValueType vt,
timm:$offset, timm:$cachepolicy, 0)),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET_RTN)
getVregSrcForVT<vt>.ret:$vdata_in, SReg_128:$rsrc, SCSrc_b32:$soffset,
- (as_i16timm $offset), (extract_slc $cachepolicy))
+ (as_i16timm $offset), (set_glc $cachepolicy))
>;
def : GCNPat<
@@ -1394,7 +1375,7 @@ multiclass BufferAtomicPatterns<SDPatternOperator name, ValueType vt,
timm:$offset, timm:$cachepolicy, timm)),
(!cast<MUBUF_Pseudo>(opcode # _IDXEN_RTN) getVregSrcForVT<vt>.ret:$vdata_in,
VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset,
- (as_i16timm $offset), (extract_slc $cachepolicy))
+ (as_i16timm $offset), (set_glc $cachepolicy))
>;
def : GCNPat<
@@ -1402,7 +1383,7 @@ multiclass BufferAtomicPatterns<SDPatternOperator name, ValueType vt,
i32:$soffset, timm:$offset, timm:$cachepolicy, 0)),
(!cast<MUBUF_Pseudo>(opcode # _OFFEN_RTN) getVregSrcForVT<vt>.ret:$vdata_in,
VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset,
- (as_i16timm $offset), (extract_slc $cachepolicy))
+ (as_i16timm $offset), (set_glc $cachepolicy))
>;
def : GCNPat<
@@ -1412,7 +1393,7 @@ multiclass BufferAtomicPatterns<SDPatternOperator name, ValueType vt,
getVregSrcForVT<vt>.ret:$vdata_in,
(REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
- (extract_slc $cachepolicy))
+ (set_glc $cachepolicy))
>;
}
@@ -1460,7 +1441,7 @@ multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt,
0, i32:$soffset, timm:$offset,
timm:$cachepolicy, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET) getVregSrcForVT<vt>.ret:$vdata_in, SReg_128:$rsrc, SCSrc_b32:$soffset,
- (as_i16timm $offset), (extract_slc $cachepolicy))
+ (as_i16timm $offset), $cachepolicy)
>;
def : GCNPat<
@@ -1468,7 +1449,7 @@ multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt,
0, i32:$soffset, timm:$offset,
timm:$cachepolicy, timm),
(!cast<MUBUF_Pseudo>(opcode # _IDXEN) getVregSrcForVT<vt>.ret:$vdata_in, VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset,
- (as_i16timm $offset), (extract_slc $cachepolicy))
+ (as_i16timm $offset), $cachepolicy)
>;
def : GCNPat<
@@ -1476,7 +1457,7 @@ multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt,
i32:$voffset, i32:$soffset, timm:$offset,
timm:$cachepolicy, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFEN) getVregSrcForVT<vt>.ret:$vdata_in, VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset,
- (as_i16timm $offset), (extract_slc $cachepolicy))
+ (as_i16timm $offset), $cachepolicy)
>;
def : GCNPat<
@@ -1486,7 +1467,7 @@ multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt,
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN)
getVregSrcForVT<vt>.ret:$vdata_in,
(REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
- SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (extract_slc $cachepolicy))
+ SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), $cachepolicy)
>;
}
@@ -1512,7 +1493,7 @@ def : GCNPat<
(BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN
(REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1),
SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
- (extract_slc $cachepolicy)), VReg_64)), sub0)
+ (set_glc $cachepolicy)), VReg_64)), sub0)
>;
def : GCNPat<
@@ -1523,7 +1504,8 @@ def : GCNPat<
(EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS
(BUFFER_ATOMIC_CMPSWAP_IDXEN_RTN
(REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1),
- VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (extract_slc $cachepolicy)), VReg_64)),
+ VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
+ (set_glc $cachepolicy)), VReg_64)),
sub0)
>;
@@ -1535,7 +1517,8 @@ def : GCNPat<
(EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS
(BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN
(REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1),
- VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (extract_slc $cachepolicy)), VReg_64)),
+ VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
+ (set_glc $cachepolicy)), VReg_64)),
sub0)
>;
@@ -1548,28 +1531,28 @@ def : GCNPat<
(BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN
(REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1),
(REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
- SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (extract_slc $cachepolicy)), VReg_64)),
+ SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
+ (set_glc $cachepolicy)), VReg_64)),
sub0)
>;
class MUBUFLoad_PatternADDR64 <MUBUF_Pseudo Instr_ADDR64, ValueType vt,
PatFrag constant_ld> : GCNPat <
(vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
- i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))),
- (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc, $swz)
+ i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))),
+ (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, CPol:$cpol, $tfe, $swz)
>;
multiclass MUBUFLoad_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo Instr_OFFSET,
ValueType vt, PatFrag atomic_ld> {
def : GCNPat <
- (vt (atomic_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
- i16:$offset, i1:$slc))),
- (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0, 0, 0, 0)
+ (vt (atomic_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset))),
+ (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
>;
def : GCNPat <
- (vt (atomic_ld (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset))),
- (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0, 0, 0)
+ (vt (atomic_ld (MUBUFOffsetAtomic v4i32:$rsrc, i32:$soffset, i16:$offset))),
+ (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0)
>;
}
@@ -1590,8 +1573,8 @@ multiclass MUBUFLoad_Pattern <MUBUF_Pseudo Instr_OFFSET, ValueType vt,
def : GCNPat <
(vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset,
- i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))),
- (Instr_OFFSET $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc, $swz)
+ i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))),
+ (Instr_OFFSET $srsrc, $soffset, $offset, CPol:$cpol, $tfe, $swz)
>;
}
@@ -1614,12 +1597,12 @@ multiclass MUBUFScratchLoadPat <MUBUF_Pseudo InstrOffen,
def : GCNPat <
(vt (ld (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
i32:$soffset, u16imm:$offset))),
- (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0, 0, 0)
+ (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
>;
def : GCNPat <
(vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset))),
- (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0, 0, 0)
+ (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0)
>;
}
@@ -1629,12 +1612,12 @@ multiclass MUBUFScratchLoadPat_D16 <MUBUF_Pseudo InstrOffen,
ValueType vt, PatFrag ld_frag> {
def : GCNPat <
(ld_frag (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, u16imm:$offset), vt:$in),
- (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0, 0, $in)
+ (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, $in)
>;
def : GCNPat <
(ld_frag (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset), vt:$in),
- (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0, 0, $in)
+ (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, $in)
>;
}
@@ -1679,14 +1662,13 @@ multiclass MUBUFStore_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo In
ValueType vt, PatFrag atomic_st> {
// Store follows atomic op convention so address is first
def : GCNPat <
- (atomic_st (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
- i16:$offset, i1:$slc), vt:$val),
- (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0, 0, 0, 0)
+ (atomic_st (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset), vt:$val),
+ (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
>;
def : GCNPat <
- (atomic_st (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset), vt:$val),
- (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0, 0, 0)
+ (atomic_st (MUBUFOffsetAtomic v4i32:$rsrc, i32:$soffset, i16:$offset), vt:$val),
+ (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0)
>;
}
let SubtargetPredicate = isGFX6GFX7 in {
@@ -1700,8 +1682,8 @@ multiclass MUBUFStore_Pattern <MUBUF_Pseudo Instr_OFFSET, ValueType vt,
def : GCNPat <
(st vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
- i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz)),
- (Instr_OFFSET $vdata, $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc, $swz)
+ i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz)),
+ (Instr_OFFSET $vdata, $srsrc, $soffset, $offset, CPol:$cpol, $tfe, $swz)
>;
}
@@ -1715,13 +1697,13 @@ multiclass MUBUFScratchStorePat <MUBUF_Pseudo InstrOffen,
def : GCNPat <
(st vt:$value, (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
i32:$soffset, u16imm:$offset)),
- (InstrOffen rc:$value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0, 0, 0)
+ (InstrOffen rc:$value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
>;
def : GCNPat <
(st vt:$value, (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset,
u16imm:$offset)),
- (InstrOffset rc:$value, $srsrc, $soffset, $offset, 0, 0, 0, 0, 0, 0)
+ (InstrOffset rc:$value, $srsrc, $soffset, $offset, 0, 0, 0)
>;
}
@@ -1767,8 +1749,7 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
timm:$format, timm:$auxiliary, 0)),
(!cast<MTBUF_Pseudo>(opcode # _OFFSET) SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
(as_i8timm $format),
- (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
- (extract_swz $auxiliary), (extract_sccb $auxiliary))
+ (extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
>;
def : GCNPat<
@@ -1776,8 +1757,7 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
timm:$format, timm:$auxiliary, timm)),
(!cast<MTBUF_Pseudo>(opcode # _IDXEN) VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
(as_i8timm $format),
- (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
- (extract_swz $auxiliary), (extract_sccb $auxiliary))
+ (extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
>;
def : GCNPat<
@@ -1785,8 +1765,7 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
timm:$format, timm:$auxiliary, 0)),
(!cast<MTBUF_Pseudo>(opcode # _OFFEN) VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
(as_i8timm $format),
- (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
- (extract_swz $auxiliary), (extract_sccb $auxiliary))
+ (extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
>;
def : GCNPat<
@@ -1796,8 +1775,7 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
(REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
(as_i8timm $format),
- (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
- (extract_swz $auxiliary), (extract_sccb $auxiliary))
+ (extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
>;
}
@@ -1835,8 +1813,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
timm:$format, timm:$auxiliary, 0),
(!cast<MTBUF_Pseudo>(opcode # _OFFSET_exact) getVregSrcForVT<vt>.ret:$vdata, SReg_128:$rsrc, SCSrc_b32:$soffset,
(as_i16timm $offset), (as_i8timm $format),
- (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
- (extract_swz $auxiliary), (extract_sccb $auxiliary))
+ (extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
>;
def : GCNPat<
@@ -1844,8 +1821,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
timm:$format, timm:$auxiliary, timm),
(!cast<MTBUF_Pseudo>(opcode # _IDXEN_exact) getVregSrcForVT<vt>.ret:$vdata, VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset,
(as_i16timm $offset), (as_i8timm $format),
- (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
- (extract_swz $auxiliary), (extract_sccb $auxiliary))
+ (extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
>;
def : GCNPat<
@@ -1853,8 +1829,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
timm:$format, timm:$auxiliary, 0),
(!cast<MTBUF_Pseudo>(opcode # _OFFEN_exact) getVregSrcForVT<vt>.ret:$vdata, VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset,
(as_i16timm $offset), (as_i8timm $format),
- (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
- (extract_swz $auxiliary), (extract_sccb $auxiliary))
+ (extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
>;
def : GCNPat<
@@ -1864,8 +1839,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
getVregSrcForVT<vt>.ret:$vdata,
(REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (as_i8timm $format),
- (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
- (extract_swz $auxiliary), (extract_sccb $auxiliary))
+ (extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
>;
}
@@ -1907,21 +1881,21 @@ class Base_MUBUF_Real_gfx6_gfx7_gfx10<bits<7> op, MUBUF_Pseudo ps, int ef> :
let Inst{11-0} = !if(ps.has_offset, offset, ?);
let Inst{12} = ps.offen;
let Inst{13} = ps.idxen;
- let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
+ let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value);
let Inst{16} = ps.lds;
let Inst{24-18} = op;
let Inst{31-26} = 0x38;
let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
let Inst{47-40} = !if(ps.has_vdata, vdata{7-0}, ?);
let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?);
- let Inst{54} = !if(ps.has_slc, slc, ?);
+ let Inst{54} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?);
let Inst{55} = !if(ps.has_tfe, tfe, ?);
let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
}
class MUBUF_Real_gfx10<bits<8> op, MUBUF_Pseudo ps> :
Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.GFX10> {
- let Inst{15} = !if(ps.has_dlc, dlc, ps.dlc_value);
+ let Inst{15} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlc_value);
let Inst{25} = op{7};
}
@@ -1973,16 +1947,33 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
}
multiclass MUBUF_Real_Atomics_RTN_gfx10<bits<8> op> {
def _BOTHEN_RTN_gfx10 :
- MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN")>;
+ MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN")>,
+ AtomicNoRet<NAME # "_BOTHEN_gfx10", 1>;
def _IDXEN_RTN_gfx10 :
- MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>;
+ MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>,
+ AtomicNoRet<NAME # "_IDXEN_gfx10", 1>;
def _OFFEN_RTN_gfx10 :
- MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>;
+ MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>,
+ AtomicNoRet<NAME # "_OFFEN_gfx10", 1>;
def _OFFSET_RTN_gfx10 :
- MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>;
+ MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>,
+ AtomicNoRet<NAME # "_OFFSET_gfx10", 1>;
}
multiclass MUBUF_Real_Atomics_gfx10<bits<8> op> :
- MUBUF_Real_AllAddr_gfx10<op>, MUBUF_Real_Atomics_RTN_gfx10<op>;
+ MUBUF_Real_Atomics_RTN_gfx10<op> {
+ def _BOTHEN_gfx10 :
+ MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>,
+ AtomicNoRet<NAME # "_BOTHEN_gfx10", 0>;
+ def _IDXEN_gfx10 :
+ MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>,
+ AtomicNoRet<NAME # "_IDXEN_gfx10", 0>;
+ def _OFFEN_gfx10 :
+ MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>,
+ AtomicNoRet<NAME # "_OFFEN_gfx10", 0>;
+ def _OFFSET_gfx10 :
+ MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>,
+ AtomicNoRet<NAME # "_OFFSET_gfx10", 0>;
+ }
} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
defm BUFFER_STORE_BYTE_D16_HI : MUBUF_Real_AllAddr_gfx10<0x019>;
@@ -2062,18 +2053,38 @@ let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
def _LDS_BOTHEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>,
MUBUFLdsTable<1, NAME # "_BOTHEN_gfx6_gfx7">;
}
- multiclass MUBUF_Real_Atomics_gfx6_gfx7<bits<8> op> :
- MUBUF_Real_AllAddr_gfx6_gfx7<op> {
+ multiclass MUBUF_Real_Atomics_gfx6_gfx7<bits<8> op> {
+ def _ADDR64_gfx6_gfx7 :
+ MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64")>,
+ AtomicNoRet<NAME # "_ADDR64_gfx6_gfx7", 0>;
+ def _BOTHEN_gfx6_gfx7 :
+ MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>,
+ AtomicNoRet<NAME # "_BOTHEN_gfx6_gfx7", 0>;
+ def _IDXEN_gfx6_gfx7 :
+ MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>,
+ AtomicNoRet<NAME # "_IDXEN_gfx6_gfx7", 0>;
+ def _OFFEN_gfx6_gfx7 :
+ MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>,
+ AtomicNoRet<NAME # "_OFFEN_gfx6_gfx7", 0>;
+ def _OFFSET_gfx6_gfx7 :
+ MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>,
+ AtomicNoRet<NAME # "_OFFSET_gfx6_gfx7", 0>;
+
def _ADDR64_RTN_gfx6_gfx7 :
- MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64_RTN")>;
+ MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64_RTN")>,
+ AtomicNoRet<NAME # "_ADDR64_gfx6_gfx7", 1>;
def _BOTHEN_RTN_gfx6_gfx7 :
- MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN")>;
+ MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN")>,
+ AtomicNoRet<NAME # "_BOTHEN_gfx6_gfx7", 1>;
def _IDXEN_RTN_gfx6_gfx7 :
- MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>;
+ MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>,
+ AtomicNoRet<NAME # "_IDXEN_gfx6_gfx7", 1>;
def _OFFEN_RTN_gfx6_gfx7 :
- MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>;
+ MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>,
+ AtomicNoRet<NAME # "_OFFEN_gfx6_gfx7", 1>;
def _OFFSET_RTN_gfx6_gfx7 :
- MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>;
+ MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>,
+ AtomicNoRet<NAME # "_OFFSET_gfx6_gfx7", 1>;
}
} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
@@ -2162,13 +2173,13 @@ class Base_MTBUF_Real_gfx6_gfx7_gfx10<bits<3> op, MTBUF_Pseudo ps, int ef> :
let Inst{11-0} = !if(ps.has_offset, offset, ?);
let Inst{12} = ps.offen;
let Inst{13} = ps.idxen;
- let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
+ let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value);
let Inst{18-16} = op;
let Inst{31-26} = 0x3a; //encoding
let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
let Inst{47-40} = !if(ps.has_vdata, vdata{7-0}, ?);
let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?);
- let Inst{54} = !if(ps.has_slc, slc, ?);
+ let Inst{54} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?);
let Inst{55} = !if(ps.has_tfe, tfe, ?);
let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
}
@@ -2179,7 +2190,7 @@ class Base_MTBUF_Real_gfx6_gfx7_gfx10<bits<3> op, MTBUF_Pseudo ps, int ef> :
class MTBUF_Real_gfx10<bits<4> op, MTBUF_Pseudo ps> :
Base_MTBUF_Real_gfx6_gfx7_gfx10<op{2-0}, ps, SIEncodingFamily.GFX10> {
- let Inst{15} = !if(ps.has_dlc, dlc, ps.dlc_value);
+ let Inst{15} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlc_value);
let Inst{25-19} = format;
let Inst{53} = op{3};
}
@@ -2251,15 +2262,17 @@ defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x007>;
class MUBUF_Real_Base_vi <bits<7> op, MUBUF_Pseudo ps, int Enc> :
MUBUF_Real<ps>,
Enc64,
- SIMCInstr<ps.PseudoInstr, Enc> {
+ SIMCInstr<ps.PseudoInstr, Enc>,
+ AtomicNoRet<!subst("_RTN","",NAME), !if(ps.IsAtomicNoRet, 0,
+ !if(ps.IsAtomicRet, 1, ?))> {
let Inst{11-0} = !if(ps.has_offset, offset, ?);
let Inst{12} = ps.offen;
let Inst{13} = ps.idxen;
- let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
- let Inst{15} = !if(ps.has_sccb, sccb, ps.sccb_value);
+ let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value);
+ let Inst{15} = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccb_value);
let Inst{16} = ps.lds;
- let Inst{17} = !if(ps.has_slc, slc, ?);
+ let Inst{17} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?);
let Inst{24-18} = op;
let Inst{31-26} = 0x38; //encoding
let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
@@ -2290,7 +2303,7 @@ multiclass MUBUF_Real_vi_gfx90a<bits<7> op, MUBUF_Pseudo ps> {
def _gfx90a : MUBUF_Real_gfx90a<op, ps>;
}
-multiclass MUBUF_Real_AllAddr_vi<bits<7> op> {
+multiclass MUBUF_Real_AllAddr_vi<bits<7> op, bit isAtomic = 0, bit isAtomicRet = 0> {
defm _OFFSET : MUBUF_Real_vi_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>;
defm _OFFEN : MUBUF_Real_vi_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>;
defm _IDXEN : MUBUF_Real_vi_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>;
@@ -2346,9 +2359,9 @@ class MUBUF_Real_gfx80 <bits<7> op, MUBUF_Pseudo ps> :
let Inst{11-0} = !if(ps.has_offset, offset, ?);
let Inst{12} = ps.offen;
let Inst{13} = ps.idxen;
- let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
+ let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value);
let Inst{16} = ps.lds;
- let Inst{17} = !if(ps.has_slc, slc, ?);
+ let Inst{17} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?);
let Inst{24-18} = op;
let Inst{31-26} = 0x38; //encoding
let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
@@ -2366,7 +2379,7 @@ multiclass MUBUF_Real_AllAddr_gfx80<bits<7> op> {
}
multiclass MUBUF_Real_Atomic_vi<bits<7> op> :
- MUBUF_Real_AllAddr_vi<op> {
+ MUBUF_Real_AllAddr_vi<op, 1, 0> {
defm _OFFSET_RTN : MUBUF_Real_vi_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>;
defm _OFFEN_RTN : MUBUF_Real_vi_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>;
defm _IDXEN_RTN : MUBUF_Real_vi_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>;
@@ -2488,7 +2501,7 @@ class MTBUF_Real_Base_vi <bits<4> op, MTBUF_Pseudo ps, int Enc> :
let Inst{11-0} = !if(ps.has_offset, offset, ?);
let Inst{12} = ps.offen;
let Inst{13} = ps.idxen;
- let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
+ let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value);
let Inst{18-15} = op;
let Inst{22-19} = dfmt;
let Inst{25-23} = nfmt;
@@ -2496,8 +2509,9 @@ class MTBUF_Real_Base_vi <bits<4> op, MTBUF_Pseudo ps, int Enc> :
let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
let Inst{47-40} = !if(ps.has_vdata, vdata{7-0}, ?);
let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?);
- let Inst{53} = !if(ps.has_sccb, sccb, ps.sccb_value);
- let Inst{54} = !if(ps.has_slc, slc, ?);
+ let Inst{53} = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccb_value);
+ let Inst{54} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?);
+ let Inst{55} = !if(ps.has_tfe, tfe, ?);
let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
}
@@ -2540,7 +2554,7 @@ class MTBUF_Real_gfx80 <bits<4> op, MTBUF_Pseudo ps> :
let Inst{11-0} = !if(ps.has_offset, offset, ?);
let Inst{12} = ps.offen;
let Inst{13} = ps.idxen;
- let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
+ let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value);
let Inst{18-15} = op;
let Inst{22-19} = dfmt;
let Inst{25-23} = nfmt;
@@ -2548,7 +2562,7 @@ class MTBUF_Real_gfx80 <bits<4> op, MTBUF_Pseudo ps> :
let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
let Inst{47-40} = !if(ps.has_vdata, vdata{7-0}, ?);
let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?);
- let Inst{54} = !if(ps.has_slc, slc, ?);
+ let Inst{54} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?);
let Inst{55} = !if(ps.has_tfe, tfe, ?);
let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
}
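
For reference, the patterns above funnel the intrinsic's auxiliary/cachepolicy
operand through extract_cpol, and force GLC for returning atomics with set_glc.
Those SDNodeXForms live in SIInstrInfo.td and are not part of this hunk, so the
following is only a minimal standalone C++ sketch of their assumed semantics:
mask the auxiliary value down to the recognized policy bits, and OR GLC in.

// Minimal sketch of the assumed extract_cpol/set_glc semantics; the
// real definitions are SDNodeXForms in SIInstrInfo.td and are not in
// this excerpt.
#include <cstdint>

namespace CPol {
enum { GLC = 1, SLC = 2, DLC = 4, SCC = 16, ALL = GLC | SLC | DLC | SCC };
}

// extract_cpol: keep only the recognized cache-policy bits of the
// intrinsic's auxiliary operand.
constexpr uint32_t extractCPol(uint32_t Aux) { return Aux & CPol::ALL; }

// set_glc: an atomic with a returned value must read the memory
// location back, so GLC is forced on regardless of the requested policy.
constexpr uint32_t setGLC(uint32_t Policy) { return Policy | CPol::GLC; }

static_assert(extractCPol(0x40 | CPol::SLC) == CPol::SLC,
              "non-policy bits are dropped");
static_assert(setGLC(CPol::SLC) == (CPol::GLC | CPol::SLC),
              "glc forced for returning atomics");
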
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 4b535f81ecbf..7cd5070f596c 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -541,9 +541,20 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
if (Res && (MCII->get(MI.getOpcode()).TSFlags &
- (SIInstrFlags::MUBUF | SIInstrFlags::FLAT)) &&
- AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::glc1) != -1) {
- insertNamedMCOperand(MI, MCOperand::createImm(1), AMDGPU::OpName::glc1);
+ (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD))) {
+ int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+ AMDGPU::OpName::cpol);
+ if (CPolPos != -1) {
+ unsigned CPol =
+ (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
+ AMDGPU::CPol::GLC : 0;
+ if (MI.getNumOperands() <= (unsigned)CPolPos) {
+ insertNamedMCOperand(MI, MCOperand::createImm(CPol),
+ AMDGPU::OpName::cpol);
+ } else if (CPol) {
+ MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
+ }
+ }
}
if (Res && (MCII->get(MI.getOpcode()).TSFlags &
@@ -559,20 +570,6 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
}
- if (Res && (MCII->get(MI.getOpcode()).TSFlags &
- (SIInstrFlags::FLAT |
- SIInstrFlags::MTBUF | SIInstrFlags::MUBUF))) {
- if (!isGFX10()) {
- int DLCOpIdx =
- AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dlc);
- if (DLCOpIdx != -1) {
- auto DLCIter = MI.begin();
- std::advance(DLCIter, DLCOpIdx);
- MI.insert(DLCIter, MCOperand::createImm(0));
- }
- }
- }
-
if (Res && (MCII->get(MI.getOpcode()).TSFlags &
(SIInstrFlags::MTBUF | SIInstrFlags::MUBUF))) {
int SWZOpIdx =
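
The decoder fix-up above handles two cases: the cpol operand may be missing
entirely, or present but lacking the GLC bit that return atomics implicitly
carry. A standalone sketch of that rule, without the MC plumbing:

#include <cstdint>
#include <optional>

constexpr uint64_t kGLC = 1; // CPol::GLC

// Mirrors the fix-up in getInstruction: create a default cpol when the
// operand was absent, and OR in GLC for atomic-with-return opcodes
// either way.
uint64_t fixupCPol(std::optional<uint64_t> Decoded, bool IsAtomicRet) {
  uint64_t CPol = Decoded.value_or(0);
  if (IsAtomicRet)
    CPol |= kGLC;
  return CPol;
}
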
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 4273c44df951..ede74605a81c 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -94,12 +94,7 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
bits<7> saddr;
bits<10> vdst;
- bits<1> slc;
- bits<1> glc;
- bits<1> dlc;
-
- // Only valid on gfx90a+
- bits<1> sccb;
+ bits<5> cpol;
// Only valid on gfx9
bits<1> lds = 0; // XXX - What does this actually do?
@@ -122,8 +117,8 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
let Inst{13} = lds;
let Inst{15-14} = seg;
- let Inst{16} = !if(ps.has_glc, glc, ps.glcValue);
- let Inst{17} = slc;
+ let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glcValue);
+ let Inst{17} = cpol{CPolBit.SLC};
let Inst{24-18} = op;
let Inst{31-26} = 0x37; // Encoding.
let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
@@ -156,9 +151,9 @@ class FLAT_Load_Pseudo <string opName, RegisterClass regClass,
(ins VReg_64:$vaddr)),
(ins flat_offset:$offset)),
// FIXME: Operands with default values do not work with following non-optional operands.
- !if(HasTiedOutput, (ins GLC:$glc, SLC:$slc, DLC:$dlc, SCCB:$sccb, vdata_op:$vdst_in),
- (ins GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc, SCCB_0:$sccb))),
- " $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc$sccb"> {
+ !if(HasTiedOutput, (ins CPol:$cpol, vdata_op:$vdst_in),
+ (ins CPol_0:$cpol))),
+ " $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$cpol"> {
let has_data = 0;
let mayLoad = 1;
let has_saddr = HasSaddr;
@@ -178,8 +173,8 @@ class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
!if(EnableSaddr,
(ins VGPR_32:$vaddr, getLdStRegisterOperand<vdataClass>.ret:$vdata, SReg_64:$saddr),
(ins VReg_64:$vaddr, getLdStRegisterOperand<vdataClass>.ret:$vdata)),
- (ins flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc, SCCB_0:$sccb)),
- " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc$sccb"> {
+ (ins flat_offset:$offset, CPol_0:$cpol)),
+ " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$cpol"> {
let mayLoad = 0;
let mayStore = 1;
let has_vdst = 0;
@@ -203,9 +198,9 @@ class FLAT_Global_Load_AddTid_Pseudo <string opName, RegisterClass regClass,
opName,
(outs regClass:$vdst),
!con(!if(EnableSaddr, (ins SReg_64:$saddr), (ins)),
- (ins flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc, SCCB_0:$sccb),
+ (ins flat_offset:$offset, CPol_0:$cpol),
!if(HasTiedOutput, (ins regClass:$vdst_in), (ins))),
- " $vdst, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc$sccb"> {
+ " $vdst, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {
let is_flat_global = 1;
let has_data = 0;
let mayLoad = 1;
@@ -241,8 +236,8 @@ class FLAT_Global_Store_AddTid_Pseudo <string opName, RegisterClass vdataClass,
opName,
(outs),
!con(!if(EnableSaddr, (ins vdataClass:$vdata, SReg_64:$saddr), (ins vdataClass:$vdata)),
- (ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc, SCCB_0:$sccb)),
- " $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc$sccb"> {
+ (ins flat_offset:$offset, CPol:$cpol)),
+ " $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {
let is_flat_global = 1;
let mayLoad = 0;
let mayStore = 1;
@@ -280,9 +275,9 @@ class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass,
!if(EnableVaddr,
(ins VGPR_32:$vaddr, flat_offset:$offset),
(ins flat_offset:$offset))),
- !if(HasTiedOutput, (ins GLC:$glc, SLC:$slc, DLC:$dlc, SCCB:$sccb, getLdStRegisterOperand<regClass>.ret:$vdst_in),
- (ins GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc, SCCB_0:$sccb))),
- " $vdst, "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc$sccb"> {
+ !if(HasTiedOutput, (ins CPol:$cpol, getLdStRegisterOperand<regClass>.ret:$vdst_in),
+ (ins CPol_0:$cpol))),
+ " $vdst, "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {
let has_data = 0;
let mayLoad = 1;
let has_saddr = 1;
@@ -301,11 +296,11 @@ class FLAT_Scratch_Store_Pseudo <string opName, RegisterClass vdataClass, bit En
opName,
(outs),
!if(EnableSaddr,
- (ins vdata_op:$vdata, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc, SCCB_0:$sccb),
+ (ins vdata_op:$vdata, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol_0:$cpol),
!if(EnableVaddr,
- (ins vdata_op:$vdata, VGPR_32:$vaddr, flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc, SCCB_0:$sccb),
- (ins vdata_op:$vdata, flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc, SCCB_0:$sccb))),
- " "#!if(EnableVaddr, "$vaddr", "off")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc$sccb"> {
+ (ins vdata_op:$vdata, VGPR_32:$vaddr, flat_offset:$offset, CPol_0:$cpol),
+ (ins vdata_op:$vdata, flat_offset:$offset, CPol_0:$cpol))),
+ " "#!if(EnableVaddr, "$vaddr", "off")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {
let mayLoad = 0;
let mayStore = 1;
let has_vdst = 0;
@@ -382,8 +377,8 @@ multiclass FLAT_Atomic_Pseudo<
RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> {
def "" : FLAT_AtomicNoRet_Pseudo <opName,
(outs),
- (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, SLC_0:$slc, SCCB_0:$sccb),
- " $vaddr, $vdata$offset$slc$sccb">,
+ (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_0:$cpol),
+ " $vaddr, $vdata$offset$cpol">,
GlobalSaddrTable<0, opName>,
AtomicNoRet <opName, 0> {
let PseudoInstr = NAME;
@@ -393,8 +388,8 @@ multiclass FLAT_Atomic_Pseudo<
def _RTN : FLAT_AtomicRet_Pseudo <opName,
(outs getLdStRegisterOperand<vdst_rc>.ret:$vdst),
- (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, GLC_1:$glc1, SLC_0:$slc, SCCB_0:$sccb),
- " $vdst, $vaddr, $vdata$offset$glc1$slc$sccb",
+ (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol),
+ " $vdst, $vaddr, $vdata$offset$cpol",
[(set vt:$vdst,
(atomic (FLATOffset i64:$vaddr, i16:$offset), data_vt:$vdata))]>,
GlobalSaddrTable<0, opName#"_rtn">,
@@ -416,8 +411,8 @@ multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
def "" : FLAT_AtomicNoRet_Pseudo <opName,
(outs),
- (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, SLC_0:$slc, SCCB_0:$sccb),
- " $vaddr, $vdata, off$offset$slc$sccb">,
+ (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_0:$cpol),
+ " $vaddr, $vdata, off$offset$cpol">,
GlobalSaddrTable<0, opName>,
AtomicNoRet <opName, 0> {
let has_saddr = 1;
@@ -427,8 +422,8 @@ multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
def _SADDR : FLAT_AtomicNoRet_Pseudo <opName,
(outs),
- (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr, flat_offset:$offset, SLC_0:$slc, SCCB_0:$sccb),
- " $vaddr, $vdata, $saddr$offset$slc$sccb">,
+ (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr, flat_offset:$offset, CPol_0:$cpol),
+ " $vaddr, $vdata, $saddr$offset$cpol">,
GlobalSaddrTable<1, opName>,
AtomicNoRet <opName#"_saddr", 0> {
let has_saddr = 1;
@@ -451,8 +446,8 @@ multiclass FLAT_Global_Atomic_Pseudo_RTN<
def _RTN : FLAT_AtomicRet_Pseudo <opName,
(outs vdst_op:$vdst),
- (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, GLC_1:$glc1, SLC_0:$slc, SCCB_0:$sccb),
- " $vdst, $vaddr, $vdata, off$offset$glc1$slc$sccb",
+ (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol),
+ " $vdst, $vaddr, $vdata, off$offset$cpol",
[(set vt:$vdst,
(atomic (FLATOffsetSigned i64:$vaddr, i16:$offset), data_vt:$vdata))]>,
GlobalSaddrTable<0, opName#"_rtn">,
@@ -463,8 +458,8 @@ multiclass FLAT_Global_Atomic_Pseudo_RTN<
def _SADDR_RTN : FLAT_AtomicRet_Pseudo <opName,
(outs vdst_op:$vdst),
- (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr, flat_offset:$offset, GLC_1:$glc1, SLC_0:$slc, SCCB_0:$sccb),
- " $vdst, $vaddr, $vdata, $saddr$offset$glc1$slc$sccb">,
+ (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr, flat_offset:$offset, CPol_GLC1:$cpol),
+ " $vdst, $vaddr, $vdata, $saddr$offset$cpol">,
GlobalSaddrTable<1, opName#"_rtn">,
AtomicNoRet <opName#"_saddr", 1> {
let has_saddr = 1;
@@ -827,17 +822,17 @@ class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCN
class FlatLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(node (FLATOffset (i64 VReg_64:$vaddr), i16:$offset), vt:$in),
- (inst $vaddr, $offset, 0, 0, 0, 0, $in)
+ (inst $vaddr, $offset, 0, $in)
>;
class FlatSignedLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset), vt:$in),
- (inst $vaddr, $offset, 0, 0, 0, 0, $in)
+ (inst $vaddr, $offset, 0, $in)
>;
class GlobalLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset), vt:$in)),
- (inst $saddr, $voffset, $offset, 0, 0, 0, 0, $in)
+ (inst $saddr, $voffset, $offset, 0, $in)
>;
class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
@@ -847,7 +842,7 @@ class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
class GlobalLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset))),
- (inst $saddr, $voffset, $offset, 0, 0, 0)
+ (inst $saddr, $voffset, $offset, 0)
>;
class GlobalStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
@@ -928,7 +923,7 @@ class ScratchLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType
class ScratchLoadSignedPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(node (ScratchOffset (i32 VGPR_32:$vaddr), i16:$offset), vt:$in),
- (inst $vaddr, $offset, 0, 0, 0, 0, $in)
+ (inst $vaddr, $offset, 0, $in)
>;
class ScratchStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
@@ -943,7 +938,7 @@ class ScratchLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType v
class ScratchLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i16:$offset), vt:$in)),
- (inst $saddr, $offset, 0, 0, 0, 0, $in)
+ (inst $saddr, $offset, 0, $in)
>;
class ScratchStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
@@ -1390,7 +1385,7 @@ class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> :
let AssemblerPredicate = isGFX8GFX9;
let DecoderNamespace = "GFX8";
- let Inst{25} = !if(has_sccb, sccb, ps.sccbValue);
+ let Inst{25} = !if(has_sccb, cpol{CPolBit.SCC}, ps.sccbValue);
let AsmString = ps.Mnemonic #
!subst("$sccb", !if(has_sccb, "$sccb",""), ps.AsmOperands);
}
@@ -1567,7 +1562,7 @@ class FLAT_Real_gfx10<bits<7> op, FLAT_Pseudo ps> :
let DecoderNamespace = "GFX10";
let Inst{11-0} = offset{11-0};
- let Inst{12} = !if(ps.has_dlc, dlc, ps.dlcValue);
+ let Inst{12} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlcValue);
let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7d), 0x7d);
let Inst{55} = 0;
}
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index d3d0b4a44e85..4ff74d7028a8 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -202,26 +202,19 @@ void AMDGPUInstPrinter::printGDS(const MCInst *MI, unsigned OpNo,
printNamedBit(MI, OpNo, O, "gds");
}
-void AMDGPUInstPrinter::printDLC(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI, raw_ostream &O) {
- if (AMDGPU::isGFX10Plus(STI))
- printNamedBit(MI, OpNo, O, "dlc");
-}
-
-void AMDGPUInstPrinter::printSCCB(const MCInst *MI, unsigned OpNo,
+void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
- if (AMDGPU::isGFX90A(STI))
- printNamedBit(MI, OpNo, O, "scc");
-}
-
-void AMDGPUInstPrinter::printGLC(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI, raw_ostream &O) {
- printNamedBit(MI, OpNo, O, "glc");
-}
-
-void AMDGPUInstPrinter::printSLC(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI, raw_ostream &O) {
- printNamedBit(MI, OpNo, O, "slc");
+ auto Imm = MI->getOperand(OpNo).getImm();
+ if (Imm & CPol::GLC)
+ O << " glc";
+ if (Imm & CPol::SLC)
+ O << " slc";
+ if ((Imm & CPol::DLC) && AMDGPU::isGFX10Plus(STI))
+ O << " dlc";
+ if ((Imm & CPol::SCC) && AMDGPU::isGFX90A(STI))
+ O << " scc";
+ if (Imm & ~CPol::ALL)
+ O << " /* unexpected cache policy bit */";
}
void AMDGPUInstPrinter::printSWZ(const MCInst *MI, unsigned OpNo,
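
printCPol folds the old per-flag printers into one: each set bit appends a
space-prefixed keyword, with dlc and scc gated on the subtargets that have
them. A self-contained sketch of the same logic, with the subtarget checks
reduced to two booleans:

#include <cstdint>
#include <iostream>
#include <string>

// CPol bitmask values, mirroring SIDefines.h.
enum : uint64_t { GLC = 1, SLC = 2, DLC = 4, SCC = 16,
                  ALL = GLC | SLC | DLC | SCC };

// A combined policy prints as e.g. " glc slc"; unknown bits are
// flagged the same way the real printer does.
std::string renderCPol(uint64_t Imm, bool IsGFX10Plus, bool IsGFX90A) {
  std::string S;
  if (Imm & GLC)
    S += " glc";
  if (Imm & SLC)
    S += " slc";
  if ((Imm & DLC) && IsGFX10Plus)
    S += " dlc";
  if ((Imm & SCC) && IsGFX90A)
    S += " scc";
  if (Imm & ~ALL)
    S += " /* unexpected cache policy bit */";
  return S;
}

int main() {
  std::cout << renderCPol(GLC | SLC, /*gfx10+*/ true, /*gfx90a*/ false)
            << '\n'; // prints " glc slc"
}
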
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
index 121195fe8e20..3cb4fcb28cb0 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
@@ -68,14 +68,8 @@ class AMDGPUInstPrinter : public MCInstPrinter {
const MCSubtargetInfo &STI, raw_ostream &O);
void printGDS(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
- void printDLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
- raw_ostream &O);
- void printSCCB(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
- raw_ostream &O);
- void printGLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
- raw_ostream &O);
- void printSLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
- raw_ostream &O);
+ void printCPol(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printSWZ(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
void printTFE(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 2127bcf447d5..7a7f397abc61 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -263,10 +263,10 @@ class MIMG_NoSampler_Helper <mimgopc op, string asm,
string dns="">
: MIMG_gfx6789 <op.BASE, (outs dst_rc:$vdata), dns> {
let InOperandList = !con((ins addr_rc:$vaddr, SReg_256:$srsrc,
- DMask:$dmask, UNorm:$unorm, SCCB_0:$sccb, GLC:$glc, SLC:$slc,
+ DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
- let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$sccb$glc$slc$r128$tfe$lwe$da"
+ let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$cpol$r128$tfe$lwe$da"
#!if(BaseOpcode.HasD16, "$d16", "");
}
@@ -276,10 +276,10 @@ class MIMG_NoSampler_Helper_gfx90a <mimgopc op, string asm,
string dns="">
: MIMG_gfx90a <op.BASE, (outs getLdStRegisterOperand<dst_rc>.ret:$vdata), dns> {
let InOperandList = !con((ins addr_rc:$vaddr, SReg_256:$srsrc,
- DMask:$dmask, UNorm:$unorm, SCCB_0:$sccb, GLC:$glc, SLC:$slc,
+ DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
- let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$sccb$glc$slc$r128$lwe$da"
+ let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$cpol$r128$lwe$da"
#!if(BaseOpcode.HasD16, "$d16", "");
}
@@ -288,10 +288,10 @@ class MIMG_NoSampler_gfx10<mimgopc op, string opcode,
string dns="">
: MIMG_gfx10<op.BASE, (outs DataRC:$vdata), dns> {
let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, DMask:$dmask,
- Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
- SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
+ Dim:$dim, UNorm:$unorm, CPol:$cpol,
+ R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
- let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe"
+ let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe"
#!if(BaseOpcode.HasD16, "$d16", "");
}
@@ -301,10 +301,10 @@ class MIMG_NoSampler_nsa_gfx10<mimgopc op, string opcode,
: MIMG_nsa_gfx10<op.BASE, (outs DataRC:$vdata), num_addrs, dns> {
let InOperandList = !con(AddrIns,
(ins SReg_256:$srsrc, DMask:$dmask,
- Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
- SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
+ Dim:$dim, UNorm:$unorm, CPol:$cpol,
+ R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
- let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe"
+ let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe"
#!if(BaseOpcode.HasD16, "$d16", "");
}
@@ -389,10 +389,10 @@ class MIMG_Store_Helper <mimgopc op, string asm,
string dns = "">
: MIMG_gfx6789<op.BASE, (outs), dns> {
let InOperandList = !con((ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
- DMask:$dmask, UNorm:$unorm, SCCB_0:$sccb, GLC:$glc, SLC:$slc,
+ DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
- let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$sccb$glc$slc$r128$tfe$lwe$da"
+ let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$cpol$r128$tfe$lwe$da"
#!if(BaseOpcode.HasD16, "$d16", "");
}
@@ -403,10 +403,10 @@ class MIMG_Store_Helper_gfx90a <mimgopc op, string asm,
: MIMG_gfx90a<op.BASE, (outs), dns> {
let InOperandList = !con((ins getLdStRegisterOperand<data_rc>.ret:$vdata,
addr_rc:$vaddr, SReg_256:$srsrc,
- DMask:$dmask, UNorm:$unorm, SCCB_0:$sccb, GLC:$glc, SLC:$slc,
+ DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
- let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$sccb$glc$slc$r128$lwe$da"
+ let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$cpol$r128$lwe$da"
#!if(BaseOpcode.HasD16, "$d16", "");
}
@@ -415,10 +415,10 @@ class MIMG_Store_gfx10<mimgopc op, string opcode,
string dns="">
: MIMG_gfx10<op.BASE, (outs), dns> {
let InOperandList = !con((ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc,
- DMask:$dmask, Dim:$dim, UNorm:$unorm, DLC:$dlc,
- GLC:$glc, SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
+ DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol,
+ R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
- let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe"
+ let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe"
#!if(BaseOpcode.HasD16, "$d16", "");
}
@@ -429,10 +429,10 @@ class MIMG_Store_nsa_gfx10<mimgopc op, string opcode,
let InOperandList = !con((ins DataRC:$vdata),
AddrIns,
(ins SReg_256:$srsrc, DMask:$dmask,
- Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
- SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
+ Dim:$dim, UNorm:$unorm, CPol:$cpol,
+ R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
- let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe"
+ let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe"
#!if(BaseOpcode.HasD16, "$d16", "");
}
@@ -507,9 +507,9 @@ class MIMG_Atomic_gfx6789_base <bits<8> op, string asm, RegisterClass data_rc,
let AsmMatchConverter = "cvtMIMGAtomic";
let InOperandList = (ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
- DMask:$dmask, UNorm:$unorm, SCCB_0:$sccb, GLC:$glc, SLC:$slc,
+ DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da);
- let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$sccb$glc$slc$r128$tfe$lwe$da";
+ let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$cpol$r128$tfe$lwe$da";
}
class MIMG_Atomic_gfx90a_base <bits<8> op, string asm, RegisterClass data_rc,
@@ -520,9 +520,9 @@ class MIMG_Atomic_gfx90a_base <bits<8> op, string asm, RegisterClass data_rc,
let InOperandList = (ins getLdStRegisterOperand<data_rc>.ret:$vdata,
addr_rc:$vaddr, SReg_256:$srsrc,
- DMask:$dmask, UNorm:$unorm, SCCB_0:$sccb, GLC:$glc, SLC:$slc,
+ DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, LWE:$lwe, DA:$da);
- let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$sccb$glc$slc$r128$lwe$da";
+ let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$cpol$r128$lwe$da";
}
class MIMG_Atomic_si<mimgopc op, string asm, RegisterClass data_rc,
@@ -555,9 +555,9 @@ class MIMG_Atomic_gfx10<mimgopc op, string opcode,
let AsmMatchConverter = "cvtMIMGAtomic";
let InOperandList = (ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc,
- DMask:$dmask, Dim:$dim, UNorm:$unorm, DLC:$dlc,
- GLC:$glc, SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe);
- let AsmString = opcode#" $vdst, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe";
+ DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol,
+ R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe);
+ let AsmString = opcode#" $vdst, $vaddr0, $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
}
class MIMG_Atomic_nsa_gfx10<mimgopc op, string opcode,
@@ -571,9 +571,9 @@ class MIMG_Atomic_nsa_gfx10<mimgopc op, string opcode,
let InOperandList = !con((ins DataRC:$vdata),
AddrIns,
(ins SReg_256:$srsrc, DMask:$dmask,
- Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
- SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe));
- let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe";
+ Dim:$dim, UNorm:$unorm, CPol:$cpol,
+ R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe));
+ let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
}
multiclass MIMG_Atomic_Addr_Helper_m <mimgopc op, string asm,
@@ -660,10 +660,10 @@ class MIMG_Sampler_Helper <mimgopc op, string asm, RegisterClass dst_rc,
RegisterClass src_rc, string dns="">
: MIMG_gfx6789 <op.BASE, (outs dst_rc:$vdata), dns> {
let InOperandList = !con((ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
- DMask:$dmask, UNorm:$unorm, SCCB_0:$sccb, GLC:$glc, SLC:$slc,
+ DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
- let AsmString = asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$sccb$glc$slc$r128$tfe$lwe$da"
+ let AsmString = asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$cpol$r128$tfe$lwe$da"
#!if(BaseOpcode.HasD16, "$d16", "");
}
@@ -671,10 +671,10 @@ class MIMG_Sampler_gfx90a<mimgopc op, string asm, RegisterClass dst_rc,
RegisterClass src_rc, string dns="">
: MIMG_gfx90a<op.BASE, (outs getLdStRegisterOperand<dst_rc>.ret:$vdata), dns> {
let InOperandList = !con((ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
- DMask:$dmask, UNorm:$unorm, SCCB_0:$sccb, GLC:$glc, SLC:$slc,
+ DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
- let AsmString = asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$sccb$glc$slc$r128$lwe$da"
+ let AsmString = asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$cpol$r128$lwe$da"
#!if(BaseOpcode.HasD16, "$d16", "");
}
@@ -683,11 +683,11 @@ class MIMG_Sampler_gfx10<mimgopc op, string opcode,
string dns="">
: MIMG_gfx10<op.BASE, (outs DataRC:$vdata), dns> {
let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, SReg_128:$ssamp,
- DMask:$dmask, Dim:$dim, UNorm:$unorm, DLC:$dlc,
- GLC:$glc, SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
+ DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol,
+ R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = opcode#" $vdata, $vaddr0, $srsrc, $ssamp$dmask$dim$unorm"
- #"$dlc$glc$slc$r128$a16$tfe$lwe"
+ #"$cpol$r128$a16$tfe$lwe"
#!if(BaseOpcode.HasD16, "$d16", "");
}
@@ -697,11 +697,11 @@ class MIMG_Sampler_nsa_gfx10<mimgopc op, string opcode,
: MIMG_nsa_gfx10<op.BASE, (outs DataRC:$vdata), num_addrs, dns> {
let InOperandList = !con(AddrIns,
(ins SReg_256:$srsrc, SReg_128:$ssamp, DMask:$dmask,
- Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
- SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
+ Dim:$dim, UNorm:$unorm, CPol:$cpol,
+ R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc, $ssamp$dmask$dim$unorm"
- #"$dlc$glc$slc$r128$a16$tfe$lwe"
+ #"$cpol$r128$a16$tfe$lwe"
#!if(BaseOpcode.HasD16, "$d16", "");
}
@@ -890,9 +890,7 @@ multiclass MIMG_IntersectRay<mimgopc op, string opcode, int num_addrs, bit A16>
dmask = 0xf,
unorm = 1,
d16 = 0,
- glc = 0,
- slc = 0,
- dlc = 0,
+ cpol = 0,
tfe = 0,
lwe = 0,
r128 = 1,
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index bd47f6b495ff..9ef87fec0ca1 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -276,6 +276,18 @@ enum : unsigned {
} // namespace AMDGPU
namespace AMDGPU {
+namespace CPol {
+
+enum CPol {
+ GLC = 1,
+ SLC = 2,
+ DLC = 4,
+ SCC = 16,
+ ALL = GLC | SLC | DLC | SCC
+};
+
+} // namespace CPol
+
namespace SendMsg { // Encoding of SIMM16 used in s_sendmsg* insns.
enum Id { // Message ID, width(4) [3:0].
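For orientation (not part of the patch): the enum above packs what were separate single-bit operands into one immediate; bit 3 (value 8) is skipped because buffer cache-policy immediates use it for swz (see extract_swz in SIInstrInfo.td further down). A minimal C++ sketch of composing and validating such a mask, assuming only the values defined above:

  #include <cassert>

  namespace AMDGPU {
  namespace CPol {
  enum CPol { GLC = 1, SLC = 2, DLC = 4, SCC = 16, ALL = GLC | SLC | DLC | SCC };
  } // namespace CPol
  } // namespace AMDGPU

  int main() {
    unsigned CachePolicy = AMDGPU::CPol::GLC | AMDGPU::CPol::SLC; // "glc slc"
    assert(CachePolicy & AMDGPU::CPol::GLC);     // flag present
    assert(!(CachePolicy & AMDGPU::CPol::DLC));  // flag absent
    assert(!(CachePolicy & ~AMDGPU::CPol::ALL)); // no unsupported bits set
    return 0;
  }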
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 8d5f5145e26b..c9490da5efbd 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -135,10 +135,7 @@ static void buildPrologSpill(const GCNSubtarget &ST, LivePhysRegs &LiveRegs,
.addReg(SpillReg, RegState::Kill)
.addReg(SPReg)
.addImm(Offset)
- .addImm(0) // glc
- .addImm(0) // slc
- .addImm(0) // dlc
- .addImm(0) // scc
+ .addImm(0) // cpol
.addMemOperand(MMO);
return;
}
@@ -148,12 +145,9 @@ static void buildPrologSpill(const GCNSubtarget &ST, LivePhysRegs &LiveRegs,
.addReg(ScratchRsrcReg)
.addReg(SPReg)
.addImm(Offset)
- .addImm(0) // glc
- .addImm(0) // slc
+ .addImm(0) // cpol
.addImm(0) // tfe
- .addImm(0) // dlc
.addImm(0) // swz
- .addImm(0) // scc
.addMemOperand(MMO);
return;
}
@@ -180,10 +174,7 @@ static void buildPrologSpill(const GCNSubtarget &ST, LivePhysRegs &LiveRegs,
.addReg(SpillReg, RegState::Kill)
.addReg(OffsetReg, HasOffsetReg ? RegState::Kill : 0)
.addImm(0) // offset
- .addImm(0) // glc
- .addImm(0) // slc
- .addImm(0) // dlc
- .addImm(0) // scc
+ .addImm(0) // cpol
.addMemOperand(MMO);
if (!HasOffsetReg) {
@@ -205,12 +196,9 @@ static void buildPrologSpill(const GCNSubtarget &ST, LivePhysRegs &LiveRegs,
.addReg(ScratchRsrcReg)
.addReg(SPReg)
.addImm(0) // offset
- .addImm(0) // glc
- .addImm(0) // slc
+ .addImm(0) // cpol
.addImm(0) // tfe
- .addImm(0) // dlc
.addImm(0) // swz
- .addImm(0) // scc
.addMemOperand(MMO);
} else {
// No free register, use stack pointer and restore afterwards.
@@ -223,12 +211,9 @@ static void buildPrologSpill(const GCNSubtarget &ST, LivePhysRegs &LiveRegs,
.addReg(ScratchRsrcReg)
.addReg(SPReg)
.addImm(0) // offset
- .addImm(0) // glc
- .addImm(0) // slc
+ .addImm(0) // cpol
.addImm(0) // tfe
- .addImm(0) // dlc
.addImm(0) // swz
- .addImm(0) // scc
.addMemOperand(MMO);
BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_SUB_U32), SPReg)
@@ -259,10 +244,7 @@ static void buildEpilogReload(const GCNSubtarget &ST, LivePhysRegs &LiveRegs,
TII->get(AMDGPU::SCRATCH_LOAD_DWORD_SADDR), SpillReg)
.addReg(SPReg)
.addImm(Offset)
- .addImm(0) // glc
- .addImm(0) // slc
- .addImm(0) // dlc
- .addImm(0) // scc
+ .addImm(0) // cpol
.addMemOperand(MMO);
return;
}
@@ -278,10 +260,7 @@ static void buildEpilogReload(const GCNSubtarget &ST, LivePhysRegs &LiveRegs,
SpillReg)
.addReg(OffsetReg, RegState::Kill)
.addImm(0)
- .addImm(0) // glc
- .addImm(0) // slc
- .addImm(0) // dlc
- .addImm(0) // scc
+ .addImm(0) // cpol
.addMemOperand(MMO);
return;
}
@@ -292,12 +271,9 @@ static void buildEpilogReload(const GCNSubtarget &ST, LivePhysRegs &LiveRegs,
.addReg(ScratchRsrcReg)
.addReg(SPReg)
.addImm(Offset)
- .addImm(0) // glc
- .addImm(0) // slc
+ .addImm(0) // cpol
.addImm(0) // tfe
- .addImm(0) // dlc
.addImm(0) // swz
- .addImm(0) // scc
.addMemOperand(MMO);
return;
}
@@ -316,12 +292,9 @@ static void buildEpilogReload(const GCNSubtarget &ST, LivePhysRegs &LiveRegs,
.addReg(ScratchRsrcReg)
.addReg(SPReg)
.addImm(0)
- .addImm(0) // glc
- .addImm(0) // slc
+ .addImm(0) // cpol
.addImm(0) // tfe
- .addImm(0) // dlc
.addImm(0) // swz
- .addImm(0) // scc
.addMemOperand(MMO);
}
@@ -416,8 +389,7 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
BuildMI(MBB, I, DL, LoadDwordX2, FlatScrInit)
.addReg(FlatScrInit)
.addImm(EncodedOffset) // offset
- .addImm(0) // glc
- .addImm(0) // dlc
+ .addImm(0) // cpol
.addMemOperand(MMO);
// Mask the offset in [47:0] of the descriptor
@@ -713,8 +685,7 @@ void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
.addReg(Rsrc01)
.addImm(EncodedOffset) // offset
- .addImm(0) // glc
- .addImm(0) // dlc
+ .addImm(0) // cpol
.addReg(ScratchRsrcReg, RegState::ImplicitDefine)
.addMemOperand(MMO);
} else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
@@ -748,8 +719,7 @@ void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
.addReg(MFI->getImplicitBufferPtrUserSGPR())
.addImm(0) // offset
- .addImm(0) // glc
- .addImm(0) // dlc
+ .addImm(0) // cpol
.addMemOperand(MMO)
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index acdfb7eeea7d..5a3eaa6e2c36 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5768,28 +5768,6 @@ static SDValue getBuildDwordsVector(SelectionDAG &DAG, SDLoc DL,
return DAG.getBuildVector(Type, DL, VecElts);
}
-static bool parseCachePolicy(SDValue CachePolicy, SelectionDAG &DAG,
- SDValue *GLC, SDValue *SLC, SDValue *DLC) {
- auto CachePolicyConst = cast<ConstantSDNode>(CachePolicy.getNode());
-
- uint64_t Value = CachePolicyConst->getZExtValue();
- SDLoc DL(CachePolicy);
- if (GLC) {
- *GLC = DAG.getTargetConstant((Value & 0x1) ? 1 : 0, DL, MVT::i32);
- Value &= ~(uint64_t)0x1;
- }
- if (SLC) {
- *SLC = DAG.getTargetConstant((Value & 0x2) ? 1 : 0, DL, MVT::i32);
- Value &= ~(uint64_t)0x2;
- }
- if (DLC) {
- *DLC = DAG.getTargetConstant((Value & 0x4) ? 1 : 0, DL, MVT::i32);
- Value &= ~(uint64_t)0x4;
- }
-
- return Value == 0;
-}
-
static SDValue padEltsToUndef(SelectionDAG &DAG, const SDLoc &DL, EVT CastVT,
SDValue Src, int ExtraElts) {
EVT SrcVT = Src.getValueType();
@@ -6184,19 +6162,12 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
}
}
- SDValue GLC;
- SDValue SLC;
- SDValue DLC;
- if (BaseOpcode->Atomic) {
- GLC = True; // TODO no-return optimization
- if (!parseCachePolicy(Op.getOperand(ArgOffset + Intr->CachePolicyIndex),
- DAG, nullptr, &SLC, IsGFX10Plus ? &DLC : nullptr))
- return Op;
- } else {
- if (!parseCachePolicy(Op.getOperand(ArgOffset + Intr->CachePolicyIndex),
- DAG, &GLC, &SLC, IsGFX10Plus ? &DLC : nullptr))
- return Op;
- }
+ unsigned CPol = cast<ConstantSDNode>(
+ Op.getOperand(ArgOffset + Intr->CachePolicyIndex))->getZExtValue();
+ if (BaseOpcode->Atomic)
+ CPol |= AMDGPU::CPol::GLC; // TODO no-return optimization
+ if (CPol & ~AMDGPU::CPol::ALL)
+ return Op;
SmallVector<SDValue, 26> Ops;
if (BaseOpcode->Store || BaseOpcode->Atomic)
@@ -6212,12 +6183,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
if (IsGFX10Plus)
Ops.push_back(DAG.getTargetConstant(DimInfo->Encoding, DL, MVT::i32));
Ops.push_back(Unorm);
- if (!IsGFX10Plus)
- Ops.push_back(DAG.getTargetConstant(0, SDLoc(), MVT::i1));
- if (IsGFX10Plus)
- Ops.push_back(DLC);
- Ops.push_back(GLC);
- Ops.push_back(SLC);
+ Ops.push_back(DAG.getTargetConstant(CPol, DL, MVT::i32));
Ops.push_back(IsA16 && // r128, a16 for gfx9
ST->hasFeature(AMDGPU::FeatureR128A16) ? True : False);
if (IsGFX10Plus)
@@ -6526,11 +6492,8 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getConstant(MF.getSubtarget<GCNSubtarget>().getWavefrontSize(),
SDLoc(Op), MVT::i32);
case Intrinsic::amdgcn_s_buffer_load: {
- bool IsGFX10Plus = AMDGPU::isGFX10Plus(*Subtarget);
- SDValue GLC;
- SDValue DLC = DAG.getTargetConstant(0, DL, MVT::i1);
- if (!parseCachePolicy(Op.getOperand(3), DAG, &GLC, nullptr,
- IsGFX10Plus ? &DLC : nullptr))
+ unsigned CPol = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+ if (CPol & ~AMDGPU::CPol::ALL)
return Op;
return lowerSBuffer(VT, DL, Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
DAG);
@@ -11229,10 +11192,12 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
int NoRetAtomicOp = AMDGPU::getAtomicNoRetOp(MI.getOpcode());
if (NoRetAtomicOp != -1) {
if (!Node->hasAnyUseOfValue(0)) {
- int Glc1Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
- AMDGPU::OpName::glc1);
- if (Glc1Idx != -1)
- MI.RemoveOperand(Glc1Idx);
+ int CPolIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+ AMDGPU::OpName::cpol);
+ if (CPolIdx != -1) {
+ MachineOperand &CPol = MI.getOperand(CPolIdx);
+ CPol.setImm(CPol.getImm() & ~AMDGPU::CPol::GLC);
+ }
MI.RemoveOperand(0);
MI.setDesc(TII->get(NoRetAtomicOp));
return;
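A hedged sketch (illustration only, not patch code) of the rewrite AdjustInstrPostInstrSelection now performs for atomics whose result is unused: instead of removing a dedicated glc1 operand, it clears the GLC bit inside the single cpol immediate and leaves the rest of the policy intact:

  #include <cassert>

  enum { GLC = 1, SLC = 2, DLC = 4, SCC = 16 }; // mirrors AMDGPU::CPol

  // Drop only the return-value (GLC) bit; other cache-policy bits survive.
  unsigned dropGLCForNoRetAtomic(unsigned CPolImm) {
    return CPolImm & ~unsigned(GLC);
  }

  int main() {
    assert(dropGLCForNoRetAtomic(GLC | SLC) == SLC);
    assert(dropGLCForNoRetAtomic(SLC | DLC) == (SLC | DLC)); // no-op without GLC
    return 0;
  }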
diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
index c0ec91d78839..df3571e86006 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
@@ -261,6 +261,13 @@ class Enc64 {
int Size = 8;
}
+def CPolBit {
+ int GLC = 0;
+ int SLC = 1;
+ int DLC = 2;
+ int SCC = 4;
+}
+
class VOPDstOperand <RegisterClass rc> : RegisterOperand <rc, "printVOPDst">;
class VINTRPe <bits<2> op> : Enc32 {
@@ -281,21 +288,20 @@ class MIMGe : Enc64 {
bits<10> vdata;
bits<4> dmask;
bits<1> unorm;
- bits<1> glc;
+ bits<5> cpol;
bits<1> r128;
bits<1> tfe;
bits<1> lwe;
- bits<1> slc;
bit d16;
bits<7> srsrc;
bits<7> ssamp;
let Inst{11-8} = dmask;
let Inst{12} = unorm;
- let Inst{13} = glc;
+ let Inst{13} = cpol{CPolBit.GLC};
let Inst{15} = r128;
let Inst{17} = lwe;
- let Inst{25} = slc;
+ let Inst{25} = cpol{CPolBit.SLC};
let Inst{31-26} = 0x3c;
let Inst{47-40} = vdata{7-0};
let Inst{52-48} = srsrc{6-2};
@@ -306,10 +312,9 @@ class MIMGe : Enc64 {
class MIMGe_gfx6789 <bits<8> op> : MIMGe {
bits<8> vaddr;
bits<1> da;
- bits<1> sccb;
let Inst{0} = op{7};
- let Inst{7} = sccb;
+ let Inst{7} = cpol{CPolBit.SCC};
let Inst{14} = da;
let Inst{16} = tfe;
let Inst{24-18} = op{6-0};
@@ -319,10 +324,9 @@ class MIMGe_gfx6789 <bits<8> op> : MIMGe {
class MIMGe_gfx90a <bits<8> op> : MIMGe {
bits<8> vaddr;
bits<1> da;
- bits<1> sccb;
let Inst{0} = op{7};
- let Inst{7} = sccb;
+ let Inst{7} = cpol{CPolBit.SCC};
let Inst{14} = da;
let Inst{16} = vdata{9}; // ACC bit
let Inst{24-18} = op{6-0};
@@ -333,13 +337,12 @@ class MIMGe_gfx10 <bits<8> op> : MIMGe {
bits<8> vaddr0;
bits<3> dim;
bits<2> nsa;
- bits<1> dlc;
bits<1> a16;
let Inst{0} = op{7};
let Inst{2-1} = nsa;
let Inst{5-3} = dim;
- let Inst{7} = dlc;
+ let Inst{7} = cpol{CPolBit.DLC};
let Inst{16} = tfe;
let Inst{24-18} = op{6-0};
let Inst{39-32} = vaddr0;
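Not patch code, only a worked model of the MIMGe encodings above: a single 5-bit cpol operand now scatters into subtarget-specific instruction bits, with Inst{7} carrying SCC on gfx6789/gfx90a but DLC on gfx10 (bit indices per CPolBit):

  #include <cassert>
  #include <cstdint>

  uint64_t encodeMIMGCPol(unsigned CPol, bool IsGFX10) {
    uint64_t Inst = 0;
    Inst |= uint64_t((CPol >> 0) & 1) << 13;  // Inst{13} = cpol{CPolBit.GLC}
    Inst |= uint64_t((CPol >> 1) & 1) << 25;  // Inst{25} = cpol{CPolBit.SLC}
    if (IsGFX10)
      Inst |= uint64_t((CPol >> 2) & 1) << 7; // Inst{7} = cpol{CPolBit.DLC}
    else
      Inst |= uint64_t((CPol >> 4) & 1) << 7; // Inst{7} = cpol{CPolBit.SCC}
    return Inst;
  }

  int main() {
    assert(encodeMIMGCPol(1 | 2, true) == ((1ull << 13) | (1ull << 25))); // glc slc
    assert(encodeMIMGCPol(4, true) == (1ull << 7)); // dlc reaches Inst{7} on gfx10
    assert(encodeMIMGCPol(4, false) == 0);          // but not on older targets
    return 0;
  }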
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index b8c1487cc8e0..4035ff1817c1 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -5588,21 +5588,10 @@ SIInstrInfo::legalizeOperands(MachineInstr &MI,
.add(*SOffset)
.add(*Offset);
- // Atomics do not have this operand.
- if (const MachineOperand *GLC =
- getNamedOperand(MI, AMDGPU::OpName::glc)) {
- MIB.addImm(GLC->getImm());
+ if (const MachineOperand *CPol =
+ getNamedOperand(MI, AMDGPU::OpName::cpol)) {
+ MIB.addImm(CPol->getImm());
}
- if (const MachineOperand *DLC =
- getNamedOperand(MI, AMDGPU::OpName::dlc)) {
- MIB.addImm(DLC->getImm());
- }
- if (const MachineOperand *SCCB =
- getNamedOperand(MI, AMDGPU::OpName::sccb)) {
- MIB.addImm(SCCB->getImm());
- }
-
- MIB.addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc));
if (const MachineOperand *TFE =
getNamedOperand(MI, AMDGPU::OpName::tfe)) {
@@ -5622,7 +5611,7 @@ SIInstrInfo::legalizeOperands(MachineInstr &MI,
.addReg(NewSRsrc)
.add(*SOffset)
.add(*Offset)
- .addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc))
+ .addImm(getNamedImmOperand(MI, AMDGPU::OpName::cpol))
.cloneMemRefs(MI);
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 0082a74e7d2a..af51434514df 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -818,24 +818,16 @@ def NegSubInlineConstV216 : PatLeaf<(build_vector), [{
// MUBUF/SMEM Patterns
//===----------------------------------------------------------------------===//
-def extract_glc : SDNodeXForm<timm, [{
- return CurDAG->getTargetConstant(N->getZExtValue() & 1, SDLoc(N), MVT::i8);
-}]>;
-
-def extract_slc : SDNodeXForm<timm, [{
- return CurDAG->getTargetConstant((N->getZExtValue() >> 1) & 1, SDLoc(N), MVT::i8);
-}]>;
-
-def extract_dlc : SDNodeXForm<timm, [{
- return CurDAG->getTargetConstant((N->getZExtValue() >> 2) & 1, SDLoc(N), MVT::i8);
+def extract_cpol : SDNodeXForm<timm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & AMDGPU::CPol::ALL, SDLoc(N), MVT::i8);
}]>;
def extract_swz : SDNodeXForm<timm, [{
return CurDAG->getTargetConstant((N->getZExtValue() >> 3) & 1, SDLoc(N), MVT::i8);
}]>;
-def extract_sccb : SDNodeXForm<timm, [{
- return CurDAG->getTargetConstant((N->getZExtValue() >> 4) & 1, SDLoc(N), MVT::i8);
+def set_glc : SDNodeXForm<timm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() | AMDGPU::CPol::GLC, SDLoc(N), MVT::i8);
}]>;
//===----------------------------------------------------------------------===//
@@ -1090,6 +1082,12 @@ class NamedOperandU32Default0<string Name, AsmOperandClass MatchClass> :
let ParserMatchClass = MatchClass;
}
+class NamedOperandU32Default1<string Name, AsmOperandClass MatchClass> :
+ OperandWithDefaultOps<i32, (ops (i32 1))> {
+ let PrintMethod = "print"#Name;
+ let ParserMatchClass = MatchClass;
+}
+
let OperandType = "OPERAND_IMMEDIATE" in {
def offen : NamedOperandBit<"Offen", NamedMatchClass<"Offen">>;
@@ -1113,18 +1111,9 @@ def clampmod : NamedOperandBit<"ClampSI", NamedMatchClass<"ClampSI">>;
def clampmod0 : NamedOperandBit_0<"ClampSI", NamedMatchClass<"ClampSI">>;
def highmod : NamedOperandBit<"High", NamedMatchClass<"High">>;
-def SCCB : NamedOperandBit<"SCCB", NamedMatchClass<"SCCB">>;
-def SCCB_0 : NamedOperandBit_0<"SCCB", NamedMatchClass<"SCCB">>;
-
-def DLC : NamedOperandBit<"DLC", NamedMatchClass<"DLC">>;
-def DLC_0 : NamedOperandBit_0<"DLC", NamedMatchClass<"DLC">>;
-
-def GLC : NamedOperandBit<"GLC", NamedMatchClass<"GLC">>;
-def GLC_0 : NamedOperandBit_0<"GLC", NamedMatchClass<"GLC">>;
-def GLC_1 : NamedOperandBit_1<"GLC", NamedMatchClass<"GLC_1">>;
-
-def SLC : NamedOperandBit<"SLC", NamedMatchClass<"SLC">>;
-def SLC_0 : NamedOperandBit_0<"SLC", NamedMatchClass<"SLC">>;
+def CPol : NamedOperandU32<"CPol", NamedMatchClass<"CPol">>;
+def CPol_0 : NamedOperandU32Default0<"CPol", NamedMatchClass<"CPol">>;
+def CPol_GLC1 : NamedOperandU32Default1<"CPol", NamedMatchClass<"CPol">>;
def TFE : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>;
def SWZ : NamedOperandBit<"SWZ", NamedMatchClass<"SWZ">>;
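A plain-function sketch (illustration, not patch code) of what the two SDNodeXForms above compute on an intrinsic cache-policy immediate; the constants mirror AMDGPU::CPol in SIDefines.h:

  #include <cassert>

  enum { GLC = 1, SLC = 2, DLC = 4, SCC = 16, ALL = GLC | SLC | DLC | SCC };

  // extract_cpol: keep only the defined policy bits (drops e.g. the swz bit).
  unsigned extractCPol(unsigned CachePolicy) { return CachePolicy & ALL; }

  // set_glc: force GLC on, as needed for returning atomics.
  unsigned setGLC(unsigned CachePolicy) { return CachePolicy | GLC; }

  int main() {
    assert(extractCPol(0xFF) == ALL); // stray bits are masked off
    assert(setGLC(SLC) == (SLC | GLC));
    return 0;
  }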
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 8f970a912ea2..6c130f068b36 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -104,10 +104,7 @@ class SILoadStoreOptimizer : public MachineFunctionPass {
unsigned BaseOff;
unsigned DMask;
InstClassEnum InstClass;
- bool GLC = 0;
- bool SLC = 0;
- bool DLC = 0;
- bool SCCB = 0; // vmem only.
+    unsigned CPol = 0;
bool UseST64;
int AddrIdx[MaxAddressRegs];
const MachineOperand *AddrReg[MaxAddressRegs];
@@ -533,14 +530,7 @@ void SILoadStoreOptimizer::CombineInfo::setMI(MachineBasicBlock::iterator MI,
if ((InstClass == DS_READ) || (InstClass == DS_WRITE)) {
Offset &= 0xffff;
} else if (InstClass != MIMG) {
- GLC = TII.getNamedOperand(*I, AMDGPU::OpName::glc)->getImm();
- if (InstClass != S_BUFFER_LOAD_IMM) {
- SLC = TII.getNamedOperand(*I, AMDGPU::OpName::slc)->getImm();
- }
- DLC = TII.getNamedOperand(*I, AMDGPU::OpName::dlc)->getImm();
- if (InstClass != S_BUFFER_LOAD_IMM) {
- SCCB = TII.getNamedOperand(*I, AMDGPU::OpName::sccb)->getImm();
- }
+ CPol = TII.getNamedOperand(*I, AMDGPU::OpName::cpol)->getImm();
}
AddressRegs Regs = getRegs(Opc, TII);
@@ -690,10 +680,9 @@ bool SILoadStoreOptimizer::dmasksCanBeCombined(const CombineInfo &CI,
return false;
// Check other optional immediate operands for equality.
- unsigned OperandsToMatch[] = {AMDGPU::OpName::glc, AMDGPU::OpName::slc,
- AMDGPU::OpName::d16, AMDGPU::OpName::unorm,
- AMDGPU::OpName::da, AMDGPU::OpName::r128,
- AMDGPU::OpName::a16, AMDGPU::OpName::dlc};
+ unsigned OperandsToMatch[] = {AMDGPU::OpName::cpol, AMDGPU::OpName::d16,
+ AMDGPU::OpName::unorm, AMDGPU::OpName::da,
+ AMDGPU::OpName::r128, AMDGPU::OpName::a16};
for (auto op : OperandsToMatch) {
int Idx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), op);
@@ -798,9 +787,7 @@ bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI,
if ((CI.InstClass != DS_READ) && (CI.InstClass != DS_WRITE)) {
return (EltOffset0 + CI.Width == EltOffset1 ||
EltOffset1 + Paired.Width == EltOffset0) &&
- CI.GLC == Paired.GLC && CI.DLC == Paired.DLC &&
- (CI.InstClass == S_BUFFER_LOAD_IMM ||
- (CI.SLC == Paired.SLC && CI.SCCB == Paired.SCCB));
+           CI.CPol == Paired.CPol;
}
// If the offset in elements doesn't fit in 8-bits, we might be able to use
@@ -1301,8 +1289,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSBufferLoadImmPair(
BuildMI(*MBB, Paired.I, DL, TII->get(Opcode), DestReg)
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::sbase))
.addImm(MergedOffset) // offset
- .addImm(CI.GLC) // glc
- .addImm(CI.DLC) // dlc
+ .addImm(CI.CPol) // cpol
.addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI, Paired);
@@ -1361,12 +1348,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair(
MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
.addImm(MergedOffset) // offset
- .addImm(CI.GLC) // glc
- .addImm(CI.SLC) // slc
+ .addImm(CI.CPol) // cpol
.addImm(0) // tfe
- .addImm(CI.DLC) // dlc
.addImm(0) // swz
- .addImm(CI.SCCB) // scc
.addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI, Paired);
@@ -1429,12 +1413,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferLoadPair(
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
.addImm(MergedOffset) // offset
.addImm(JoinedFormat) // format
- .addImm(CI.GLC) // glc
- .addImm(CI.SLC) // slc
+ .addImm(CI.CPol) // cpol
.addImm(0) // tfe
- .addImm(CI.DLC) // dlc
.addImm(0) // swz
- .addImm(CI.SCCB) // scc
.addMemOperand(
combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
@@ -1510,12 +1491,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferStorePair(
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
.addImm(std::min(CI.Offset, Paired.Offset)) // offset
.addImm(JoinedFormat) // format
- .addImm(CI.GLC) // glc
- .addImm(CI.SLC) // slc
+ .addImm(CI.CPol) // cpol
.addImm(0) // tfe
- .addImm(CI.DLC) // dlc
.addImm(0) // swz
- .addImm(CI.SCCB) // scc
.addMemOperand(
combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
@@ -1665,12 +1643,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferStorePair(
MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
.addImm(std::min(CI.Offset, Paired.Offset)) // offset
- .addImm(CI.GLC) // glc
- .addImm(CI.SLC) // slc
+ .addImm(CI.CPol) // cpol
.addImm(0) // tfe
- .addImm(CI.DLC) // dlc
.addImm(0) // swz
- .addImm(CI.SCCB) // scc
.addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
moveInstsAfter(MIB, InstsToMove);
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index d4dd5a381a71..da144063c2b7 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -84,22 +84,6 @@ enum class SIAtomicAddrSpace {
LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ ALL)
};
-/// Sets named bit \p BitName to "true" if present in instruction \p MI.
-/// \returns Returns true if \p MI is modified, false otherwise.
-template <uint16_t BitName>
-bool enableNamedBit(const MachineBasicBlock::iterator &MI) {
- int BitIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), BitName);
- if (BitIdx == -1)
- return false;
-
- MachineOperand &Bit = MI->getOperand(BitIdx);
- if (Bit.getImm() != 0)
- return false;
-
- Bit.setImm(1);
- return true;
-}
-
class SIMemOpInfo final {
private:
@@ -288,6 +272,11 @@ class SICacheControl {
SICacheControl(const GCNSubtarget &ST);
+  /// Sets named bit \p Bit to "true" if present in instruction \p MI.
+ /// \returns Returns true if \p MI is modified, false otherwise.
+ bool enableNamedBit(const MachineBasicBlock::iterator MI,
+ AMDGPU::CPol::CPol Bit) const;
+
public:
/// Create a cache control for the subtarget \p ST.
@@ -369,13 +358,13 @@ class SIGfx6CacheControl : public SICacheControl {
/// Sets GLC bit to "true" if present in \p MI. Returns true if \p MI
/// is modified, false otherwise.
bool enableGLCBit(const MachineBasicBlock::iterator &MI) const {
- return enableNamedBit<AMDGPU::OpName::glc>(MI);
+ return enableNamedBit(MI, AMDGPU::CPol::GLC);
}
/// Sets SLC bit to "true" if present in \p MI. Returns true if \p MI
/// is modified, false otherwise.
bool enableSLCBit(const MachineBasicBlock::iterator &MI) const {
- return enableNamedBit<AMDGPU::OpName::slc>(MI);
+ return enableNamedBit(MI, AMDGPU::CPol::SLC);
}
public:
@@ -436,7 +425,7 @@ class SIGfx90ACacheControl : public SIGfx7CacheControl {
/// Sets SCC bit to "true" if present in \p MI. Returns true if \p MI
/// is modified, false otherwise.
bool enableSCCBit(const MachineBasicBlock::iterator &MI) const {
- return enableNamedBit<AMDGPU::OpName::sccb>(MI);
+    return enableNamedBit(MI, AMDGPU::CPol::SCC);
}
public:
@@ -485,7 +474,7 @@ class SIGfx10CacheControl : public SIGfx7CacheControl {
/// Sets DLC bit to "true" if present in \p MI. Returns true if \p MI
/// is modified, false otherwise.
bool enableDLCBit(const MachineBasicBlock::iterator &MI) const {
- return enableNamedBit<AMDGPU::OpName::dlc>(MI);
+ return enableNamedBit(MI, AMDGPU::CPol::DLC);
}
public:
@@ -785,6 +774,16 @@ SICacheControl::SICacheControl(const GCNSubtarget &ST) : ST(ST) {
InsertCacheInv = !AmdgcnSkipCacheInvalidations;
}
+bool SICacheControl::enableNamedBit(const MachineBasicBlock::iterator MI,
+ AMDGPU::CPol::CPol Bit) const {
+ MachineOperand *CPol = TII->getNamedOperand(*MI, AMDGPU::OpName::cpol);
+ if (!CPol)
+ return false;
+
+ CPol->setImm(CPol->getImm() | Bit);
+ return true;
+}
+
/* static */
std::unique_ptr<SICacheControl> SICacheControl::create(const GCNSubtarget &ST) {
GCNSubtarget::Generation Generation = ST.getGeneration();
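A toy model (std::optional stands in for the named-operand lookup, an assumption for illustration) of the contract enableNamedBit implements above: OR the requested policy bit into the cpol immediate when the operand exists, otherwise report no modification:

  #include <cassert>
  #include <cstdint>
  #include <optional>

  enum { GLC = 1, SLC = 2, DLC = 4, SCC = 16 }; // mirrors AMDGPU::CPol

  struct ToyInstr {
    std::optional<int64_t> CPol; // empty when the instruction has no cpol operand
  };

  bool enableNamedBit(ToyInstr &MI, unsigned Bit) {
    if (!MI.CPol)
      return false;  // no cache-policy operand on this instruction
    *MI.CPol |= Bit; // set the bit; previously set bits are preserved
    return true;
  }

  int main() {
    ToyInstr Load{0};
    assert(enableNamedBit(Load, GLC) && *Load.CPol == GLC);
    assert(enableNamedBit(Load, SLC) && *Load.CPol == (GLC | SLC));
    ToyInstr NoMem{std::nullopt};
    assert(!enableNamedBit(NoMem, DLC)); // untouched, reports no change
    return 0;
  }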
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index c93081893b71..0f44182660a5 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -743,12 +743,9 @@ static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST,
.add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
.add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
.addImm(Offset)
- .addImm(0) // glc
- .addImm(0) // slc
+ .addImm(0) // cpol
.addImm(0) // tfe
- .addImm(0) // dlc
.addImm(0) // swz
- .addImm(0) // scc
.cloneMemRefs(*MI);
const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
@@ -1010,13 +1007,10 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
MIB.addReg(SOffset, SOffsetRegState);
}
MIB.addImm(Offset + RemRegOffset)
- .addImm(0) // glc
- .addImm(0) // slc
- .addImm(0); // tfe for MUBUF or dlc for FLAT
+ .addImm(0); // cpol
if (!IsFlat)
- MIB.addImm(0) // dlc
+ MIB.addImm(0) // tfe
.addImm(0); // swz
- MIB.addImm(0); // scc
MIB.addMemOperand(NewMMO);
if (!IsAGPR && NeedSuperRegDef)
diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td
index d2b5652a0db8..2df33f196816 100644
--- a/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -71,6 +71,7 @@ class SM_Real <SM_Pseudo ps>
bits<7> sdst;
bits<32> offset;
bits<1> imm = !if(ps.has_offset, ps.offset_is_imm, 0);
+ bits<5> cpol;
}
class SM_Probe_Pseudo <string opName, dag ins, bit isImm>
@@ -122,8 +123,8 @@ multiclass SM_Pseudo_Loads<string opName,
RegisterClass dstClass> {
def _IMM : SM_Load_Pseudo <opName,
(outs dstClass:$sdst),
- (ins baseClass:$sbase, i32imm:$offset, i1imm:$glc, i1imm:$dlc),
- " $sdst, $sbase, $offset$glc$dlc", []> {
+ (ins baseClass:$sbase, i32imm:$offset, CPol:$cpol),
+ " $sdst, $sbase, $offset$cpol", []> {
let offset_is_imm = 1;
let BaseClass = baseClass;
let PseudoInstr = opName # "_IMM";
@@ -133,8 +134,8 @@ multiclass SM_Pseudo_Loads<string opName,
def _SGPR : SM_Load_Pseudo <opName,
(outs dstClass:$sdst),
- (ins baseClass:$sbase, SReg_32:$soff, i1imm:$glc, i1imm:$dlc),
- " $sdst, $sbase, $offset$glc$dlc", []> {
+ (ins baseClass:$sbase, SReg_32:$soff, CPol:$cpol),
+ " $sdst, $sbase, $offset$cpol", []> {
let BaseClass = baseClass;
let PseudoInstr = opName # "_SGPR";
let has_glc = 1;
@@ -146,8 +147,8 @@ multiclass SM_Pseudo_Stores<string opName,
RegisterClass baseClass,
RegisterClass srcClass> {
def _IMM : SM_Store_Pseudo <opName,
- (ins srcClass:$sdata, baseClass:$sbase, i32imm:$offset, i1imm:$glc, i1imm:$dlc),
- " $sdata, $sbase, $offset$glc$dlc", []> {
+ (ins srcClass:$sdata, baseClass:$sbase, i32imm:$offset, CPol:$cpol),
+ " $sdata, $sbase, $offset$cpol", []> {
let offset_is_imm = 1;
let BaseClass = baseClass;
let SrcClass = srcClass;
@@ -155,8 +156,8 @@ multiclass SM_Pseudo_Stores<string opName,
}
def _SGPR : SM_Store_Pseudo <opName,
- (ins srcClass:$sdata, baseClass:$sbase, SReg_32:$soff, i1imm:$glc, i1imm:$dlc),
- " $sdata, $sbase, $offset$glc$dlc", []> {
+ (ins srcClass:$sdata, baseClass:$sbase, SReg_32:$soff, CPol:$cpol),
+ " $sdata, $sbase, $offset$cpol", []> {
let BaseClass = baseClass;
let SrcClass = srcClass;
let PseudoInstr = opName # "_SGPR";
@@ -232,6 +233,8 @@ class SM_Atomic_Pseudo <string opName,
let IsAtomicNoRet = !not(isRet);
let IsAtomicRet = isRet;
+
+ let AsmMatchConverter = "cvtSMEMAtomic";
}
class SM_Pseudo_Atomic<string opName,
@@ -241,13 +244,14 @@ class SM_Pseudo_Atomic<string opName,
bit isRet,
string opNameWithSuffix = opName # !if(isImm,
!if(isRet, "_IMM_RTN", "_IMM"),
- !if(isRet, "_SGPR_RTN", "_SGPR"))> :
+ !if(isRet, "_SGPR_RTN", "_SGPR")),
+ Operand CPolTy = !if(isRet, CPol_GLC1, CPol)> :
SM_Atomic_Pseudo<opName,
!if(isRet, (outs dataClass:$sdst), (outs)),
!if(isImm,
- (ins dataClass:$sdata, baseClass:$sbase, smem_offset:$offset, DLC:$dlc),
- (ins dataClass:$sdata, baseClass:$sbase, SReg_32:$offset, DLC:$dlc)),
- !if(isRet, " $sdst", " $sdata") # ", $sbase, $offset" # !if(isRet, " glc", "") # "$dlc",
+ (ins dataClass:$sdata, baseClass:$sbase, smem_offset:$offset, CPolTy:$cpol),
+ (ins dataClass:$sdata, baseClass:$sbase, SReg_32:$offset, CPolTy:$cpol)),
+ !if(isRet, " $sdst", " $sdata") # ", $sbase, $offset$cpol",
isRet>,
AtomicNoRet <opNameWithSuffix, isRet> {
let offset_is_imm = isImm;
@@ -463,13 +467,13 @@ multiclass SM_Real_Loads_si<bits<5> op, string ps,
SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {
def _IMM_si : SMRD_Real_si <op, immPs> {
- let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_8:$offset, GLC:$glc, DLC:$dlc);
+ let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_8:$offset, CPol:$cpol);
}
// FIXME: The operand name $offset is inconsistent with $soff used
// in the pseudo
def _SGPR_si : SMRD_Real_si <op, sgprPs> {
- let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
+ let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, CPol:$cpol);
}
}
@@ -497,15 +501,13 @@ class SMEM_Real_vi <bits<8> op, SM_Pseudo ps>
: SM_Real<ps>
, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.VI>
, Enc64 {
- bit glc;
-
let AssemblerPredicate = isGFX8GFX9;
let DecoderNamespace = "GFX8";
let Inst{5-0} = !if(ps.has_sbase, sbase{6-1}, ?);
let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?);
- let Inst{16} = !if(ps.has_glc, glc, ?);
+ let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ?);
let Inst{17} = imm;
let Inst{25-18} = op;
let Inst{31-26} = 0x30; //encoding
@@ -519,10 +521,10 @@ multiclass SM_Real_Loads_vi<bits<8> op, string ps,
SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM),
SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {
def _IMM_vi : SMEM_Real_vi <op, immPs> {
- let InOperandList = (ins immPs.BaseClass:$sbase, smem_offset:$offset, GLC:$glc, DLC:$dlc);
+ let InOperandList = (ins immPs.BaseClass:$sbase, smem_offset:$offset, CPol:$cpol);
}
def _SGPR_vi : SMEM_Real_vi <op, sgprPs> {
- let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
+ let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, CPol:$cpol);
}
}
@@ -540,11 +542,11 @@ multiclass SM_Real_Stores_vi<bits<8> op, string ps,
// FIXME: The operand name $offset is inconsistent with $soff used
// in the pseudo
def _IMM_vi : SMEM_Real_Store_vi <op, immPs> {
- let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smem_offset:$offset, GLC:$glc, DLC:$dlc);
+ let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smem_offset:$offset, CPol:$cpol);
}
def _SGPR_vi : SMEM_Real_Store_vi <op, sgprPs> {
- let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
+ let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, CPol:$cpol);
}
}
@@ -604,8 +606,8 @@ class SMEM_Atomic_Real_vi <bits<8> op, SM_Atomic_Pseudo ps>
let Constraints = ps.Constraints;
let DisableEncoding = ps.DisableEncoding;
- let glc = ps.glc;
- let Inst{12-6} = !if(glc, sdst{6-0}, sdata{6-0});
+ let cpol{CPolBit.GLC} = ps.glc;
+ let Inst{12-6} = !if(ps.glc, sdst{6-0}, sdata{6-0});
}
multiclass SM_Real_Atomics_vi<bits<8> op, string ps> {
@@ -694,7 +696,7 @@ class SMRD_Real_Load_IMM_ci <bits<5> op, SM_Load_Pseudo ps> :
let AssemblerPredicate = isGFX7Only;
let DecoderNamespace = "GFX7";
- let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset, GLC:$glc, DLC:$dlc);
+ let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset, CPol:$cpol);
let LGKM_CNT = ps.LGKM_CNT;
let mayLoad = ps.mayLoad;
@@ -772,26 +774,26 @@ multiclass SMRD_Pattern <string Instr, ValueType vt> {
// 1. IMM offset
def : GCNPat <
(smrd_load (SMRDImm i64:$sbase, i32:$offset)),
- (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0, 0))
+ (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0))
>;
// 2. 32-bit IMM offset on CI
def : GCNPat <
(smrd_load (SMRDImm32 i64:$sbase, i32:$offset)),
- (vt (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, 0, 0))> {
+ (vt (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, 0))> {
let OtherPredicates = [isGFX7Only];
}
// 3. SGPR offset
def : GCNPat <
(smrd_load (SMRDSgpr i64:$sbase, i32:$offset)),
- (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, 0, 0))
+ (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, 0))
>;
// 4. No offset
def : GCNPat <
(vt (smrd_load (i64 SReg_64:$sbase))),
- (vt (!cast<SM_Pseudo>(Instr#"_IMM") i64:$sbase, 0, 0, 0))
+ (vt (!cast<SM_Pseudo>(Instr#"_IMM") i64:$sbase, 0, 0))
>;
}
@@ -799,8 +801,7 @@ multiclass SMLoad_Pattern <string Instr, ValueType vt> {
// 1. Offset as an immediate
def : GCNPat <
(SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), timm:$cachepolicy),
- (vt (!cast<SM_Pseudo>(Instr#"_IMM") SReg_128:$sbase, i32imm:$offset, (extract_glc $cachepolicy),
- (extract_dlc $cachepolicy)))> {
+ (vt (!cast<SM_Pseudo>(Instr#"_IMM") SReg_128:$sbase, i32imm:$offset, (extract_cpol $cachepolicy)))> {
let AddedComplexity = 2;
}
@@ -808,7 +809,7 @@ multiclass SMLoad_Pattern <string Instr, ValueType vt> {
def : GCNPat <
(vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), timm:$cachepolicy)),
(!cast<InstSI>(Instr#"_IMM_ci") SReg_128:$sbase, smrd_literal_offset:$offset,
- (extract_glc $cachepolicy), (extract_dlc $cachepolicy))> {
+ (extract_cpol $cachepolicy))> {
let OtherPredicates = [isGFX7Only];
let AddedComplexity = 1;
}
@@ -816,8 +817,7 @@ multiclass SMLoad_Pattern <string Instr, ValueType vt> {
// 3. Offset loaded in a 32-bit SGPR
def : GCNPat <
(SIsbuffer_load v4i32:$sbase, i32:$offset, timm:$cachepolicy),
- (vt (!cast<SM_Pseudo>(Instr#"_SGPR") SReg_128:$sbase, SReg_32:$offset, (extract_glc $cachepolicy),
- (extract_dlc $cachepolicy)))
+ (vt (!cast<SM_Pseudo>(Instr#"_SGPR") SReg_128:$sbase, SReg_32:$offset, (extract_cpol $cachepolicy)))
>;
}
@@ -883,16 +883,13 @@ def : GCNPat <
class SMEM_Real_gfx10<bits<8> op, SM_Pseudo ps> :
SM_Real<ps>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10>, Enc64 {
- bit glc;
- bit dlc;
-
let AssemblerPredicate = isGFX10Plus;
let DecoderNamespace = "GFX10";
let Inst{5-0} = !if(ps.has_sbase, sbase{6-1}, ?);
let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?);
- let Inst{14} = !if(ps.has_dlc, dlc, ?);
- let Inst{16} = !if(ps.has_glc, glc, ?);
+ let Inst{14} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ?);
+ let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ?);
let Inst{25-18} = op;
let Inst{31-26} = 0x3d;
let Inst{52-32} = !if(ps.offset_is_imm, !if(ps.has_offset, offset{20-0}, ?), ?);
@@ -904,10 +901,10 @@ multiclass SM_Real_Loads_gfx10<bits<8> op, string ps,
SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM),
SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {
def _IMM_gfx10 : SMEM_Real_gfx10<op, immPs> {
- let InOperandList = (ins immPs.BaseClass:$sbase, smem_offset:$offset, GLC:$glc, DLC:$dlc);
+ let InOperandList = (ins immPs.BaseClass:$sbase, smem_offset:$offset, CPol:$cpol);
}
def _SGPR_gfx10 : SMEM_Real_gfx10<op, sgprPs> {
- let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
+ let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, CPol:$cpol);
}
}
@@ -924,11 +921,11 @@ multiclass SM_Real_Stores_gfx10<bits<8> op, string ps,
// FIXME: The operand name $offset is inconsistent with $soff used
// in the pseudo
def _IMM_gfx10 : SMEM_Real_Store_gfx10 <op, immPs> {
- let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smem_offset:$offset, GLC:$glc, DLC:$dlc);
+ let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smem_offset:$offset, CPol:$cpol);
}
def _SGPR_gfx10 : SMEM_Real_Store_gfx10 <op, sgprPs> {
- let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
+ let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, CPol:$cpol);
}
}
@@ -987,15 +984,14 @@ class SMEM_Atomic_Real_gfx10 <bits<8> op, SM_Atomic_Pseudo ps>
AtomicNoRet <!subst("_RTN","",NAME), ps.glc> {
bits<7> sdata;
- bit dlc;
let Constraints = ps.Constraints;
let DisableEncoding = ps.DisableEncoding;
- let glc = ps.glc;
+ let cpol{CPolBit.GLC} = ps.glc;
- let Inst{14} = !if(ps.has_dlc, dlc, 0);
- let Inst{12-6} = !if(glc, sdst{6-0}, sdata{6-0});
+ let Inst{14} = !if(ps.has_dlc, cpol{CPolBit.DLC}, 0);
+ let Inst{12-6} = !if(ps.glc, sdst{6-0}, sdata{6-0});
}
multiclass SM_Real_Atomics_gfx10<bits<8> op, string ps> {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir
index c8a8116bb2d7..d73e7e41dc86 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir
@@ -19,7 +19,7 @@ body: |
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@@ -27,7 +27,7 @@ body: |
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@@ -35,7 +35,7 @@ body: |
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@@ -71,7 +71,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
; GFX7: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
- ; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gep4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@@ -79,7 +79,7 @@ body: |
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gep4
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@@ -97,7 +97,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
; GFX10: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
- ; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@@ -125,7 +125,7 @@ body: |
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
- ; GFX7: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+ ; GFX7: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
@@ -133,7 +133,7 @@ body: |
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
- ; GFX9: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+ ; GFX9: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
@@ -141,7 +141,7 @@ body: |
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX10: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
- ; GFX10: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+ ; GFX10: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
@@ -177,7 +177,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
; GFX7: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
- ; GFX7: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+ ; GFX7: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_gep4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
@@ -185,7 +185,7 @@ body: |
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
- ; GFX9: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+ ; GFX9: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_gep4
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
@@ -203,7 +203,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
; GFX10: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
- ; GFX10: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+ ; GFX10: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
@@ -241,7 +241,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
; GFX7: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
- ; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gepm4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@@ -259,7 +259,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
; GFX9: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
- ; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gepm4
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@@ -277,7 +277,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
; GFX10: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
- ; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@@ -305,21 +305,21 @@ body: |
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s32) = COPY $vgpr3
@@ -343,21 +343,21 @@ body: |
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
- ; GFX7: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+ ; GFX7: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
- ; GFX9: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+ ; GFX9: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX10: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
- ; GFX10: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+ ; GFX10: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
%2:vgpr(s64) = COPY $vgpr4_vgpr5
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir
index 0b7f99980557..3bcc0932fcb6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir
@@ -26,7 +26,7 @@ body: |
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0
; GFX6: $vgpr0 = COPY [[COPY3]]
; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global
@@ -40,7 +40,7 @@ body: |
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0
; GFX7: $vgpr0 = COPY [[COPY3]]
; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global
@@ -49,7 +49,7 @@ body: |
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
+ ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@@ -57,7 +57,7 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
+ ; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@@ -65,7 +65,7 @@ body: |
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]]
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@@ -73,7 +73,7 @@ body: |
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@@ -104,7 +104,7 @@ body: |
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0
; GFX6: $vgpr0 = COPY [[COPY3]]
; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4
@@ -118,7 +118,7 @@ body: |
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0
; GFX7: $vgpr0 = COPY [[COPY3]]
; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4
@@ -137,7 +137,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
; GFX7-FLAT: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
- ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
+ ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@@ -155,7 +155,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
; GFX8: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
- ; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
+ ; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@@ -163,7 +163,7 @@ body: |
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]]
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@@ -171,7 +171,7 @@ body: |
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@@ -204,7 +204,7 @@ body: |
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
+ ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX6: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1
; GFX6: $vgpr0_vgpr1 = COPY [[COPY3]]
; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_global
@@ -218,7 +218,7 @@ body: |
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
+ ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX7: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1
; GFX7: $vgpr0_vgpr1 = COPY [[COPY3]]
; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s64_global
@@ -227,7 +227,7 @@ body: |
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7-FLAT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
- ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
+ ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s64_global
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
@@ -235,7 +235,7 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX8: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
- ; GFX8: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
+ ; GFX8: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_global
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
@@ -243,7 +243,7 @@ body: |
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
- ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
+ ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]]
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_global
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
@@ -251,7 +251,7 @@ body: |
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX10: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
- ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
+ ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX10: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
@@ -282,7 +282,7 @@ body: |
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
+ ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX6: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1
; GFX6: $vgpr0_vgpr1 = COPY [[COPY3]]
; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4
@@ -296,7 +296,7 @@ body: |
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
+ ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX7: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1
; GFX7: $vgpr0_vgpr1 = COPY [[COPY3]]
; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4
@@ -315,7 +315,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
; GFX7-FLAT: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
- ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
+ ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
@@ -333,7 +333,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
; GFX8: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
- ; GFX8: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
+ ; GFX8: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
@@ -341,7 +341,7 @@ body: |
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
- ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
+ ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]]
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
@@ -349,7 +349,7 @@ body: |
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX10: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
- ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
+ ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX10: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
@@ -392,7 +392,7 @@ body: |
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3
- ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE4]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE4]], 0, 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0
; GFX6: $vgpr0 = COPY [[COPY7]]
; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4
@@ -416,7 +416,7 @@ body: |
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3
- ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE4]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE4]], 0, 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0
; GFX7: $vgpr0 = COPY [[COPY7]]
; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4
@@ -435,7 +435,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
; GFX7-FLAT: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
- ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
+ ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@@ -453,7 +453,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
; GFX8: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
- ; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
+ ; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@@ -461,7 +461,7 @@ body: |
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], -4, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], -4, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]]
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@@ -469,7 +469,7 @@ body: |
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], -4, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], -4, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@@ -502,7 +502,7 @@ body: |
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0
; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@@ -515,7 +515,7 @@ body: |
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0
; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@@ -523,28 +523,28 @@ body: |
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
+ ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
+ ; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s32) = COPY $vgpr3
@@ -573,7 +573,7 @@ body: |
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
+ ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX6: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1
; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
@@ -586,7 +586,7 @@ body: |
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
+ ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX7: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1
; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
@@ -594,28 +594,28 @@ body: |
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7-FLAT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
- ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
+ ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX8: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
- ; GFX8: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
+ ; GFX8: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
- ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
+ ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX10: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
- ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
+ ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
%2:vgpr(s64) = COPY $vgpr4_vgpr5
@@ -643,7 +643,7 @@ body: |
; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0
; GFX6: $vgpr0 = COPY [[COPY3]]
; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr
@@ -656,7 +656,7 @@ body: |
; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0
; GFX7: $vgpr0 = COPY [[COPY3]]
; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr
@@ -666,7 +666,7 @@ body: |
; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX7-FLAT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY]]
- ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY3]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
+ ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY3]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr
; GFX8: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3
@@ -675,7 +675,7 @@ body: |
; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX8: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY]]
- ; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY3]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
+ ; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY3]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr
; GFX9: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3
@@ -684,7 +684,7 @@ body: |
; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN]]
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr
; GFX10: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3
@@ -693,7 +693,7 @@ body: |
; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:vgpr(s32) = COPY $vgpr2
@@ -723,7 +723,7 @@ body: |
; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 4095, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 4095, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0
; GFX6: $vgpr0 = COPY [[COPY3]]
; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095
@@ -736,7 +736,7 @@ body: |
; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 4095, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 4095, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0
; GFX7: $vgpr0 = COPY [[COPY3]]
; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095
@@ -756,7 +756,7 @@ body: |
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX7-FLAT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX7-FLAT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
- ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY7]], [[REG_SEQUENCE2]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
+ ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY7]], [[REG_SEQUENCE2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095
; GFX8: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3
@@ -775,7 +775,7 @@ body: |
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX8: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
- ; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY7]], [[REG_SEQUENCE2]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
+ ; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY7]], [[REG_SEQUENCE2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095
; GFX9: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3
@@ -784,7 +784,7 @@ body: |
; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 4095, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 4095, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN]]
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095
; GFX10: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3
@@ -793,7 +793,7 @@ body: |
; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec
- ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 2047, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 2047, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:vgpr(s32) = COPY $vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir
index 1362980492a7..e60ad00707b0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir
@@ -17,19 +17,19 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX9-LABEL: name: flat_atomicrmw_add_s32
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX10-LABEL: name: flat_atomicrmw_add_s32
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@@ -51,17 +51,17 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9-LABEL: name: flat_atomicrmw_add_s32_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10-LABEL: name: flat_atomicrmw_add_s32_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst 4, addrspace 0)
@@ -91,13 +91,13 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
- ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset2047
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset2047
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
@@ -113,7 +113,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
- ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@@ -147,12 +147,12 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
- ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset2047_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset2047_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@@ -167,7 +167,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
- ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s64) = G_CONSTANT i64 2047
@@ -199,13 +199,13 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
- ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset2048
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset2048
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
@@ -221,7 +221,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
- ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@@ -255,12 +255,12 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
- ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset2048_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset2048_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@@ -275,7 +275,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
- ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s64) = G_CONSTANT i64 2048
@@ -307,13 +307,13 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
- ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset4095
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset4095
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
@@ -329,7 +329,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
- ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@@ -363,12 +363,12 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
- ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset4095_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset4095_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@@ -383,7 +383,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
- ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s64) = G_CONSTANT i64 4095
@@ -415,7 +415,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
- ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset4097
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
@@ -431,7 +431,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX9: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
- ; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset4097
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
@@ -447,7 +447,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
- ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@@ -481,7 +481,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
- ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset4097_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@@ -496,7 +496,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX9: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
- ; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset4097_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@@ -511,7 +511,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
- ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s64) = G_CONSTANT i64 4097
@@ -533,19 +533,19 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+ ; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]]
; GFX9-LABEL: name: flat_atomicrmw_add_s64
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX9: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+ ; GFX9: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]]
; GFX10-LABEL: name: flat_atomicrmw_add_s64
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX10: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+ ; GFX10: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
@@ -567,17 +567,17 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+ ; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX9-LABEL: name: flat_atomicrmw_add_s64_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX9: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+ ; GFX9: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX10-LABEL: name: flat_atomicrmw_add_s64_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX10: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+ ; GFX10: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
%2:vgpr(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst 8, addrspace 0)
@@ -607,13 +607,13 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
- ; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+ ; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]]
; GFX9-LABEL: name: flat_atomicrmw_add_s64_offset4095
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX9: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+ ; GFX9: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]]
; GFX10-LABEL: name: flat_atomicrmw_add_s64_offset4095
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
@@ -629,7 +629,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
- ; GFX10: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+ ; GFX10: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
@@ -663,12 +663,12 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
- ; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+ ; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX9-LABEL: name: flat_atomicrmw_add_s64_offset4095_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX9: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+ ; GFX9: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX10-LABEL: name: flat_atomicrmw_add_s64_offset4095_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@@ -683,7 +683,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
- ; GFX10: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+ ; GFX10: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
%2:vgpr(s64) = G_CONSTANT i64 4095
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-global.mir
index 97d2dbe584df..9f1bc538740f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-global.mir
@@ -18,7 +18,7 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
+ ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX6-LABEL: name: global_atomicrmw_add_s32
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
@@ -29,19 +29,19 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]]
; GFX9-LABEL: name: global_atomicrmw_add_s32
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
; GFX10-LABEL: name: global_atomicrmw_add_s32
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@@ -63,7 +63,7 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
+ ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX6-LABEL: name: global_atomicrmw_add_s32_nortn
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@@ -73,17 +73,17 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9-LABEL: name: global_atomicrmw_add_s32_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10-LABEL: name: global_atomicrmw_add_s32_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst 4, addrspace 1)
@@ -113,7 +113,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
- ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
+ ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX6-LABEL: name: global_atomicrmw_add_s32_offset2047
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
@@ -124,19 +124,19 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]]
; GFX9-LABEL: name: global_atomicrmw_add_s32_offset2047
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
; GFX10-LABEL: name: global_atomicrmw_add_s32_offset2047
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@@ -170,7 +170,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
- ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
+ ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX6-LABEL: name: global_atomicrmw_add_s32_offset2047_nortn
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@@ -180,17 +180,17 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9-LABEL: name: global_atomicrmw_add_s32_offset2047_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10-LABEL: name: global_atomicrmw_add_s32_offset2047_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s64) = G_CONSTANT i64 2047
@@ -222,7 +222,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
- ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
+ ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX6-LABEL: name: global_atomicrmw_add_s32_offset2048
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
@@ -233,13 +233,13 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]]
; GFX9-LABEL: name: global_atomicrmw_add_s32_offset2048
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
; GFX10-LABEL: name: global_atomicrmw_add_s32_offset2048
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
@@ -255,7 +255,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
- ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@@ -289,7 +289,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
- ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
+ ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX6-LABEL: name: global_atomicrmw_add_s32_offset2048_nortn
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@@ -299,12 +299,12 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9-LABEL: name: global_atomicrmw_add_s32_offset2048_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10-LABEL: name: global_atomicrmw_add_s32_offset2048_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@@ -319,7 +319,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
- ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s64) = G_CONSTANT i64 2048
@@ -351,7 +351,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
- ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
+ ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX6-LABEL: name: global_atomicrmw_add_s32_offset4095
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
@@ -362,13 +362,13 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]]
; GFX9-LABEL: name: global_atomicrmw_add_s32_offset4095
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
; GFX10-LABEL: name: global_atomicrmw_add_s32_offset4095
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
@@ -384,7 +384,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
- ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@@ -418,7 +418,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
- ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
+ ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX6-LABEL: name: global_atomicrmw_add_s32_offset4095_nortn
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@@ -428,12 +428,12 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9-LABEL: name: global_atomicrmw_add_s32_offset4095_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10-LABEL: name: global_atomicrmw_add_s32_offset4095_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@@ -448,7 +448,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
- ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s64) = G_CONSTANT i64 4095
@@ -480,7 +480,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
- ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
+ ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX6-LABEL: name: global_atomicrmw_add_s32_offset4097
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
@@ -492,7 +492,7 @@ body: |
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4097
- ; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]]
; GFX9-LABEL: name: global_atomicrmw_add_s32_offset4097
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
@@ -508,7 +508,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX9: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
- ; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
; GFX10-LABEL: name: global_atomicrmw_add_s32_offset4097
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
@@ -524,7 +524,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
- ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@@ -558,7 +558,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
- ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
+ ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX6-LABEL: name: global_atomicrmw_add_s32_offset4097_nortn
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@@ -569,7 +569,7 @@ body: |
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4097
- ; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9-LABEL: name: global_atomicrmw_add_s32_offset4097_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@@ -584,7 +584,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX9: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
- ; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10-LABEL: name: global_atomicrmw_add_s32_offset4097_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@@ -599,7 +599,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
- ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
+ ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s64) = G_CONSTANT i64 4097
@@ -621,7 +621,7 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
+ ; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]]
; GFX6-LABEL: name: global_atomicrmw_add_s64
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
@@ -632,19 +632,19 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
+ ; GFX6: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX6: $vgpr0_vgpr1 = COPY [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN]]
; GFX9-LABEL: name: global_atomicrmw_add_s64
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX9: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
+ ; GFX9: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]]
; GFX10-LABEL: name: global_atomicrmw_add_s64
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX10: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
+ ; GFX10: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX10: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
@@ -666,7 +666,7 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
+ ; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX6-LABEL: name: global_atomicrmw_add_s64_nortn
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@@ -676,17 +676,17 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
+ ; GFX6: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX9-LABEL: name: global_atomicrmw_add_s64_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX9: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
+ ; GFX9: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX10-LABEL: name: global_atomicrmw_add_s64_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX10: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
+ ; GFX10: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
%2:vgpr(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst 8, addrspace 1)
@@ -716,7 +716,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
- ; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
+ ; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]]
; GFX6-LABEL: name: global_atomicrmw_add_s64_offset4095
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
@@ -727,13 +727,13 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
+ ; GFX6: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX6: $vgpr0_vgpr1 = COPY [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN]]
; GFX9-LABEL: name: global_atomicrmw_add_s64_offset4095
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX9: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
+ ; GFX9: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]]
; GFX10-LABEL: name: global_atomicrmw_add_s64_offset4095
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
@@ -749,7 +749,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
- ; GFX10: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
+ ; GFX10: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX10: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
@@ -783,7 +783,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
- ; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
+ ; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX6-LABEL: name: global_atomicrmw_add_s64_offset4095_nortn
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@@ -793,12 +793,12 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
+ ; GFX6: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX9-LABEL: name: global_atomicrmw_add_s64_offset4095_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX9: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
+ ; GFX9: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX10-LABEL: name: global_atomicrmw_add_s64_offset4095_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@@ -813,7 +813,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
- ; GFX10: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
+ ; GFX10: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
%2:vgpr(s64) = G_CONSTANT i64 4095
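
The pattern in every hunk above is the same: the trailing per-bit cache immediates on each memory instruction collapse into a single bitmask immediate, so an operand tail such as "0, 1, 0, 0" becomes "0, 1" (offset, cache policy). The following is a minimal standalone C++ sketch of that packing, assuming bit values GLC = 1, SLC = 2, DLC = 4; the names and values here are illustrative assumptions, not quoted from the patch itself.

// cpol_sketch.cpp -- illustrative only; not part of the patch.
// Assumed bit values for the combined cache-policy operand:
// GLC = 1, SLC = 2, DLC = 4.
#include <cstdio>

enum CPolBits : unsigned { GLC = 1, SLC = 2, DLC = 4 };

// Pack the three former boolean operands into one cache-policy immediate,
// matching how three separate 0/1 operands become a single value in the
// updated MIR lines.
static unsigned packCachePolicy(bool glc, bool slc, bool dlc) {
  return (glc ? GLC : 0) | (slc ? SLC : 0) | (dlc ? DLC : 0);
}

int main() {
  // The old operand tail "..., 1, 0, 0" (glc=1, slc=0, dlc=0)
  // becomes the single immediate "..., 1".
  std::printf("cpol = %u\n", packCachePolicy(true, false, false)); // prints 1
  return 0;
}

Under this reading, a line that previously ended in three zeros now ends in a single zero, which is why most updated checks simply drop two (or more) trailing "0" operands.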
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir
index 32a5fe23ebbb..c9b9c16ed4f8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir
@@ -17,12 +17,12 @@ body: |
; WAVE64: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3
; WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[COPY]]
; WAVE64: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; WAVE64: FLAT_STORE_DWORD [[COPY1]], [[DEF]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; WAVE64: FLAT_STORE_DWORD [[COPY1]], [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; WAVE32-LABEL: name: copy
; WAVE32: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3
; WAVE32: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; WAVE32: GLOBAL_STORE_DWORD_SADDR [[V_MOV_B32_e32_]], [[DEF]], [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ ; WAVE32: GLOBAL_STORE_DWORD_SADDR [[V_MOV_B32_e32_]], [[DEF]], [[COPY]], 0, 0, implicit $exec :: (store 4, addrspace 1)
%0:sgpr(p1) = COPY $sgpr2_sgpr3
%1:vgpr(p1) = COPY %0
%2:vgpr(s32) = G_IMPLICIT_DEF
@@ -46,7 +46,7 @@ body: |
; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY3]], implicit-def $scc
; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
; WAVE64: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec
- ; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; WAVE32-LABEL: name: copy_vcc_bank_sgpr_bank
; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
@@ -55,7 +55,7 @@ body: |
; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY3]], implicit-def $scc
; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
; WAVE32: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec
- ; WAVE32: GLOBAL_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ ; WAVE32: GLOBAL_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec :: (store 4, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s32) = COPY $vgpr3
@@ -85,7 +85,7 @@ body: |
; WAVE64: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY3]], implicit-def $scc
; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_1]], implicit $exec
; WAVE64: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_1]], implicit $exec
- ; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; WAVE32-LABEL: name: copy_vcc_bank_sgpr_bank_2_uses
; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
@@ -96,7 +96,7 @@ body: |
; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY3]], implicit-def $scc
; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
; WAVE32: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec
- ; WAVE32: GLOBAL_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ ; WAVE32: GLOBAL_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, implicit $exec :: (store 4, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s32) = COPY $vgpr3
@@ -124,14 +124,14 @@ body: |
; WAVE64: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; WAVE64: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY $scc
; WAVE64: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[COPY3]], implicit $exec
- ; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; WAVE32-LABEL: name: copy_vcc_bank_scc_physreg
; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; WAVE32: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; WAVE32: [[COPY3:%[0-9]+]]:sreg_32_xm0_xexec = COPY $scc
; WAVE32: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[COPY3]], implicit $exec
- ; WAVE32: GLOBAL_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ ; WAVE32: GLOBAL_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec :: (store 4, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s32) = COPY $vgpr3
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.mir
index ce4a2003551d..8df94adbb36b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.mir
@@ -24,9 +24,9 @@ body: |
; GFX7: %7:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX7: %8:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX7: %9:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
- ; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
- ; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
- ; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: %10:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec
; GFX7: %11:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec
; GFX7: %12:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec
@@ -92,9 +92,9 @@ body: |
; GFX7: %7:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX7: %8:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX7: %9:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
- ; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
- ; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
- ; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: %10:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec
; GFX7: %11:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec
; GFX7: %12:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.mir
index a41774bc6c39..c805deecb19e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.mir
@@ -25,9 +25,9 @@ body: |
; GFX7: %7:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX7: %8:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX7: %9:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
- ; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
- ; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
- ; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: %10:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec
; GFX7: %11:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec
; GFX7: %12:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec
@@ -91,9 +91,9 @@ body: |
; GFX7: %7:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX7: %8:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX7: %9:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
- ; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
- ; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
- ; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: %10:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec
; GFX7: %11:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec
; GFX7: %12:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.mir
index 8f65349d9056..caae91300410 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.mir
@@ -24,9 +24,9 @@ body: |
; GFX7: %7:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX7: %8:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX7: %9:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
- ; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
- ; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
- ; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: %10:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec
; GFX7: %11:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec
; GFX7: %12:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec
@@ -92,9 +92,9 @@ body: |
; GFX7: %7:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX7: %8:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX7: %9:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
- ; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
- ; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
- ; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: %10:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec
; GFX7: %11:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec
; GFX7: %12:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.mir
index a1381052ecc3..c24ba7a6a4f8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.mir
@@ -25,9 +25,9 @@ body: |
; GFX7: %7:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX7: %8:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX7: %9:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
- ; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
- ; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
- ; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: %10:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec
; GFX7: %11:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec
; GFX7: %12:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec
@@ -91,9 +91,9 @@ body: |
; GFX7: %7:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX7: %8:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX7: %9:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
- ; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
- ; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
- ; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: %10:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec
; GFX7: %11:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec
; GFX7: %12:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir
index 9942493d12a5..950b00ff3ec8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir
@@ -18,9 +18,9 @@ body: |
; GCN: %4:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GCN: %5:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GCN: %6:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
- ; GCN: FLAT_STORE_DWORD [[COPY3]], %4, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
- ; GCN: FLAT_STORE_DWORD [[COPY3]], %5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
- ; GCN: FLAT_STORE_DWORD [[COPY3]], %6, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GCN: FLAT_STORE_DWORD [[COPY3]], %4, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GCN: FLAT_STORE_DWORD [[COPY3]], %5, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GCN: FLAT_STORE_DWORD [[COPY3]], %6, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0
%2:vgpr(s32) = COPY $vgpr1
@@ -133,16 +133,16 @@ body: |
; GCN: %13:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY]], 3, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GCN: %14:vgpr_32 = nofpexcept V_MUL_F32_e64 3, [[COPY]], 3, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GCN: %15:vgpr_32 = nofpexcept V_MUL_F32_e64 3, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GCN: FLAT_STORE_DWORD [[COPY1]], %6, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
- ; GCN: FLAT_STORE_DWORD [[COPY1]], %7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
- ; GCN: FLAT_STORE_DWORD [[COPY1]], %8, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
- ; GCN: FLAT_STORE_DWORD [[COPY1]], %9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
- ; GCN: FLAT_STORE_DWORD [[COPY1]], %10, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
- ; GCN: FLAT_STORE_DWORD [[COPY1]], %11, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
- ; GCN: FLAT_STORE_DWORD [[COPY1]], %12, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
- ; GCN: FLAT_STORE_DWORD [[COPY1]], %13, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
- ; GCN: FLAT_STORE_DWORD [[COPY1]], %14, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
- ; GCN: FLAT_STORE_DWORD [[COPY1]], %15, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GCN: FLAT_STORE_DWORD [[COPY1]], %6, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GCN: FLAT_STORE_DWORD [[COPY1]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GCN: FLAT_STORE_DWORD [[COPY1]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GCN: FLAT_STORE_DWORD [[COPY1]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GCN: FLAT_STORE_DWORD [[COPY1]], %10, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GCN: FLAT_STORE_DWORD [[COPY1]], %11, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GCN: FLAT_STORE_DWORD [[COPY1]], %12, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GCN: FLAT_STORE_DWORD [[COPY1]], %13, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GCN: FLAT_STORE_DWORD [[COPY1]], %14, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GCN: FLAT_STORE_DWORD [[COPY1]], %15, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(p1) = COPY $vgpr2_vgpr3
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir
index 74d1f9d96d73..92ffb4f131f7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir
@@ -18,16 +18,16 @@ body: |
; GCN: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
; GCN: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GCN: %4:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
- ; GCN: FLAT_STORE_DWORD [[COPY2]], %3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
- ; GCN: FLAT_STORE_DWORD [[COPY2]], %4, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GCN: FLAT_STORE_DWORD [[COPY2]], %3, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GCN: FLAT_STORE_DWORD [[COPY2]], %4, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; VI-LABEL: name: fptoui
; VI: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; VI: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; VI: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
; VI: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; VI: %4:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
- ; VI: FLAT_STORE_DWORD [[COPY2]], %3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
- ; VI: FLAT_STORE_DWORD [[COPY2]], %4, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; VI: FLAT_STORE_DWORD [[COPY2]], %3, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; VI: FLAT_STORE_DWORD [[COPY2]], %4, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir
index adba00b6cae3..fbb7c1778f30 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir
@@ -14,10 +14,10 @@ body: |
; CHECK-LABEL: name: fract_f64_neg
; CHECK: liveins: $sgpr0_sgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; CHECK: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 36, 0, 0 :: (dereferenceable invariant load 16, align 4, addrspace 4)
+ ; CHECK: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 36, 0 :: (dereferenceable invariant load 16, align 4, addrspace 4)
; CHECK: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX4_IMM]].sub0_sub1
; CHECK: [[COPY2:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX4_IMM]].sub2_sub3
- ; CHECK: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY2]], 0, 0, 0 :: (load 8, addrspace 1)
+ ; CHECK: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY2]], 0, 0 :: (load 8, addrspace 1)
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; CHECK: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
@@ -26,7 +26,7 @@ body: |
; CHECK: %12:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY3]], 1, [[COPY4]], 0, 0, implicit $mode, implicit $exec
; CHECK: %15:vreg_64 = nofpexcept V_FRACT_F64_e64 0, %12, 0, 0, implicit $mode, implicit $exec
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; CHECK: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], %15, [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
+ ; CHECK: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], %15, [[COPY1]], 0, 0, implicit $exec :: (store 8, addrspace 1)
; CHECK: S_ENDPGM 0
%2:sgpr(p4) = COPY $sgpr0_sgpr1
%7:sgpr(s64) = G_CONSTANT i64 36
@@ -63,10 +63,10 @@ body: |
; CHECK-LABEL: name: fract_f64_neg_abs
; CHECK: liveins: $sgpr0_sgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; CHECK: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 36, 0, 0 :: (dereferenceable invariant load 16, align 4, addrspace 4)
+ ; CHECK: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 36, 0 :: (dereferenceable invariant load 16, align 4, addrspace 4)
; CHECK: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX4_IMM]].sub0_sub1
; CHECK: [[COPY2:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX4_IMM]].sub2_sub3
- ; CHECK: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY2]], 0, 0, 0 :: (load 8, addrspace 1)
+ ; CHECK: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY2]], 0, 0 :: (load 8, addrspace 1)
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; CHECK: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
@@ -75,7 +75,7 @@ body: |
; CHECK: %13:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY3]], 3, [[COPY4]], 0, 0, implicit $mode, implicit $exec
; CHECK: %16:vreg_64 = nofpexcept V_FRACT_F64_e64 0, %13, 0, 0, implicit $mode, implicit $exec
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; CHECK: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], %16, [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
+ ; CHECK: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], %16, [[COPY1]], 0, 0, implicit $exec :: (store 8, addrspace 1)
; CHECK: S_ENDPGM 0
%2:sgpr(p4) = COPY $sgpr0_sgpr1
%7:sgpr(s64) = G_CONSTANT i64 36
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-implicit-def.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-implicit-def.mir
index 623b5c341886..5db482133664 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-implicit-def.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-implicit-def.mir
@@ -99,7 +99,7 @@ body: |
; GCN-LABEL: name: implicit_def_p1_vgpr
; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec
- ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
%0:vgpr(p1) = G_IMPLICIT_DEF
%1:vgpr(s32) = G_CONSTANT i32 4
G_STORE %1, %0 :: (store 4, addrspace 1)
@@ -117,7 +117,7 @@ body: |
; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec
; GCN: $m0 = S_MOV_B32 -1
- ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
%0:vgpr(p3) = G_IMPLICIT_DEF
%1:vgpr(s32) = G_CONSTANT i32 4
G_STORE %1, %0 :: (store 4, addrspace 1)
@@ -134,7 +134,7 @@ body: |
; GCN-LABEL: name: implicit_def_p4_vgpr
; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec
- ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
%0:vgpr(p4) = G_IMPLICIT_DEF
%1:vgpr(s32) = G_CONSTANT i32 4
G_STORE %1, %0 :: (store 4, addrspace 1)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir
index d156a99766b9..72d288bf1a85 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir
@@ -16,12 +16,12 @@ body: |
; GFX7-LABEL: name: load_atomic_flat_s32_seq_cst
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4)
+ ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX9-LABEL: name: load_atomic_flat_s32_seq_cst
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4)
+ ; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = G_LOAD %0 :: (load seq_cst 4, align 4, addrspace 0)
@@ -97,12 +97,12 @@ body: |
; GFX7-LABEL: name: load_atomic_flat_s64_seq_cst
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 8)
+ ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 8)
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX9-LABEL: name: load_atomic_flat_s64_seq_cst
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 8)
+ ; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 8)
; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 0)
@@ -242,7 +242,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4)
+ ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX9-LABEL: name: load_atomic_flat_s32_seq_cst_gep_m2048
; GFX9: liveins: $vgpr0_vgpr1
@@ -257,7 +257,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4)
+ ; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -2048
@@ -291,12 +291,12 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4)
+ ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX9-LABEL: name: load_atomic_flat_s32_seq_cst_gep_4095
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 4095, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4)
+ ; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 4095
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir
index f6ab8088ee65..0d592bcb3b14 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir
@@ -23,7 +23,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
+ ; GFX6: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
; GFX7-LABEL: name: load_atomic_global_s32_seq_cst
; GFX7: liveins: $vgpr0_vgpr1
@@ -33,17 +33,17 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
+ ; GFX7: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_atomic_global_s32_seq_cst
; GFX7-FLAT: liveins: $vgpr0_vgpr1
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4, addrspace 1)
+ ; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX9-LABEL: name: load_atomic_global_s32_seq_cst
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = G_LOAD %0 :: (load seq_cst 4, align 4, addrspace 1)
@@ -144,7 +144,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1)
+ ; GFX6: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1)
; GFX6: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
; GFX7-LABEL: name: load_atomic_global_s64_seq_cst
; GFX7: liveins: $vgpr0_vgpr1
@@ -154,17 +154,17 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1)
+ ; GFX7: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1)
; GFX7: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_atomic_global_s64_seq_cst
; GFX7-FLAT: liveins: $vgpr0_vgpr1
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 8, addrspace 1)
+ ; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 8, addrspace 1)
; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX9-LABEL: name: load_atomic_global_s64_seq_cst
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1)
; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 1)
@@ -349,7 +349,7 @@ body: |
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
- ; GFX6: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
+ ; GFX6: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
; GFX7-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048
; GFX7: liveins: $vgpr0_vgpr1
@@ -369,7 +369,7 @@ body: |
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
- ; GFX7: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
+ ; GFX7: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@@ -384,12 +384,12 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4, addrspace 1)
+ ; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX9-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], -2048, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], -2048, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -2048
@@ -418,7 +418,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
+ ; GFX6: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
; GFX7-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095
; GFX7: liveins: $vgpr0_vgpr1
@@ -428,7 +428,7 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
+ ; GFX7: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@@ -443,12 +443,12 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4, addrspace 1)
+ ; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX9-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 4095, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 4095, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 4095
@@ -487,7 +487,7 @@ body: |
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
- ; GFX6: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1)
+ ; GFX6: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1)
; GFX6: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
; GFX7-LABEL: name: load_atomic_global_s64_seq_cst_gep_m2048
; GFX7: liveins: $vgpr0_vgpr1
@@ -507,7 +507,7 @@ body: |
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
- ; GFX7: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1)
+ ; GFX7: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1)
; GFX7: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_atomic_global_s64_seq_cst_gep_m2048
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@@ -522,12 +522,12 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 8, addrspace 1)
+ ; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 8, addrspace 1)
; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX9-LABEL: name: load_atomic_global_s64_seq_cst_gep_m2048
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], -2048, 0, 0, 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], -2048, 0, implicit $exec :: (load seq_cst 8, addrspace 1)
; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -2048
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir
index 892d75445db2..bfe865dd6cc7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir
@@ -19,22 +19,22 @@ body: |
; GFX6-LABEL: name: load_constant_s32_from_4
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
+ ; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX7-LABEL: name: load_constant_s32_from_4
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
+ ; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX8-LABEL: name: load_constant_s32_from_4
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
+ ; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX10-LABEL: name: load_constant_s32_from_4
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
+ ; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 4)
@@ -57,22 +57,22 @@ body: |
; GFX6-LABEL: name: load_constant_v2s16_from_4
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
+ ; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX7-LABEL: name: load_constant_v2s16_from_4
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
+ ; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX8-LABEL: name: load_constant_v2s16_from_4
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
+ ; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX10-LABEL: name: load_constant_v2s16_from_4
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
+ ; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<2 x s16>) = G_LOAD %0 :: (load 4, align 4, addrspace 4)
@@ -94,22 +94,22 @@ body: |
; GFX6-LABEL: name: load_constant_v2s32
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
+ ; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX6: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX7-LABEL: name: load_constant_v2s32
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
+ ; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX7: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX8-LABEL: name: load_constant_v2s32
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
+ ; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX8: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX10-LABEL: name: load_constant_v2s32
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
+ ; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX10: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<2 x s32>) = G_LOAD %0 :: (load 8, align 8, addrspace 4)
@@ -130,22 +130,22 @@ body: |
; GFX6-LABEL: name: load_constant_v2s32_align4
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, align 4, addrspace 4)
+ ; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4)
; GFX6: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX7-LABEL: name: load_constant_v2s32_align4
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, align 4, addrspace 4)
+ ; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4)
; GFX7: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX8-LABEL: name: load_constant_v2s32_align4
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, align 4, addrspace 4)
+ ; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4)
; GFX8: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX10-LABEL: name: load_constant_v2s32_align4
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, align 4, addrspace 4)
+ ; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4)
; GFX10: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<2 x s32>) = G_LOAD %0 :: (load 8, align 4, addrspace 4)
@@ -166,22 +166,22 @@ body: |
; GFX6-LABEL: name: load_constant_v4s16_align4
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, align 4, addrspace 4)
+ ; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4)
; GFX6: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX7-LABEL: name: load_constant_v4s16_align4
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, align 4, addrspace 4)
+ ; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4)
; GFX7: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX8-LABEL: name: load_constant_v4s16_align4
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, align 4, addrspace 4)
+ ; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4)
; GFX8: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX10-LABEL: name: load_constant_v4s16_align4
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, align 4, addrspace 4)
+ ; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4)
; GFX10: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<4 x s16>) = G_LOAD %0 :: (load 8, align 4, addrspace 4)
@@ -203,22 +203,22 @@ body: |
; GFX6-LABEL: name: load_constant_v4s32_align4
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX6: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0, 0 :: (load 16, align 4, addrspace 4)
+ ; GFX6: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load 16, align 4, addrspace 4)
; GFX6: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]]
; GFX7-LABEL: name: load_constant_v4s32_align4
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX7: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0, 0 :: (load 16, align 4, addrspace 4)
+ ; GFX7: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load 16, align 4, addrspace 4)
; GFX7: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]]
; GFX8-LABEL: name: load_constant_v4s32_align4
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX8: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0, 0 :: (load 16, align 4, addrspace 4)
+ ; GFX8: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load 16, align 4, addrspace 4)
; GFX8: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]]
; GFX10-LABEL: name: load_constant_v4s32_align4
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX10: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0, 0 :: (load 16, align 4, addrspace 4)
+ ; GFX10: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load 16, align 4, addrspace 4)
; GFX10: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<4 x s32>) = G_LOAD %0 :: (load 16, align 4, addrspace 4)
@@ -240,22 +240,22 @@ body: |
; GFX6-LABEL: name: load_constant_s64
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
+ ; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX6: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX7-LABEL: name: load_constant_s64
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
+ ; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX7: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX8-LABEL: name: load_constant_s64
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
+ ; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX8: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX10-LABEL: name: load_constant_s64
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
+ ; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX10: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_LOAD %0 :: (load 8, align 8, addrspace 4)
@@ -277,22 +277,22 @@ body: |
; GFX6-LABEL: name: load_constant_s64_align4
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, align 4, addrspace 4)
+ ; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4)
; GFX6: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX7-LABEL: name: load_constant_s64_align4
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, align 4, addrspace 4)
+ ; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4)
; GFX7: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX8-LABEL: name: load_constant_s64_align4
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, align 4, addrspace 4)
+ ; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4)
; GFX8: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX10-LABEL: name: load_constant_s64_align4
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, align 4, addrspace 4)
+ ; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4)
; GFX10: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_LOAD %0 :: (load 8, align 4, addrspace 4)
@@ -314,22 +314,22 @@ body: |
; GFX6-LABEL: name: load_constant_v2s64
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX6: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0, 0 :: (load 16, align 4, addrspace 4)
+ ; GFX6: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load 16, align 4, addrspace 4)
; GFX6: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]]
; GFX7-LABEL: name: load_constant_v2s64
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX7: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0, 0 :: (load 16, align 4, addrspace 4)
+ ; GFX7: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load 16, align 4, addrspace 4)
; GFX7: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]]
; GFX8-LABEL: name: load_constant_v2s64
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX8: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0, 0 :: (load 16, align 4, addrspace 4)
+ ; GFX8: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load 16, align 4, addrspace 4)
; GFX8: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]]
; GFX10-LABEL: name: load_constant_v2s64
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX10: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0, 0 :: (load 16, align 4, addrspace 4)
+ ; GFX10: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load 16, align 4, addrspace 4)
; GFX10: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<2 x s64>) = G_LOAD %0 :: (load 16, align 4, addrspace 4)
@@ -425,22 +425,22 @@ body: |
; GFX6-LABEL: name: load_constant_p3_from_4
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
+ ; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX7-LABEL: name: load_constant_p3_from_4
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
+ ; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX8-LABEL: name: load_constant_p3_from_4
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
+ ; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX10-LABEL: name: load_constant_p3_from_4
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
+ ; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(p3) = G_LOAD %0 :: (load 4, align 4, addrspace 4)
@@ -462,22 +462,22 @@ body: |
; GFX6-LABEL: name: load_constant_p1_from_8
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
+ ; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX6: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX7-LABEL: name: load_constant_p1_from_8
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
+ ; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX7: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX8-LABEL: name: load_constant_p1_from_8
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
+ ; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX8: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX10-LABEL: name: load_constant_p1_from_8
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
+ ; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX10: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(p4) = G_LOAD %0 :: (load 8, align 8, addrspace 4)
@@ -573,22 +573,22 @@ body: |
; GFX6-LABEL: name: load_constant_v2s16
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
+ ; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX7-LABEL: name: load_constant_v2s16
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
+ ; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX8-LABEL: name: load_constant_v2s16
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
+ ; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX10-LABEL: name: load_constant_v2s16
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
+ ; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<2 x s16>) = G_LOAD %0 :: (load 4, align 4, addrspace 4)
@@ -610,22 +610,22 @@ body: |
; GFX6-LABEL: name: load_constant_v4s16
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
+ ; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX6: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX7-LABEL: name: load_constant_v4s16
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
+ ; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX7: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX8-LABEL: name: load_constant_v4s16
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
+ ; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX8: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX10-LABEL: name: load_constant_v4s16
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
+ ; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX10: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<4 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 4)
@@ -684,22 +684,22 @@ body: |
; GFX6-LABEL: name: load_constant_v8s32
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX6: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0, 0 :: (load 32, align 4, addrspace 4)
+ ; GFX6: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (load 32, align 4, addrspace 4)
; GFX6: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[S_LOAD_DWORDX8_IMM]]
; GFX7-LABEL: name: load_constant_v8s32
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX7: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0, 0 :: (load 32, align 4, addrspace 4)
+ ; GFX7: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (load 32, align 4, addrspace 4)
; GFX7: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[S_LOAD_DWORDX8_IMM]]
; GFX8-LABEL: name: load_constant_v8s32
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX8: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0, 0 :: (load 32, align 4, addrspace 4)
+ ; GFX8: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (load 32, align 4, addrspace 4)
; GFX8: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[S_LOAD_DWORDX8_IMM]]
; GFX10-LABEL: name: load_constant_v8s32
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX10: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0, 0 :: (load 32, align 4, addrspace 4)
+ ; GFX10: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (load 32, align 4, addrspace 4)
; GFX10: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[S_LOAD_DWORDX8_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<8 x s32>) = G_LOAD %0 :: (load 32, align 4, addrspace 4)
@@ -721,22 +721,22 @@ body: |
; GFX6-LABEL: name: load_constant_v16s32
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX6: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0, 0 :: (load 64, align 4, addrspace 4)
+ ; GFX6: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load 64, align 4, addrspace 4)
; GFX6: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]]
; GFX7-LABEL: name: load_constant_v16s32
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX7: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0, 0 :: (load 64, align 4, addrspace 4)
+ ; GFX7: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load 64, align 4, addrspace 4)
; GFX7: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]]
; GFX8-LABEL: name: load_constant_v16s32
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX8: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0, 0 :: (load 64, align 4, addrspace 4)
+ ; GFX8: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load 64, align 4, addrspace 4)
; GFX8: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]]
; GFX10-LABEL: name: load_constant_v16s32
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX10: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0, 0 :: (load 64, align 4, addrspace 4)
+ ; GFX10: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load 64, align 4, addrspace 4)
; GFX10: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<16 x s32>) = G_LOAD %0 :: (load 64, align 4, addrspace 4)
@@ -758,22 +758,22 @@ body: |
; GFX6-LABEL: name: load_constant_v8s64
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX6: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0, 0 :: (load 64, align 4, addrspace 4)
+ ; GFX6: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load 64, align 4, addrspace 4)
; GFX6: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]]
; GFX7-LABEL: name: load_constant_v8s64
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX7: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0, 0 :: (load 64, align 4, addrspace 4)
+ ; GFX7: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load 64, align 4, addrspace 4)
; GFX7: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]]
; GFX8-LABEL: name: load_constant_v8s64
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX8: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0, 0 :: (load 64, align 4, addrspace 4)
+ ; GFX8: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load 64, align 4, addrspace 4)
; GFX8: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]]
; GFX10-LABEL: name: load_constant_v8s64
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX10: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0, 0 :: (load 64, align 4, addrspace 4)
+ ; GFX10: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load 64, align 4, addrspace 4)
; GFX10: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<8 x s64>) = G_LOAD %0 :: (load 64, align 4, addrspace 4)
@@ -799,22 +799,22 @@ body: |
; GFX6-LABEL: name: load_constant_s32_from_4_gep_1020
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 255, 0, 0 :: (load 4, addrspace 4)
+ ; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 255, 0 :: (load 4, addrspace 4)
; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX7-LABEL: name: load_constant_s32_from_4_gep_1020
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 255, 0, 0 :: (load 4, addrspace 4)
+ ; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 255, 0 :: (load 4, addrspace 4)
; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX8-LABEL: name: load_constant_s32_from_4_gep_1020
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1020, 0, 0 :: (load 4, addrspace 4)
+ ; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1020, 0 :: (load 4, addrspace 4)
; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX10-LABEL: name: load_constant_s32_from_4_gep_1020
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1020, 0, 0 :: (load 4, addrspace 4)
+ ; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1020, 0 :: (load 4, addrspace 4)
; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 1020
@@ -839,22 +839,22 @@ body: |
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
- ; GFX6: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load 4, addrspace 4)
+ ; GFX6: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load 4, addrspace 4)
; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]]
; GFX7-LABEL: name: load_constant_s32_from_4_gep_1024
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX7: [[S_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM_ci [[COPY]], 256, 0, 0 :: (load 4, addrspace 4)
+ ; GFX7: [[S_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM_ci [[COPY]], 256, 0 :: (load 4, addrspace 4)
; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM_ci]]
; GFX8-LABEL: name: load_constant_s32_from_4_gep_1024
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1024, 0, 0 :: (load 4, addrspace 4)
+ ; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1024, 0 :: (load 4, addrspace 4)
; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX10-LABEL: name: load_constant_s32_from_4_gep_1024
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1024, 0, 0 :: (load 4, addrspace 4)
+ ; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1024, 0 :: (load 4, addrspace 4)
; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 1024
@@ -879,24 +879,24 @@ body: |
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048575
- ; GFX6: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load 4, addrspace 4)
+ ; GFX6: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load 4, addrspace 4)
; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]]
; GFX7-LABEL: name: load_constant_s32_from_4_gep_1048575
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048575
- ; GFX7: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load 4, addrspace 4)
+ ; GFX7: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load 4, addrspace 4)
; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]]
; GFX8-LABEL: name: load_constant_s32_from_4_gep_1048575
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1048575, 0, 0 :: (load 4, addrspace 4)
+ ; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1048575, 0 :: (load 4, addrspace 4)
; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX10-LABEL: name: load_constant_s32_from_4_gep_1048575
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048575
- ; GFX10: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load 4, addrspace 4)
+ ; GFX10: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load 4, addrspace 4)
; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 1048575
@@ -921,24 +921,24 @@ body: |
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576
- ; GFX6: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load 4, addrspace 4)
+ ; GFX6: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load 4, addrspace 4)
; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]]
; GFX7-LABEL: name: load_constant_s32_from_4_gep_1048576
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX7: [[S_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM_ci [[COPY]], 262144, 0, 0 :: (load 4, addrspace 4)
+ ; GFX7: [[S_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM_ci [[COPY]], 262144, 0 :: (load 4, addrspace 4)
; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM_ci]]
; GFX8-LABEL: name: load_constant_s32_from_4_gep_1048576
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576
- ; GFX8: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load 4, addrspace 4)
+ ; GFX8: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load 4, addrspace 4)
; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]]
; GFX10-LABEL: name: load_constant_s32_from_4_gep_1048576
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576
- ; GFX10: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load 4, addrspace 4)
+ ; GFX10: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load 4, addrspace 4)
; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 1048576
@@ -963,25 +963,25 @@ body: |
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741823
- ; GFX6: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load 4, addrspace 4)
+ ; GFX6: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load 4, addrspace 4)
; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]]
; GFX7-LABEL: name: load_constant_s32_from_4_gep_1073741823
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741823
- ; GFX7: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load 4, addrspace 4)
+ ; GFX7: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load 4, addrspace 4)
; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]]
; GFX8-LABEL: name: load_constant_s32_from_4_gep_1073741823
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741823
- ; GFX8: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load 4, addrspace 4)
+ ; GFX8: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load 4, addrspace 4)
; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]]
; GFX10-LABEL: name: load_constant_s32_from_4_gep_1073741823
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741823
- ; GFX10: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load 4, addrspace 4)
+ ; GFX10: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load 4, addrspace 4)
; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 1073741823
@@ -1013,7 +1013,7 @@ body: |
; GFX6: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX6: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
- ; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0, 0 :: (load 4, addrspace 4)
+ ; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load 4, addrspace 4)
; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX7-LABEL: name: load_constant_s32_from_4_gep_negative_1
; GFX7: liveins: $sgpr0_sgpr1
@@ -1026,7 +1026,7 @@ body: |
; GFX7: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX7: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
- ; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0, 0 :: (load 4, addrspace 4)
+ ; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load 4, addrspace 4)
; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX8-LABEL: name: load_constant_s32_from_4_gep_negative_1
; GFX8: liveins: $sgpr0_sgpr1
@@ -1039,12 +1039,12 @@ body: |
; GFX8: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX8: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
- ; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0, 0 :: (load 4, addrspace 4)
+ ; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load 4, addrspace 4)
; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX10-LABEL: name: load_constant_s32_from_4_gep_negative_1
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], -1, 0, 0 :: (load 4, addrspace 4)
+ ; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], -1, 0 :: (load 4, addrspace 4)
; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 -1
@@ -1078,7 +1078,7 @@ body: |
; GFX6: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX6: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
- ; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE1]], 0, 0, 0 :: (load 4, addrspace 4)
+ ; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE1]], 0, 0 :: (load 4, addrspace 4)
; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX7-LABEL: name: load_constant_s32_from_4_gep_negative_524288
; GFX7: liveins: $sgpr0_sgpr1
@@ -1093,7 +1093,7 @@ body: |
; GFX7: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX7: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
- ; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE1]], 0, 0, 0 :: (load 4, addrspace 4)
+ ; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE1]], 0, 0 :: (load 4, addrspace 4)
; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX8-LABEL: name: load_constant_s32_from_4_gep_negative_524288
; GFX8: liveins: $sgpr0_sgpr1
@@ -1108,12 +1108,12 @@ body: |
; GFX8: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX8: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
- ; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE1]], 0, 0, 0 :: (load 4, addrspace 4)
+ ; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE1]], 0, 0 :: (load 4, addrspace 4)
; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX10-LABEL: name: load_constant_s32_from_4_gep_negative_524288
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], -524288, 0, 0 :: (load 4, addrspace 4)
+ ; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], -524288, 0 :: (load 4, addrspace 4)
; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 -524288
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir
index 0d23dfeda9ab..df6207e0a58b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir
@@ -19,22 +19,22 @@ body: |
; GFX7-LABEL: name: load_flat_s32_from_4
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX8-LABEL: name: load_flat_s32_from_4
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ ; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX9-LABEL: name: load_flat_s32_from_4
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ ; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX10-LABEL: name: load_flat_s32_from_4
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ ; GFX10: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 0)
@@ -56,22 +56,22 @@ body: |
; GFX7-LABEL: name: load_flat_s32_from_2
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 2)
+ ; GFX7: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 2)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_USHORT]]
; GFX8-LABEL: name: load_flat_s32_from_2
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 2)
+ ; GFX8: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 2)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_USHORT]]
; GFX9-LABEL: name: load_flat_s32_from_2
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 2)
+ ; GFX9: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 2)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_USHORT]]
; GFX10-LABEL: name: load_flat_s32_from_2
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 2)
+ ; GFX10: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 2)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_USHORT]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = G_LOAD %0 :: (load 2, align 2, addrspace 0)
@@ -93,22 +93,22 @@ body: |
; GFX7-LABEL: name: load_flat_s32_from_1
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_flat_s32_from_1
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_flat_s32_from_1
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX10-LABEL: name: load_flat_s32_from_1
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = G_LOAD %0 :: (load 1, align 1, addrspace 0)
@@ -129,19 +129,19 @@ body: |
; GFX7-LABEL: name: load_flat_v2s32
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
+ ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX8-LABEL: name: load_flat_v2s32
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
+ ; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX9-LABEL: name: load_flat_v2s32
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
+ ; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX10-LABEL: name: load_flat_v2s32
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
+ ; GFX10: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<2 x s32>) = G_LOAD %0 :: (load 8, align 8, addrspace 0)
@@ -163,22 +163,22 @@ body: |
; GFX7-LABEL: name: load_flat_v3s32
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4)
+ ; GFX7: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4)
; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]]
; GFX8-LABEL: name: load_flat_v3s32
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4)
+ ; GFX8: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4)
; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]]
; GFX9-LABEL: name: load_flat_v3s32
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4)
+ ; GFX9: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4)
; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]]
; GFX10-LABEL: name: load_flat_v3s32
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4)
+ ; GFX10: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4)
; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<3 x s32>) = G_LOAD %0 :: (load 12, align 4, addrspace 0)
@@ -200,22 +200,22 @@ body: |
; GFX7-LABEL: name: load_flat_v4s32
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
+ ; GFX7: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
; GFX8-LABEL: name: load_flat_v4s32
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
+ ; GFX8: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
; GFX9-LABEL: name: load_flat_v4s32
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
+ ; GFX9: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
; GFX10-LABEL: name: load_flat_v4s32
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
+ ; GFX10: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<4 x s32>) = G_LOAD %0 :: (load 16, align 4, addrspace 0)
@@ -237,22 +237,22 @@ body: |
; GFX7-LABEL: name: load_flat_s64
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
+ ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX8-LABEL: name: load_flat_s64
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
+ ; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX9-LABEL: name: load_flat_s64
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
+ ; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX10-LABEL: name: load_flat_s64
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
+ ; GFX10: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_LOAD %0 :: (load 8, align 8, addrspace 0)
@@ -274,22 +274,22 @@ body: |
; GFX7-LABEL: name: load_flat_v2s64
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
+ ; GFX7: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
; GFX8-LABEL: name: load_flat_v2s64
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
+ ; GFX8: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
; GFX9-LABEL: name: load_flat_v2s64
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
+ ; GFX9: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
; GFX10-LABEL: name: load_flat_v2s64
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
+ ; GFX10: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<2 x s64>) = G_LOAD %0 :: (load 16, align 4, addrspace 0)
@@ -422,22 +422,22 @@ body: |
; GFX7-LABEL: name: load_flat_p3_from_4
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX8-LABEL: name: load_flat_p3_from_4
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ ; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX9-LABEL: name: load_flat_p3_from_4
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ ; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX10-LABEL: name: load_flat_p3_from_4
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ ; GFX10: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(p3) = G_LOAD %0 :: (load 4, align 4, addrspace 0)
@@ -459,22 +459,22 @@ body: |
; GFX7-LABEL: name: load_flat_p1_from_8
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
+ ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX8-LABEL: name: load_flat_p1_from_8
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
+ ; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX9-LABEL: name: load_flat_p1_from_8
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
+ ; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX10-LABEL: name: load_flat_p1_from_8
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
+ ; GFX10: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(p1) = G_LOAD %0 :: (load 8, align 8, addrspace 0)
@@ -566,22 +566,22 @@ body: |
; GFX7-LABEL: name: load_flat_v2s16
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX8-LABEL: name: load_flat_v2s16
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ ; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX9-LABEL: name: load_flat_v2s16
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ ; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX10-LABEL: name: load_flat_v2s16
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ ; GFX10: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<2 x s16>) = G_LOAD %0 :: (load 4, align 4, addrspace 0)
@@ -603,22 +603,22 @@ body: |
; GFX7-LABEL: name: load_flat_v4s16
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
+ ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX8-LABEL: name: load_flat_v4s16
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
+ ; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX9-LABEL: name: load_flat_v4s16
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
+ ; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX10-LABEL: name: load_flat_v4s16
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
+ ; GFX10: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<4 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 0)
@@ -728,7 +728,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_flat_s32_from_1_gep_2047
; GFX8: liveins: $vgpr0_vgpr1
@@ -743,12 +743,12 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_flat_s32_from_1_gep_2047
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2047, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX10-LABEL: name: load_flat_s32_from_1_gep_2047
; GFX10: liveins: $vgpr0_vgpr1
@@ -763,7 +763,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 2047
@@ -797,7 +797,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_flat_s32_from_1_gep_2048
; GFX8: liveins: $vgpr0_vgpr1
@@ -812,12 +812,12 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_flat_s32_from_1_gep_2048
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2048, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX10-LABEL: name: load_flat_s32_from_1_gep_2048
; GFX10: liveins: $vgpr0_vgpr1
@@ -832,7 +832,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 2048
@@ -866,7 +866,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_flat_s32_from_1_gep_m2047
; GFX8: liveins: $vgpr0_vgpr1
@@ -881,7 +881,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_flat_s32_from_1_gep_m2047
; GFX9: liveins: $vgpr0_vgpr1
@@ -896,7 +896,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX10-LABEL: name: load_flat_s32_from_1_gep_m2047
; GFX10: liveins: $vgpr0_vgpr1
@@ -911,7 +911,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -2047
@@ -945,7 +945,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_flat_s32_from_1_gep_m2048
; GFX8: liveins: $vgpr0_vgpr1
@@ -960,7 +960,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_flat_s32_from_1_gep_m2048
; GFX9: liveins: $vgpr0_vgpr1
@@ -975,7 +975,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX10-LABEL: name: load_flat_s32_from_1_gep_m2048
; GFX10: liveins: $vgpr0_vgpr1
@@ -990,7 +990,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -2048
@@ -1024,7 +1024,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_flat_s32_from_1_gep_4095
; GFX8: liveins: $vgpr0_vgpr1
@@ -1039,12 +1039,12 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_flat_s32_from_1_gep_4095
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 4095, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX10-LABEL: name: load_flat_s32_from_1_gep_4095
; GFX10: liveins: $vgpr0_vgpr1
@@ -1059,7 +1059,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 4095
@@ -1093,7 +1093,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_flat_s32_from_1_gep_4096
; GFX8: liveins: $vgpr0_vgpr1
@@ -1108,7 +1108,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_flat_s32_from_1_gep_4096
; GFX9: liveins: $vgpr0_vgpr1
@@ -1123,7 +1123,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX10-LABEL: name: load_flat_s32_from_1_gep_4096
; GFX10: liveins: $vgpr0_vgpr1
@@ -1138,7 +1138,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 4096
@@ -1172,7 +1172,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_flat_s32_from_1_gep_m4095
; GFX8: liveins: $vgpr0_vgpr1
@@ -1187,7 +1187,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_flat_s32_from_1_gep_m4095
; GFX9: liveins: $vgpr0_vgpr1
@@ -1202,7 +1202,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX10-LABEL: name: load_flat_s32_from_1_gep_m4095
; GFX10: liveins: $vgpr0_vgpr1
@@ -1217,7 +1217,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -4095
@@ -1251,7 +1251,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_flat_s32_from_1_gep_m4096
; GFX8: liveins: $vgpr0_vgpr1
@@ -1266,7 +1266,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_flat_s32_from_1_gep_m4096
; GFX9: liveins: $vgpr0_vgpr1
@@ -1281,7 +1281,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX10-LABEL: name: load_flat_s32_from_1_gep_m4096
; GFX10: liveins: $vgpr0_vgpr1
@@ -1296,7 +1296,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -4096
@@ -1330,7 +1330,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_flat_s32_from_1_gep_8191
; GFX8: liveins: $vgpr0_vgpr1
@@ -1345,7 +1345,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_flat_s32_from_1_gep_8191
; GFX9: liveins: $vgpr0_vgpr1
@@ -1360,7 +1360,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX10-LABEL: name: load_flat_s32_from_1_gep_8191
; GFX10: liveins: $vgpr0_vgpr1
@@ -1375,7 +1375,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 8191
@@ -1409,7 +1409,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_flat_s32_from_1_gep_8192
; GFX8: liveins: $vgpr0_vgpr1
@@ -1424,7 +1424,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_flat_s32_from_1_gep_8192
; GFX9: liveins: $vgpr0_vgpr1
@@ -1439,7 +1439,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX10-LABEL: name: load_flat_s32_from_1_gep_8192
; GFX10: liveins: $vgpr0_vgpr1
@@ -1454,7 +1454,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 8192
@@ -1488,7 +1488,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_flat_s32_from_1_gep_m8191
; GFX8: liveins: $vgpr0_vgpr1
@@ -1503,7 +1503,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_flat_s32_from_1_gep_m8191
; GFX9: liveins: $vgpr0_vgpr1
@@ -1518,7 +1518,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX10-LABEL: name: load_flat_s32_from_1_gep_m8191
; GFX10: liveins: $vgpr0_vgpr1
@@ -1533,7 +1533,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -8191
@@ -1567,7 +1567,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_flat_s32_from_1_gep_m8192
; GFX8: liveins: $vgpr0_vgpr1
@@ -1582,7 +1582,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_flat_s32_from_1_gep_m8192
; GFX9: liveins: $vgpr0_vgpr1
@@ -1597,7 +1597,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX10-LABEL: name: load_flat_s32_from_1_gep_m8192
; GFX10: liveins: $vgpr0_vgpr1
@@ -1612,7 +1612,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+ ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -8192
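
All of the CHECK-line churn in this test is the same mechanical rewrite: the run of trailing flag immediates on each FLAT/GLOBAL instruction collapses into a single cache-policy immediate after the offset, which is why lines like "FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, ..." shrink to "FLAT_LOAD_DWORD [[COPY]], 0, 0, ...". A minimal C++ sketch of the new encoding follows; the CPol bit values follow what this patch adds to SIDefines.h, while encodeCachePolicy is a hypothetical helper named here for illustration only:

// Cache-policy bits as defined in llvm/lib/Target/AMDGPU/SIDefines.h
// after this change. Each former boolean operand becomes one bit of a
// single immediate operand.
namespace llvm {
namespace AMDGPU {
namespace CPol {
enum CPol {
  GLC = 1, // globally coherent
  SLC = 2, // system level coherent
  DLC = 4, // device level coherent (GFX10+)
  ALL = GLC | SLC | DLC
};
} // namespace CPol
} // namespace AMDGPU
} // namespace llvm

// Hypothetical helper, not part of the patch: fold the three former
// flag operands into the one cache_policy immediate.
static unsigned encodeCachePolicy(bool GLC, bool SLC, bool DLC) {
  unsigned CPol = 0;
  if (GLC)
    CPol |= llvm::AMDGPU::CPol::GLC;
  if (SLC)
    CPol |= llvm::AMDGPU::CPol::SLC;
  if (DLC)
    CPol |= llvm::AMDGPU::CPol::DLC;
  return CPol;
}

Under this encoding a load printed with a cache-policy operand of 3 would carry GLC|SLC, where the old form needed two separate unit immediates; the tests above all use 0, so only the operand count changes.
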
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir
index 2c96d673d072..5854fdca686d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir
@@ -16,13 +16,13 @@ body: |
; GFX9: liveins: $sgpr0_sgpr1
; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
; GFX10-LABEL: name: load_global_s32_from_sgpr
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+ ; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:vgpr(p1) = COPY %0
@@ -47,13 +47,13 @@ body: |
; GFX9: liveins: $sgpr0_sgpr1, $vgpr0
; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_zext_vgpr
; GFX10: liveins: $sgpr0_sgpr1, $vgpr0
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+ ; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:vgpr(s32) = COPY $vgpr0
@@ -81,13 +81,13 @@ body: |
; GFX9: liveins: $sgpr0_sgpr1, $vgpr0
; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_merge_zext_vgpr
; GFX10: liveins: $sgpr0_sgpr1, $vgpr0
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+ ; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:vgpr(s32) = COPY $vgpr0
@@ -125,7 +125,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
; GFX9: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
- ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_merge_not_0_vgpr
; GFX10: liveins: $sgpr0_sgpr1, $vgpr0
@@ -141,7 +141,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
; GFX10: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
- ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+ ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:vgpr(s32) = COPY $vgpr0
@@ -169,7 +169,7 @@ body: |
; GFX9: liveins: $sgpr0_sgpr1, $vgpr0
; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 4095, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset4095
; GFX10: liveins: $sgpr0_sgpr1, $vgpr0
@@ -195,7 +195,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY7]], [[COPY8]], 0, implicit $exec
; GFX10: %14:vgpr_32, dead %16:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY9]], [[COPY10]], killed [[V_ADD_CO_U32_e64_3]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_2]], %subreg.sub0, %14, %subreg.sub1
- ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE2]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+ ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE2]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:vgpr(s32) = COPY $vgpr0
@@ -225,7 +225,7 @@ body: |
; GFX9: liveins: $sgpr0_sgpr1, $vgpr0
; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], -4096, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], -4096, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset_neg4096
; GFX10: liveins: $sgpr0_sgpr1, $vgpr0
@@ -251,7 +251,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY7]], [[COPY8]], 0, implicit $exec
; GFX10: %14:vgpr_32, dead %16:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY9]], [[COPY10]], killed [[V_ADD_CO_U32_e64_3]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_2]], %subreg.sub0, %14, %subreg.sub1
- ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE2]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+ ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE2]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:vgpr(s32) = COPY $vgpr0
@@ -279,13 +279,13 @@ body: |
; GFX9: liveins: $sgpr0_sgpr1
; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
- ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4096
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
- ; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+ ; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 4096
@@ -310,13 +310,13 @@ body: |
; GFX9: liveins: $sgpr0_sgpr1
; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
- ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4097
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
- ; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+ ; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 4097
@@ -351,7 +351,7 @@ body: |
; GFX9: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX9: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
- ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4097
; GFX10: liveins: $sgpr0_sgpr1
@@ -367,7 +367,7 @@ body: |
; GFX10: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX10: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
- ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+ ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 -4097
@@ -392,13 +392,13 @@ body: |
; GFX9: liveins: $sgpr0_sgpr1
; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 2049, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 2049, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_2049
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec
- ; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+ ; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 2049
@@ -423,7 +423,7 @@ body: |
; GFX9: liveins: $sgpr0_sgpr1
; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], -2049, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], -2049, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_neg2049
; GFX10: liveins: $sgpr0_sgpr1
@@ -439,7 +439,7 @@ body: |
; GFX10: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX10: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
- ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+ ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 -2049
@@ -463,13 +463,13 @@ body: |
; GFX9: liveins: $sgpr0_sgpr1
; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec
- ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 4095, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 4095, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4294967295
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec
- ; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 2047, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+ ; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 2047, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 4294967295
@@ -503,7 +503,7 @@ body: |
; GFX9: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX9: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
- ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4294967296
; GFX10: liveins: $sgpr0_sgpr1
@@ -519,7 +519,7 @@ body: |
; GFX10: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX10: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
- ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+ ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 4294967296
@@ -554,7 +554,7 @@ body: |
; GFX9: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX9: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
- ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4294971390
; GFX10: liveins: $sgpr0_sgpr1
@@ -570,7 +570,7 @@ body: |
; GFX10: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX10: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
- ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+ ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 4294971390
@@ -605,7 +605,7 @@ body: |
; GFX9: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX9: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
- ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4294967295
; GFX10: liveins: $sgpr0_sgpr1
@@ -621,7 +621,7 @@ body: |
; GFX10: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX10: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
- ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+ ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 -4294967295
@@ -655,7 +655,7 @@ body: |
; GFX9: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX9: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
- ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4294967296
; GFX10: liveins: $sgpr0_sgpr1
@@ -671,7 +671,7 @@ body: |
; GFX10: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX10: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
- ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+ ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 -4294967296
@@ -693,12 +693,12 @@ body: |
; GFX9-LABEL: name: load_global_s32_from_copy_undef_sgpr
; GFX9: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY [[DEF]]
- ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
; GFX10-LABEL: name: load_global_s32_from_copy_undef_sgpr
; GFX10: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY [[DEF]]
- ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+ ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:sgpr(p1) = G_IMPLICIT_DEF
%1:vgpr(p1) = COPY %0
@@ -717,11 +717,11 @@ body: |
bb.0:
; GFX9-LABEL: name: load_global_s32_from_undef_vgpr
; GFX9: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
; GFX10-LABEL: name: load_global_s32_from_undef_vgpr
; GFX10: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+ ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:vgpr(p1) = G_IMPLICIT_DEF
%1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 1)
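(Aside, for readers of the MIR above: flat/global loads now end with an
`offset, cpol' immediate pair, where cpol is the new cache_policy bitmask
that replaces the separate GLC/SLC/DLC immediates. A minimal C++ sketch of
the folding, using the CPol values this commit adds to SIDefines.h
(GLC = 1, SLC = 2, DLC = 4); the helper name packCachePolicy is
illustrative only and is not part of the patch:

#include <cstdint>

namespace CPol {
// Bit values of the single cache-policy operand (per SIDefines.h).
enum CPol : uint32_t { GLC = 1, SLC = 2, DLC = 4, ALL = GLC | SLC | DLC };
} // namespace CPol

// Fold the former separate 0/1 immediates into the one bitmask operand
// printed as the trailing immediate in the MIR lines above.
static uint32_t packCachePolicy(bool GLCBit, bool SLCBit, bool DLCBit) {
  return (GLCBit ? CPol::GLC : 0) | (SLCBit ? CPol::SLC : 0) |
         (DLCBit ? CPol::DLC : 0);
}

This is why the all-zero flag runs in the old checks shrink to a single 0:
packCachePolicy(false, false, false) == 0.)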
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir
index ec7076cec4db..fc992c3284fc 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir
@@ -27,7 +27,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+ ; GFX6: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_4
; GFX7: liveins: $vgpr0_vgpr1
@@ -37,27 +37,27 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+ ; GFX7: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_4
; GFX7-FLAT: liveins: $vgpr0_vgpr1
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
+ ; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX8-LABEL: name: load_global_s32_from_4
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
+ ; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX9-LABEL: name: load_global_s32_from_4
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
; GFX10-LABEL: name: load_global_s32_from_4
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+ ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 1)
@@ -84,7 +84,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6: [[BUFFER_LOAD_USHORT_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 1)
+ ; GFX6: [[BUFFER_LOAD_USHORT_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_2
; GFX7: liveins: $vgpr0_vgpr1
@@ -94,27 +94,27 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7: [[BUFFER_LOAD_USHORT_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 1)
+ ; GFX7: [[BUFFER_LOAD_USHORT_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_2
; GFX7-FLAT: liveins: $vgpr0_vgpr1
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 2, addrspace 1)
+ ; GFX7-FLAT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 2, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_USHORT]]
; GFX8-LABEL: name: load_global_s32_from_2
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 2, addrspace 1)
+ ; GFX8: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 2, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_USHORT]]
; GFX9-LABEL: name: load_global_s32_from_2
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, implicit $exec :: (load 2, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_USHORT]]
; GFX10-LABEL: name: load_global_s32_from_2
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 1)
+ ; GFX10: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, implicit $exec :: (load 2, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_USHORT]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = G_LOAD %0 :: (load 2, align 2, addrspace 1)
@@ -141,7 +141,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1
; GFX7: liveins: $vgpr0_vgpr1
@@ -151,27 +151,27 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1
; GFX7-FLAT: liveins: $vgpr0_vgpr1
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+ ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_global_s32_from_1
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+ ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_global_s32_from_1
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
; GFX10-LABEL: name: load_global_s32_from_1
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = G_LOAD %0 :: (load 1, align 1, addrspace 1)
@@ -198,7 +198,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
+ ; GFX6: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX6: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
; GFX7-LABEL: name: load_global_v2s32
; GFX7: liveins: $vgpr0_vgpr1
@@ -208,27 +208,27 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
+ ; GFX7: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX7: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_v2s32
; GFX7-FLAT: liveins: $vgpr0_vgpr1
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
+ ; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX8-LABEL: name: load_global_v2s32
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
+ ; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX9-LABEL: name: load_global_v2s32
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
; GFX10-LABEL: name: load_global_v2s32
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
+ ; GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX10: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<2 x s32>) = G_LOAD %0 :: (load 8, align 8, addrspace 1)
@@ -255,7 +255,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 4, addrspace 1)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 4, addrspace 1)
; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]]
; GFX7-LABEL: name: load_global_v4s32
; GFX7: liveins: $vgpr0_vgpr1
@@ -265,27 +265,27 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 4, addrspace 1)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 4, addrspace 1)
; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_v4s32
; GFX7-FLAT: liveins: $vgpr0_vgpr1
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4, addrspace 1)
+ ; GFX7-FLAT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4, addrspace 1)
; GFX7-FLAT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
; GFX8-LABEL: name: load_global_v4s32
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4, addrspace 1)
+ ; GFX8: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4, addrspace 1)
; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
; GFX9-LABEL: name: load_global_v4s32
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 4, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load 16, align 4, addrspace 1)
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
; GFX10-LABEL: name: load_global_v4s32
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 4, addrspace 1)
+ ; GFX10: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load 16, align 4, addrspace 1)
; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<4 x s32>) = G_LOAD %0 :: (load 16, align 4, addrspace 1)
@@ -312,27 +312,27 @@ body: |
; GFX7-LABEL: name: load_global_s64
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
+ ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX7-FLAT-LABEL: name: load_global_s64
; GFX7-FLAT: liveins: $vgpr0_vgpr1
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
+ ; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX8-LABEL: name: load_global_s64
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
+ ; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX9-LABEL: name: load_global_s64
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
; GFX10-LABEL: name: load_global_s64
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
+ ; GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX10: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_LOAD %0 :: (load 8, align 8, addrspace 1)
@@ -359,27 +359,27 @@ body: |
; GFX7-LABEL: name: load_global_v2s64
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4, addrspace 1)
+ ; GFX7: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4, addrspace 1)
; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
; GFX7-FLAT-LABEL: name: load_global_v2s64
; GFX7-FLAT: liveins: $vgpr0_vgpr1
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4, addrspace 1)
+ ; GFX7-FLAT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4, addrspace 1)
; GFX7-FLAT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
; GFX8-LABEL: name: load_global_v2s64
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4, addrspace 1)
+ ; GFX8: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4, addrspace 1)
; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
; GFX9-LABEL: name: load_global_v2s64
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 4, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load 16, align 4, addrspace 1)
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
; GFX10-LABEL: name: load_global_v2s64
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 4, addrspace 1)
+ ; GFX10: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load 16, align 4, addrspace 1)
; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<2 x s64>) = G_LOAD %0 :: (load 16, align 4, addrspace 1)
@@ -500,27 +500,27 @@ body: |
; GFX7-LABEL: name: load_global_p3_from_4
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
+ ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX7-FLAT-LABEL: name: load_global_p3_from_4
; GFX7-FLAT: liveins: $vgpr0_vgpr1
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
+ ; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX8-LABEL: name: load_global_p3_from_4
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
+ ; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX9-LABEL: name: load_global_p3_from_4
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
; GFX10-LABEL: name: load_global_p3_from_4
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+ ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(p3) = G_LOAD %0 :: (load 4, align 4, addrspace 1)
@@ -547,27 +547,27 @@ body: |
; GFX7-LABEL: name: load_global_p1_from_8
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
+ ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX7-FLAT-LABEL: name: load_global_p1_from_8
; GFX7-FLAT: liveins: $vgpr0_vgpr1
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
+ ; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX8-LABEL: name: load_global_p1_from_8
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
+ ; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX9-LABEL: name: load_global_p1_from_8
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
; GFX10-LABEL: name: load_global_p1_from_8
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
+ ; GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX10: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(p1) = G_LOAD %0 :: (load 8, align 8, addrspace 1)
@@ -688,27 +688,27 @@ body: |
; GFX7-LABEL: name: load_global_v2s16
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
+ ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX7-FLAT-LABEL: name: load_global_v2s16
; GFX7-FLAT: liveins: $vgpr0_vgpr1
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
+ ; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX8-LABEL: name: load_global_v2s16
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
+ ; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX9-LABEL: name: load_global_v2s16
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
; GFX10-LABEL: name: load_global_v2s16
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+ ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<2 x s16>) = G_LOAD %0 :: (load 4, align 4, addrspace 1)
@@ -735,27 +735,27 @@ body: |
; GFX7-LABEL: name: load_global_v4s16
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
+ ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX7-FLAT-LABEL: name: load_global_v4s16
; GFX7-FLAT: liveins: $vgpr0_vgpr1
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
+ ; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX8-LABEL: name: load_global_v4s16
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
+ ; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX9-LABEL: name: load_global_v4s16
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
; GFX10-LABEL: name: load_global_v4s16
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
+ ; GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX10: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<4 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 1)
@@ -833,7 +833,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_2047
; GFX7: liveins: $vgpr0_vgpr1
@@ -843,7 +843,7 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_2047
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@@ -858,7 +858,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+ ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_global_s32_from_1_gep_2047
; GFX8: liveins: $vgpr0_vgpr1
@@ -873,17 +873,17 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+ ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_global_s32_from_1_gep_2047
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
; GFX10-LABEL: name: load_global_s32_from_1_gep_2047
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 2047
@@ -912,7 +912,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_2048
; GFX7: liveins: $vgpr0_vgpr1
@@ -922,7 +922,7 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_2048
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@@ -937,7 +937,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+ ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_global_s32_from_1_gep_2048
; GFX8: liveins: $vgpr0_vgpr1
@@ -952,12 +952,12 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+ ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_global_s32_from_1_gep_2048
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2048, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
; GFX10-LABEL: name: load_global_s32_from_1_gep_2048
; GFX10: liveins: $vgpr0_vgpr1
@@ -972,7 +972,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 2048
@@ -1011,7 +1011,7 @@ body: |
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
- ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_m2047
; GFX7: liveins: $vgpr0_vgpr1
@@ -1031,7 +1031,7 @@ body: |
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
- ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m2047
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@@ -1046,7 +1046,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+ ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_global_s32_from_1_gep_m2047
; GFX8: liveins: $vgpr0_vgpr1
@@ -1061,17 +1061,17 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+ ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_global_s32_from_1_gep_m2047
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
; GFX10-LABEL: name: load_global_s32_from_1_gep_m2047
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -2047
@@ -1110,7 +1110,7 @@ body: |
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
- ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_m2048
; GFX7: liveins: $vgpr0_vgpr1
@@ -1130,7 +1130,7 @@ body: |
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
- ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m2048
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@@ -1145,7 +1145,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+ ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_global_s32_from_1_gep_m2048
; GFX8: liveins: $vgpr0_vgpr1
@@ -1160,17 +1160,17 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+ ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_global_s32_from_1_gep_m2048
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
; GFX10-LABEL: name: load_global_s32_from_1_gep_m2048
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -2048
@@ -1199,7 +1199,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_4095
; GFX7: liveins: $vgpr0_vgpr1
@@ -1209,7 +1209,7 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_4095
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@@ -1224,7 +1224,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+ ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_global_s32_from_1_gep_4095
; GFX8: liveins: $vgpr0_vgpr1
@@ -1239,12 +1239,12 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+ ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_global_s32_from_1_gep_4095
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 4095, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
; GFX10-LABEL: name: load_global_s32_from_1_gep_4095
; GFX10: liveins: $vgpr0_vgpr1
@@ -1259,7 +1259,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 4095
@@ -1289,7 +1289,7 @@ body: |
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
- ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_4096
; GFX7: liveins: $vgpr0_vgpr1
@@ -1300,7 +1300,7 @@ body: |
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
- ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_4096
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@@ -1315,7 +1315,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+ ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_global_s32_from_1_gep_4096
; GFX8: liveins: $vgpr0_vgpr1
@@ -1330,7 +1330,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+ ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_global_s32_from_1_gep_4096
; GFX9: liveins: $vgpr0_vgpr1
@@ -1345,7 +1345,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
; GFX10-LABEL: name: load_global_s32_from_1_gep_4096
; GFX10: liveins: $vgpr0_vgpr1
@@ -1360,7 +1360,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 4096
@@ -1399,7 +1399,7 @@ body: |
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
- ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_m4095
; GFX7: liveins: $vgpr0_vgpr1
@@ -1419,7 +1419,7 @@ body: |
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
- ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m4095
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@@ -1434,7 +1434,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+ ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_global_s32_from_1_gep_m4095
; GFX8: liveins: $vgpr0_vgpr1
@@ -1449,12 +1449,12 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+ ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_global_s32_from_1_gep_m4095
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4095, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4095, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
; GFX10-LABEL: name: load_global_s32_from_1_gep_m4095
; GFX10: liveins: $vgpr0_vgpr1
@@ -1469,7 +1469,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -4095
@@ -1508,7 +1508,7 @@ body: |
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
- ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_m4096
; GFX7: liveins: $vgpr0_vgpr1
@@ -1528,7 +1528,7 @@ body: |
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
- ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m4096
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@@ -1543,7 +1543,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+ ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_global_s32_from_1_gep_m4096
; GFX8: liveins: $vgpr0_vgpr1
@@ -1558,12 +1558,12 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+ ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_global_s32_from_1_gep_m4096
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4096, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4096, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
; GFX10-LABEL: name: load_global_s32_from_1_gep_m4096
; GFX10: liveins: $vgpr0_vgpr1
@@ -1578,7 +1578,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -4096
@@ -1608,7 +1608,7 @@ body: |
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8191
- ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_8191
; GFX7: liveins: $vgpr0_vgpr1
@@ -1619,7 +1619,7 @@ body: |
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8191
- ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_8191
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@@ -1634,7 +1634,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+ ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_global_s32_from_1_gep_8191
; GFX8: liveins: $vgpr0_vgpr1
@@ -1649,7 +1649,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+ ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_global_s32_from_1_gep_8191
; GFX9: liveins: $vgpr0_vgpr1
@@ -1664,7 +1664,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
; GFX10-LABEL: name: load_global_s32_from_1_gep_8191
; GFX10: liveins: $vgpr0_vgpr1
@@ -1679,7 +1679,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 8191
@@ -1709,7 +1709,7 @@ body: |
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8192
- ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_8192
; GFX7: liveins: $vgpr0_vgpr1
@@ -1720,7 +1720,7 @@ body: |
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8192
- ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_8192
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@@ -1735,7 +1735,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+ ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_global_s32_from_1_gep_8192
; GFX8: liveins: $vgpr0_vgpr1
@@ -1750,7 +1750,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+ ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_global_s32_from_1_gep_8192
; GFX9: liveins: $vgpr0_vgpr1
@@ -1765,7 +1765,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
; GFX10-LABEL: name: load_global_s32_from_1_gep_8192
; GFX10: liveins: $vgpr0_vgpr1
@@ -1780,7 +1780,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 8192
@@ -1819,7 +1819,7 @@ body: |
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
- ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_m8191
; GFX7: liveins: $vgpr0_vgpr1
@@ -1839,7 +1839,7 @@ body: |
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
- ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m8191
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@@ -1854,7 +1854,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+ ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_global_s32_from_1_gep_m8191
; GFX8: liveins: $vgpr0_vgpr1
@@ -1869,7 +1869,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+ ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_global_s32_from_1_gep_m8191
; GFX9: liveins: $vgpr0_vgpr1
@@ -1884,7 +1884,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
; GFX10-LABEL: name: load_global_s32_from_1_gep_m8191
; GFX10: liveins: $vgpr0_vgpr1
@@ -1899,7 +1899,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -8191
@@ -1938,7 +1938,7 @@ body: |
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
- ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_m8192
; GFX7: liveins: $vgpr0_vgpr1
@@ -1958,7 +1958,7 @@ body: |
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
- ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m8192
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@@ -1973,7 +1973,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+ ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_global_s32_from_1_gep_m8192
; GFX8: liveins: $vgpr0_vgpr1
@@ -1988,7 +1988,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+ ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_global_s32_from_1_gep_m8192
; GFX9: liveins: $vgpr0_vgpr1
@@ -2003,7 +2003,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
; GFX10-LABEL: name: load_global_s32_from_1_gep_m8192
; GFX10: liveins: $vgpr0_vgpr1
@@ -2018,7 +2018,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -8192
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.s96.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.s96.mir
index ba20a503abef..880961bd006b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.s96.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.s96.mir
@@ -24,27 +24,27 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7: [[BUFFER_LOAD_DWORDX3_ADDR64_:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 12, align 4, addrspace 1)
+ ; GFX7: [[BUFFER_LOAD_DWORDX3_ADDR64_:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 12, align 4, addrspace 1)
; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[BUFFER_LOAD_DWORDX3_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_v3s32
; GFX7-FLAT: liveins: $vgpr0_vgpr1
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4, addrspace 1)
+ ; GFX7-FLAT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4, addrspace 1)
; GFX7-FLAT: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]]
; GFX8-LABEL: name: load_global_v3s32
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4, addrspace 1)
+ ; GFX8: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4, addrspace 1)
; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]]
; GFX9-LABEL: name: load_global_v3s32
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9: [[GLOBAL_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = GLOBAL_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 12, align 4, addrspace 1)
+ ; GFX9: [[GLOBAL_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = GLOBAL_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec :: (load 12, align 4, addrspace 1)
; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[GLOBAL_LOAD_DWORDX3_]]
; GFX10-LABEL: name: load_global_v3s32
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10: [[GLOBAL_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = GLOBAL_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 12, align 4, addrspace 1)
+ ; GFX10: [[GLOBAL_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = GLOBAL_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec :: (load 12, align 4, addrspace 1)
; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[GLOBAL_LOAD_DWORDX3_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<3 x s32>) = G_LOAD %0 :: (load 12, align 4, addrspace 1)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir
index f0411d4233f3..2d23a64dff00 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir
@@ -19,12 +19,12 @@ body: |
; GFX6-LABEL: name: load_private_s32_from_4
; GFX6: liveins: $vgpr0
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
+ ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_4
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
+ ; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 5)
@@ -49,12 +49,12 @@ body: |
; GFX6-LABEL: name: load_private_s32_from_2
; GFX6: liveins: $vgpr0
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 5)
+ ; GFX6: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_2
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 5)
+ ; GFX9: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_LOAD %0 :: (load 2, align 2, addrspace 5)
@@ -79,12 +79,12 @@ body: |
; GFX6-LABEL: name: load_private_s32_from_1
; GFX6: liveins: $vgpr0
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+ ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+ ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_LOAD %0 :: (load 1, align 1, addrspace 5)
@@ -109,12 +109,12 @@ body: |
; GFX6-LABEL: name: load_private_p3_from_4
; GFX6: liveins: $vgpr0
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
+ ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX9-LABEL: name: load_private_p3_from_4
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
+ ; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(p3) = G_LOAD %0 :: (load 4, align 4, addrspace 5)
@@ -139,12 +139,12 @@ body: |
; GFX6-LABEL: name: load_private_p5_from_4
; GFX6: liveins: $vgpr0
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
+ ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX9-LABEL: name: load_private_p5_from_4
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
+ ; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(p5) = G_LOAD %0 :: (load 4, align 4, addrspace 5)
@@ -170,12 +170,12 @@ body: |
; GFX6-LABEL: name: load_private_v2s16
; GFX6: liveins: $vgpr0
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
+ ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX9-LABEL: name: load_private_v2s16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
+ ; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(<2 x s16>) = G_LOAD %0 :: (load 4, align 4, addrspace 5)
@@ -206,12 +206,12 @@ body: |
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
- ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+ ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_gep_2047
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+ ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_CONSTANT i32 2047
@@ -240,14 +240,14 @@ body: |
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
; GFX6: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
- ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_AND_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+ ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_AND_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_gep_2047_known_bits
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
; GFX9: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
- ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_AND_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+ ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_AND_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = G_CONSTANT i32 2147483647
@@ -279,12 +279,12 @@ body: |
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
- ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+ ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_gep_2048
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2048, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+ ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2048, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_CONSTANT i32 2048
@@ -313,14 +313,14 @@ body: |
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2047, implicit $exec
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
- ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+ ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_gep_m2047
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2047, implicit $exec
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
- ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+ ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_CONSTANT i32 -2047
@@ -349,14 +349,14 @@ body: |
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2048, implicit $exec
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
- ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+ ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_gep_m2048
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2048, implicit $exec
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
- ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+ ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_CONSTANT i32 -2048
@@ -385,12 +385,12 @@ body: |
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
- ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+ ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_gep_4095
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+ ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_CONSTANT i32 4095
@@ -419,14 +419,14 @@ body: |
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
- ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+ ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_gep_4096
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
- ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+ ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_CONSTANT i32 4096
@@ -455,14 +455,14 @@ body: |
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4095, implicit $exec
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
- ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+ ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_gep_m4095
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4095, implicit $exec
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
- ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+ ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_CONSTANT i32 -4095
@@ -491,14 +491,14 @@ body: |
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4096, implicit $exec
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
- ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+ ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_gep_m4096
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4096, implicit $exec
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
- ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+ ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_CONSTANT i32 -4096
@@ -527,14 +527,14 @@ body: |
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
- ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+ ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_gep_8191
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
- ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+ ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_CONSTANT i32 8191
@@ -563,14 +563,14 @@ body: |
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
- ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+ ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_gep_8192
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
- ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+ ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_CONSTANT i32 8192
@@ -599,14 +599,14 @@ body: |
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8191, implicit $exec
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
- ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+ ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_gep_m8191
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8191, implicit $exec
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
- ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+ ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_CONSTANT i32 -8191
@@ -635,14 +635,14 @@ body: |
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8192, implicit $exec
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
- ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+ ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_gep_m8192
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8192, implicit $exec
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
- ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+ ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_CONSTANT i32 -8192
@@ -666,10 +666,10 @@ body: |
bb.0:
; GFX6-LABEL: name: load_private_s32_from_4_constant_0
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
+ ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
; GFX9-LABEL: name: load_private_s32_from_4_constant_0
- ; GFX9: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
+ ; GFX9: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
%0:vgpr(p5) = G_CONSTANT i32 0
%1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 5)
@@ -691,10 +691,10 @@ body: |
bb.0:
; GFX6-LABEL: name: load_private_s32_from_4_constant_sgpr_16
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
+ ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
; GFX9-LABEL: name: load_private_s32_from_4_constant_sgpr_16
- ; GFX9: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
+ ; GFX9: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
%0:sgpr(p5) = G_CONSTANT i32 16
%1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 5)
@@ -716,10 +716,10 @@ body: |
bb.0:
; GFX6-LABEL: name: load_private_s32_from_1_constant_4095
- ; GFX6: [[BUFFER_LOAD_UBYTE_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+ ; GFX6: [[BUFFER_LOAD_UBYTE_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFSET]]
; GFX9-LABEL: name: load_private_s32_from_1_constant_4095
- ; GFX9: [[BUFFER_LOAD_UBYTE_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+ ; GFX9: [[BUFFER_LOAD_UBYTE_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFSET]]
%0:vgpr(p5) = G_CONSTANT i32 4095
%1:vgpr(s32) = G_LOAD %0 :: (load 1, align 1, addrspace 5)
@@ -742,11 +742,11 @@ body: |
; GFX6-LABEL: name: load_private_s32_from_1_constant_4096
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
- ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+ ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_constant_4096
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
- ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+ ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = G_CONSTANT i32 4096
%1:vgpr(s32) = G_LOAD %0 :: (load 1, align 1, addrspace 5)
@@ -770,10 +770,10 @@ body: |
bb.0:
; GFX6-LABEL: name: load_private_s32_from_fi
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
+ ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_fi
- ; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
+ ; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
%0:vgpr(p5) = G_FRAME_INDEX %stack.0
%1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 5)
@@ -796,10 +796,10 @@ body: |
bb.0:
; GFX6-LABEL: name: load_private_s32_from_1_fi_offset_4095
- ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+ ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_4095
- ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+ ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = G_FRAME_INDEX %stack.0
%1:vgpr(s32) = G_CONSTANT i32 4095
@@ -825,10 +825,10 @@ body: |
bb.0:
; GFX6-LABEL: name: load_private_s32_from_1_fi_offset_sgpr_4095
- ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+ ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_sgpr_4095
- ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+ ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = G_FRAME_INDEX %stack.0
%1:sgpr(s32) = G_CONSTANT i32 4095
@@ -858,13 +858,13 @@ body: |
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, implicit $exec
- ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+ ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_4096
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, implicit $exec
- ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+ ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = G_FRAME_INDEX %stack.0
%1:vgpr(s32) = G_CONSTANT i32 4096
@@ -890,11 +890,11 @@ body: |
; GFX6-LABEL: name: load_private_s32_from_neg1
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
+ ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, -1, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_neg1
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
- ; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
+ ; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, -1, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
%0:vgpr(p5) = G_CONSTANT i32 -1
%1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 5)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir
index 925339ad7dbb..2c41c3a858ab 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir
@@ -17,12 +17,12 @@ regBankSelected: true
# GCN: [[PTR:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
# Immediate offset:
-# SICI: S_LOAD_DWORD_IMM [[PTR]], 1, 0, 0
-# VI: S_LOAD_DWORD_IMM [[PTR]], 4, 0, 0
+# SICI: S_LOAD_DWORD_IMM [[PTR]], 1, 0
+# VI: S_LOAD_DWORD_IMM [[PTR]], 4, 0
# Max immediate offset for SI
-# SICI: S_LOAD_DWORD_IMM [[PTR]], 255, 0, 0
-# VI: S_LOAD_DWORD_IMM [[PTR]], 1020, 0, 0
+# SICI: S_LOAD_DWORD_IMM [[PTR]], 255, 0
+# VI: S_LOAD_DWORD_IMM [[PTR]], 1020, 0
# Immediate overflow for SI
# SI: [[K1024:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
@@ -52,8 +52,8 @@ regBankSelected: true
# SIVI-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1
# SIVI: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
# SIVI: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1
-# SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0, 0
-# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 4294967295, 0, 0
+# SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0
+# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 4294967295, 0
# Immediate overflow for CI
# GCN: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 0
@@ -66,7 +66,7 @@ regBankSelected: true
# GCN-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1
# GCN: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
# GCN: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1
-# GCN: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0, 0
+# GCN: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0
# Max 32-bit byte offset
# SIVI: [[K4294967292:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967292
@@ -84,8 +84,8 @@ regBankSelected: true
# SIVI-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1
# SIVI: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
# SIVI: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1
-# SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0, 0
-# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741824, 0, 0
+# SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0
+# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741824, 0
# Pointer loads
# GCN: [[AS0:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0
@@ -192,8 +192,8 @@ body: |
# GCN-LABEL: name: constant_address_positive{{$}}
# GCN: %0:sreg_64 = S_MOV_B64 44
-# VI: %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 64, 0, 0 :: (dereferenceable invariant load 4, addrspace 4)
-# SICI: %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 16, 0, 0 :: (dereferenceable invariant load 4, addrspace 4)
+# VI: %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 64, 0 :: (dereferenceable invariant load 4, addrspace 4)
+# SICI: %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 16, 0 :: (dereferenceable invariant load 4, addrspace 4)
---
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir
index 0bb46b364eaf..b31579d729a9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir
@@ -18,16 +18,16 @@ body: |
; WAVE64: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
; WAVE64: [[V_CVT_F32_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec
; WAVE64: [[V_CVT_F32_I32_e64_1:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY1]], 0, 0, implicit $mode, implicit $exec
- ; WAVE64: FLAT_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
- ; WAVE64: FLAT_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; WAVE64: FLAT_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; WAVE64: FLAT_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; WAVE32-LABEL: name: sitofp
; WAVE32: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE32: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
; WAVE32: [[V_CVT_F32_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec
; WAVE32: [[V_CVT_F32_I32_e64_1:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY1]], 0, 0, implicit $mode, implicit $exec
- ; WAVE32: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
- ; WAVE32: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_1]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ ; WAVE32: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_]], 0, 0, implicit $exec :: (store 4, addrspace 1)
+ ; WAVE32: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_1]], 0, 0, implicit $exec :: (store 4, addrspace 1)
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir
index 83e455b9af5b..b7a43dfb4b34 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir
@@ -17,12 +17,12 @@ body: |
; GFX7: liveins: $vgpr0, $vgpr1_vgpr2
; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2
- ; GFX7: FLAT_STORE_DWORD [[COPY1]], [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst 4)
+ ; GFX7: FLAT_STORE_DWORD [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst 4)
; GFX9-LABEL: name: atomic_store_flat_s32_seq_cst
; GFX9: liveins: $vgpr0, $vgpr1_vgpr2
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2
- ; GFX9: FLAT_STORE_DWORD [[COPY1]], [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst 4)
+ ; GFX9: FLAT_STORE_DWORD [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst 4)
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(p0) = COPY $vgpr1_vgpr2
G_STORE %0, %1 :: (store seq_cst 4, align 4, addrspace 0)
@@ -152,12 +152,12 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX7: FLAT_STORE_DWORDX2 [[COPY1]], [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst 8)
+ ; GFX7: FLAT_STORE_DWORDX2 [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst 8)
; GFX9-LABEL: name: atomic_store_flat_s64_seq_cst
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX9: FLAT_STORE_DWORDX2 [[COPY1]], [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst 8)
+ ; GFX9: FLAT_STORE_DWORDX2 [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst 8)
%0:vgpr(s64) = COPY $vgpr0_vgpr1
%1:vgpr(p0) = COPY $vgpr2_vgpr3
G_STORE %0, %1 :: (store seq_cst 8, align 8, addrspace 0)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir
index 5b53dbea6b23..b4e7203e8ec3 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir
@@ -19,22 +19,22 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+ ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX8-LABEL: name: store_flat_s32_to_4
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+ ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX9-LABEL: name: store_flat_s32_to_4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+ ; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX10-LABEL: name: store_flat_s32_to_4
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+ ; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
G_STORE %1, %0 :: (store 4, align 4, addrspace 0)
@@ -55,22 +55,22 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX7: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 2)
+ ; GFX7: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 2)
; GFX8-LABEL: name: store_flat_s32_to_2
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX8: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 2)
+ ; GFX8: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 2)
; GFX9-LABEL: name: store_flat_s32_to_2
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX9: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 2)
+ ; GFX9: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 2)
; GFX10-LABEL: name: store_flat_s32_to_2
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX10: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 2)
+ ; GFX10: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 2)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
G_STORE %1, %0 :: (store 2, align 2, addrspace 0)
@@ -91,22 +91,22 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX7: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 1)
+ ; GFX7: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 1)
; GFX8-LABEL: name: store_flat_s32_to_1
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX8: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 1)
+ ; GFX8: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 1)
; GFX9-LABEL: name: store_flat_s32_to_1
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX9: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 1)
+ ; GFX9: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 1)
; GFX10-LABEL: name: store_flat_s32_to_1
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX10: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 1)
+ ; GFX10: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
G_STORE %1, %0 :: (store 1, align 1, addrspace 0)
@@ -128,22 +128,22 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
+ ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX8-LABEL: name: store_flat_s64
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
+ ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX9-LABEL: name: store_flat_s64
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
+ ; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX10-LABEL: name: store_flat_s64
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
+ ; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store 8, align 8, addrspace 0)
@@ -237,22 +237,22 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
+ ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX8-LABEL: name: store_flat_v2s32
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
+ ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX9-LABEL: name: store_flat_v2s32
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
+ ; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX10-LABEL: name: store_flat_v2s32
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
+ ; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<2 x s32>) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store 8, align 8, addrspace 0)
@@ -273,22 +273,22 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
- ; GFX7: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16)
+ ; GFX7: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16)
; GFX8-LABEL: name: store_flat_v3s32
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
- ; GFX8: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16)
+ ; GFX8: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16)
; GFX9-LABEL: name: store_flat_v3s32
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
- ; GFX9: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16)
+ ; GFX9: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16)
; GFX10-LABEL: name: store_flat_v3s32
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
- ; GFX10: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16)
+ ; GFX10: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
G_STORE %1, %0 :: (store 12, align 16, addrspace 0)
@@ -309,22 +309,22 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
- ; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
+ ; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
; GFX8-LABEL: name: store_flat_v4s32
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
- ; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
+ ; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
; GFX9-LABEL: name: store_flat_v4s32
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
- ; GFX9: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
+ ; GFX9: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
; GFX10-LABEL: name: store_flat_v4s32
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
- ; GFX10: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
+ ; GFX10: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
G_STORE %1, %0 :: (store 16, align 16, addrspace 0)
@@ -346,22 +346,22 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+ ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX8-LABEL: name: store_flat_v2s16
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+ ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX9-LABEL: name: store_flat_v2s16
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+ ; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX10-LABEL: name: store_flat_v2s16
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+ ; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<2 x s16>) = COPY $vgpr2
G_STORE %1, %0 :: (store 4, align 4, addrspace 0)
@@ -383,22 +383,22 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
+ ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX8-LABEL: name: store_flat_v4s16
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
+ ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX9-LABEL: name: store_flat_v4s16
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
+ ; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX10-LABEL: name: store_flat_v4s16
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
+ ; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store 8, align 8, addrspace 0)
@@ -493,22 +493,22 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
- ; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
+ ; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
; GFX8-LABEL: name: store_flat_v2s64
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
- ; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
+ ; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
; GFX9-LABEL: name: store_flat_v2s64
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
- ; GFX9: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
+ ; GFX9: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
; GFX10-LABEL: name: store_flat_v2s64
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
- ; GFX10: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
+ ; GFX10: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
G_STORE %1, %0 :: (store 16, align 16, addrspace 0)
@@ -530,22 +530,22 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
+ ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX8-LABEL: name: store_flat_p1
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
+ ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX9-LABEL: name: store_flat_p1
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
+ ; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX10-LABEL: name: store_flat_p1
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
+ ; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(p1) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store 8, align 8, addrspace 0)
@@ -604,22 +604,22 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+ ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX8-LABEL: name: store_flat_p3
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+ ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX9-LABEL: name: store_flat_p3
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+ ; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX10-LABEL: name: store_flat_p3
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+ ; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(p3) = COPY $vgpr2
G_STORE %1, %0 :: (store 4, align 4, addrspace 0)
@@ -677,22 +677,22 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4)
+ ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4)
; GFX8-LABEL: name: store_atomic_flat_s32
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4)
+ ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4)
; GFX9-LABEL: name: store_atomic_flat_s32
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4)
+ ; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4)
; GFX10-LABEL: name: store_atomic_flat_s32
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4)
+ ; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
G_STORE %1, %0 :: (store monotonic 4, align 4, addrspace 0)
@@ -714,22 +714,22 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8)
+ ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8)
; GFX8-LABEL: name: store_atomic_flat_s64
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8)
+ ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8)
; GFX9-LABEL: name: store_atomic_flat_s64
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8)
+ ; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8)
; GFX10-LABEL: name: store_atomic_flat_s64
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8)
+ ; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store monotonic 8, align 8, addrspace 0)
@@ -761,7 +761,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX7: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+ ; GFX7: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX8-LABEL: name: store_flat_s32_gep_2047
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@@ -776,12 +776,12 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX8: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+ ; GFX8: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX9-LABEL: name: store_flat_s32_gep_2047
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+ ; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX10-LABEL: name: store_flat_s32_gep_2047
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@@ -796,7 +796,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX10: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+ ; GFX10: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s64) = G_CONSTANT i64 2047
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir
index 009353d52bd6..55a73a957aab 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir
@@ -26,7 +26,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ ; GFX6: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX7-LABEL: name: store_global_s32_to_4
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@@ -36,27 +36,27 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ ; GFX7: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_s32_to_4
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX7-FLAT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GFX7-FLAT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX8-LABEL: name: store_global_s32_to_4
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX9-LABEL: name: store_global_s32_to_4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ ; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX10-LABEL: name: store_global_s32_to_4
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ ; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 4, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
G_STORE %1, %0 :: (store 4, align 4, addrspace 1)
@@ -82,7 +82,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6: BUFFER_STORE_SHORT_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 1)
+ ; GFX6: BUFFER_STORE_SHORT_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 1)
; GFX7-LABEL: name: store_global_s32_to_2
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@@ -92,27 +92,27 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7: BUFFER_STORE_SHORT_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 1)
+ ; GFX7: BUFFER_STORE_SHORT_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_s32_to_2
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX7-FLAT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 2, addrspace 1)
+ ; GFX7-FLAT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 2, addrspace 1)
; GFX8-LABEL: name: store_global_s32_to_2
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX8: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 2, addrspace 1)
+ ; GFX8: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 2, addrspace 1)
; GFX9-LABEL: name: store_global_s32_to_2
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX9: GLOBAL_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 1)
+ ; GFX9: GLOBAL_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 2, addrspace 1)
; GFX10-LABEL: name: store_global_s32_to_2
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX10: GLOBAL_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 1)
+ ; GFX10: GLOBAL_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 2, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
G_STORE %1, %0 :: (store 2, align 2, addrspace 1)
@@ -138,7 +138,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6: BUFFER_STORE_BYTE_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 1)
+ ; GFX6: BUFFER_STORE_BYTE_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 1)
; GFX7-LABEL: name: store_global_s32_to_1
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@@ -148,27 +148,27 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7: BUFFER_STORE_BYTE_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 1)
+ ; GFX7: BUFFER_STORE_BYTE_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_s32_to_1
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX7-FLAT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 1, addrspace 1)
+ ; GFX7-FLAT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 1, addrspace 1)
; GFX8-LABEL: name: store_global_s32_to_1
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX8: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 1, addrspace 1)
+ ; GFX8: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 1, addrspace 1)
; GFX9-LABEL: name: store_global_s32_to_1
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX9: GLOBAL_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 1)
+ ; GFX9: GLOBAL_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 1, addrspace 1)
; GFX10-LABEL: name: store_global_s32_to_1
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX10: GLOBAL_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 1)
+ ; GFX10: GLOBAL_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 1, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
G_STORE %1, %0 :: (store 1, align 1, addrspace 1)
@@ -195,27 +195,27 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
+ ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_s64
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
+ ; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX8-LABEL: name: store_global_s64
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
+ ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX9-LABEL: name: store_global_s64
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX9: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
+ ; GFX9: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 8, addrspace 1)
; GFX10-LABEL: name: store_global_s64
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX10: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
+ ; GFX10: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 8, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store 8, align 8, addrspace 1)
@@ -288,7 +288,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6: BUFFER_STORE_DWORDX2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
+ ; GFX6: BUFFER_STORE_DWORDX2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
; GFX7-LABEL: name: store_global_v2s32
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@@ -298,27 +298,27 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7: BUFFER_STORE_DWORDX2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
+ ; GFX7: BUFFER_STORE_DWORDX2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_v2s32
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
+ ; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX8-LABEL: name: store_global_v2s32
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
+ ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX9-LABEL: name: store_global_v2s32
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX9: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
+ ; GFX9: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 8, addrspace 1)
; GFX10-LABEL: name: store_global_v2s32
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX10: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
+ ; GFX10: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 8, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<2 x s32>) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store 8, align 8, addrspace 1)
@@ -344,7 +344,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6: BUFFER_STORE_DWORDX4_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+ ; GFX6: BUFFER_STORE_DWORDX4_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
; GFX7-LABEL: name: store_global_v4s32
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@@ -354,27 +354,27 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7: BUFFER_STORE_DWORDX4_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+ ; GFX7: BUFFER_STORE_DWORDX4_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_v4s32
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
- ; GFX7-FLAT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16, addrspace 1)
+ ; GFX7-FLAT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16, addrspace 1)
; GFX8-LABEL: name: store_global_v4s32
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
- ; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16, addrspace 1)
+ ; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16, addrspace 1)
; GFX9-LABEL: name: store_global_v4s32
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
- ; GFX9: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+ ; GFX9: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 16, addrspace 1)
; GFX10-LABEL: name: store_global_v4s32
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
- ; GFX10: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+ ; GFX10: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 16, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
G_STORE %1, %0 :: (store 16, align 16, addrspace 1)
@@ -401,27 +401,27 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_v2s16
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX7-FLAT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GFX7-FLAT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX8-LABEL: name: store_global_v2s16
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX9-LABEL: name: store_global_v2s16
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ ; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX10-LABEL: name: store_global_v2s16
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ ; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 4, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<2 x s16>) = COPY $vgpr2
G_STORE %1, %0 :: (store 4, align 4, addrspace 1)
@@ -448,27 +448,27 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
+ ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_v4s16
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
+ ; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX8-LABEL: name: store_global_v4s16
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
+ ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX9-LABEL: name: store_global_v4s16
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX9: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
+ ; GFX9: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 8, addrspace 1)
; GFX10-LABEL: name: store_global_v4s16
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX10: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
+ ; GFX10: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 8, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store 8, align 8, addrspace 1)
@@ -542,27 +542,27 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
- ; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16, addrspace 1)
+ ; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_v2s64
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
- ; GFX7-FLAT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16, addrspace 1)
+ ; GFX7-FLAT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16, addrspace 1)
; GFX8-LABEL: name: store_global_v2s64
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
- ; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16, addrspace 1)
+ ; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16, addrspace 1)
; GFX9-LABEL: name: store_global_v2s64
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
- ; GFX9: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+ ; GFX9: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 16, addrspace 1)
; GFX10-LABEL: name: store_global_v2s64
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
- ; GFX10: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+ ; GFX10: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 16, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
G_STORE %1, %0 :: (store 16, align 16, addrspace 1)
@@ -589,27 +589,27 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
+ ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_p1
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
+ ; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX8-LABEL: name: store_global_p1
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
+ ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX9-LABEL: name: store_global_p1
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX9: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
+ ; GFX9: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 8, addrspace 1)
; GFX10-LABEL: name: store_global_p1
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX10: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
+ ; GFX10: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 8, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(p1) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store 8, align 8, addrspace 1)
@@ -683,27 +683,27 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_p3
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX7-FLAT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GFX7-FLAT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX8-LABEL: name: store_global_p3
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX9-LABEL: name: store_global_p3
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ ; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX10-LABEL: name: store_global_p3
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ ; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 4, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(p3) = COPY $vgpr2
G_STORE %1, %0 :: (store 4, align 4, addrspace 1)
@@ -776,27 +776,27 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4, addrspace 1)
+ ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4, addrspace 1)
; GFX7-FLAT-LABEL: name: store_atomic_global_s32
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX7-FLAT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4, addrspace 1)
+ ; GFX7-FLAT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4, addrspace 1)
; GFX8-LABEL: name: store_atomic_global_s32
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4, addrspace 1)
+ ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4, addrspace 1)
; GFX9-LABEL: name: store_atomic_global_s32
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store monotonic 4, addrspace 1)
+ ; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store monotonic 4, addrspace 1)
; GFX10-LABEL: name: store_atomic_global_s32
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store monotonic 4, addrspace 1)
+ ; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store monotonic 4, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
G_STORE %1, %0 :: (store monotonic 4, align 4, addrspace 1)
@@ -823,27 +823,27 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8, addrspace 1)
+ ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8, addrspace 1)
; GFX7-FLAT-LABEL: name: store_atomic_global_s64
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8, addrspace 1)
+ ; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8, addrspace 1)
; GFX8-LABEL: name: store_atomic_global_s64
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8, addrspace 1)
+ ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8, addrspace 1)
; GFX9-LABEL: name: store_atomic_global_s64
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX9: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store monotonic 8, addrspace 1)
+ ; GFX9: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store monotonic 8, addrspace 1)
; GFX10-LABEL: name: store_atomic_global_s64
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX10: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store monotonic 8, addrspace 1)
+ ; GFX10: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store monotonic 8, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store monotonic 8, align 8, addrspace 1)
@@ -870,7 +870,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ ; GFX6: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX7-LABEL: name: store_global_s32_gep_2047
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@@ -880,7 +880,7 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ ; GFX7: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_s32_gep_2047
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@@ -895,7 +895,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX7-FLAT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GFX7-FLAT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX8-LABEL: name: store_global_s32_gep_2047
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@@ -910,17 +910,17 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
- ; GFX8: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; GFX8: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX9-LABEL: name: store_global_s32_gep_2047
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ ; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, implicit $exec :: (store 4, addrspace 1)
; GFX10-LABEL: name: store_global_s32_gep_2047
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ ; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, implicit $exec :: (store 4, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s64) = G_CONSTANT i64 2047
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.s96.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.s96.mir
index 34038925fe4f..752c84dc0f4d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.s96.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.s96.mir
@@ -1,3 +1,4 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
# RUN: llc -march=amdgcn -mcpu=hawaii -mattr=+flat-for-global -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7-FLAT %s
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s
@@ -26,27 +27,27 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7: BUFFER_STORE_DWORDX3_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 12, align 16, addrspace 1)
+ ; GFX7: BUFFER_STORE_DWORDX3_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store 12, align 16, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_v3s32
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
- ; GFX7-FLAT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16, addrspace 1)
+ ; GFX7-FLAT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16, addrspace 1)
; GFX8-LABEL: name: store_global_v3s32
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
- ; GFX8: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16, addrspace 1)
+ ; GFX8: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16, addrspace 1)
; GFX9-LABEL: name: store_global_v3s32
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
- ; GFX9: GLOBAL_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 12, align 16, addrspace 1)
+ ; GFX9: GLOBAL_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 12, align 16, addrspace 1)
; GFX10-LABEL: name: store_global_v3s32
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
- ; GFX10: GLOBAL_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 12, align 16, addrspace 1)
+ ; GFX10: GLOBAL_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 12, align 16, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
G_STORE %1, %0 :: (store 12, align 16, addrspace 1)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir
index 992a76795bf9..f96c150a1b11 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir
@@ -21,12 +21,12 @@ body: |
; GFX6: liveins: $vgpr0, $vgpr1
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
+ ; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX9-LABEL: name: function_store_private_s32_to_4
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
+ ; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(p5) = COPY $vgpr1
G_STORE %0, %1 :: (store 4, align 4, addrspace 5)
@@ -52,12 +52,12 @@ body: |
; GFX6: liveins: $vgpr0, $vgpr1
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX6: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 5)
+ ; GFX6: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 5)
; GFX9-LABEL: name: function_store_private_s32_to_2
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX9: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 5)
+ ; GFX9: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 5)
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(p5) = COPY $vgpr1
G_STORE %0, %1 :: (store 2, align 2, addrspace 5)
@@ -83,12 +83,12 @@ body: |
; GFX6: liveins: $vgpr0, $vgpr1
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX6: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
+ ; GFX6: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX9-LABEL: name: function_store_private_s32_to_1
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX9: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
+ ; GFX9: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(p5) = COPY $vgpr1
G_STORE %0, %1 :: (store 1, align 1, addrspace 5)
@@ -114,12 +114,12 @@ body: |
; GFX6: liveins: $vgpr0, $vgpr1
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
+ ; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX9-LABEL: name: function_store_private_v2s16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
+ ; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
%0:vgpr(<2 x s16>) = COPY $vgpr0
%1:vgpr(p5) = COPY $vgpr1
G_STORE %0, %1 :: (store 4, align 4, addrspace 5)
@@ -145,12 +145,12 @@ body: |
; GFX6: liveins: $vgpr0, $vgpr1
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
+ ; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX9-LABEL: name: function_store_private_p3
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
+ ; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
%0:vgpr(p3) = COPY $vgpr0
%1:vgpr(p5) = COPY $vgpr1
G_STORE %0, %1 :: (store 4, align 4, addrspace 5)
@@ -176,12 +176,12 @@ body: |
; GFX6: liveins: $vgpr0, $vgpr1
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
+ ; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX9-LABEL: name: function_store_private_p5
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
+ ; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(p5) = COPY $vgpr1
G_STORE %0, %1 :: (store 4, align 4, addrspace 5)
@@ -206,10 +206,10 @@ body: |
; GFX6-LABEL: name: function_store_private_s32_to_1_fi_offset_4095
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
+ ; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX9-LABEL: name: function_store_private_s32_to_1_fi_offset_4095
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
+ ; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
%0:vgpr(p5) = G_FRAME_INDEX %stack.0
%1:vgpr(s32) = G_CONSTANT i32 4095
%2:vgpr(p5) = G_PTR_ADD %0, %1
@@ -236,10 +236,10 @@ body: |
; GFX6-LABEL: name: function_store_private_s32_to_1_constant_4095
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX6: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
+ ; GFX6: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX9-LABEL: name: function_store_private_s32_to_1_constant_4095
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX9: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
+ ; GFX9: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
%0:vgpr(p5) = G_CONSTANT i32 4095
%1:vgpr(s32) = G_CONSTANT i32 0
G_STORE %1, %0 :: (store 1, align 1, addrspace 5)
@@ -265,11 +265,11 @@ body: |
; GFX6-LABEL: name: function_store_private_s32_to_1_constant_4096
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
- ; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
+ ; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX9-LABEL: name: function_store_private_s32_to_1_constant_4096
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
- ; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
+ ; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
%0:vgpr(p5) = G_CONSTANT i32 4096
%1:vgpr(s32) = G_CONSTANT i32 0
G_STORE %1, %0 :: (store 1, align 1, addrspace 5)
@@ -294,12 +294,12 @@ body: |
; GFX6: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
+ ; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX9-LABEL: name: kernel_store_private_s32_to_4
; GFX9: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
+ ; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(p5) = COPY $vgpr1
G_STORE %0, %1 :: (store 4, align 4, addrspace 5)
@@ -324,12 +324,12 @@ body: |
; GFX6: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX6: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 5)
+ ; GFX6: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 5)
; GFX9-LABEL: name: kernel_store_private_s32_to_2
; GFX9: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX9: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 5)
+ ; GFX9: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 5)
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(p5) = COPY $vgpr1
G_STORE %0, %1 :: (store 2, align 2, addrspace 5)
@@ -354,12 +354,12 @@ body: |
; GFX6: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX6: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
+ ; GFX6: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX9-LABEL: name: kernel_store_private_s32_to_1
; GFX9: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX9: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
+ ; GFX9: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(p5) = COPY $vgpr1
G_STORE %0, %1 :: (store 1, align 1, addrspace 5)
@@ -384,12 +384,12 @@ body: |
; GFX6: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
+ ; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX9-LABEL: name: kernel_store_private_v2s16
; GFX9: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
+ ; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
%0:vgpr(<2 x s16>) = COPY $vgpr0
%1:vgpr(p5) = COPY $vgpr1
G_STORE %0, %1 :: (store 4, align 4, addrspace 5)
@@ -414,12 +414,12 @@ body: |
; GFX6: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
+ ; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX9-LABEL: name: kernel_store_private_p3
; GFX9: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
+ ; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
%0:vgpr(p3) = COPY $vgpr0
%1:vgpr(p5) = COPY $vgpr1
G_STORE %0, %1 :: (store 4, align 4, addrspace 5)
@@ -444,12 +444,12 @@ body: |
; GFX6: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
+ ; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX9-LABEL: name: kernel_store_private_p5
; GFX9: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
+ ; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(p5) = COPY $vgpr1
G_STORE %0, %1 :: (store 4, align 4, addrspace 5)
@@ -475,11 +475,11 @@ body: |
; GFX6-LABEL: name: kernel_store_private_s32_to_1_fi_offset_4095
; GFX6: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
+ ; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX9-LABEL: name: kernel_store_private_s32_to_1_fi_offset_4095
; GFX9: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
+ ; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
%0:vgpr(p5) = G_FRAME_INDEX %stack.0
%1:vgpr(s32) = G_CONSTANT i32 4095
%2:vgpr(p5) = G_PTR_ADD %0, %1
@@ -507,11 +507,11 @@ body: |
; GFX6-LABEL: name: kernel_store_private_s32_to_1_constant_4095
; GFX6: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX6: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
+ ; GFX6: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX9-LABEL: name: kernel_store_private_s32_to_1_constant_4095
; GFX9: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX9: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
+ ; GFX9: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
%0:vgpr(p5) = G_CONSTANT i32 4095
%1:vgpr(s32) = G_CONSTANT i32 0
G_STORE %1, %0 :: (store 1, align 1, addrspace 5)
@@ -538,12 +538,12 @@ body: |
; GFX6: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
- ; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
+ ; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX9-LABEL: name: kernel_store_private_s32_to_1_constant_4096
; GFX9: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
- ; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
+ ; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
%0:vgpr(p5) = G_CONSTANT i32 4096
%1:vgpr(s32) = G_CONSTANT i32 0
G_STORE %1, %0 :: (store 1, align 1, addrspace 5)
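
Across the private-store checks above the change is purely mechanical: the run
of trailing zero immediates after the offset shrinks by three as the separate
cache-policy flag operands are folded into one cpol immediate (all zero here,
since stack traffic uses the default policy). A minimal sketch of the packed
encoding these checks imply, assuming the bit values GLC=1, SLC=2, DLC=4 that
the updated immediates suggest (enum and helper names are hypothetical, for
illustration only):

  // Cache-policy flags packed into a single immediate operand.
  enum CPolBits : unsigned {
    CPOL_GLC = 1u << 0, // glc: globally coherent
    CPOL_SLC = 1u << 1, // slc: system level coherent
    CPOL_DLC = 1u << 2, // dlc: device level coherent
  };

  unsigned encodeCPol(bool GLC, bool SLC, bool DLC) {
    return (GLC ? CPOL_GLC : 0u) | (SLC ? CPOL_SLC : 0u) |
           (DLC ? CPOL_DLC : 0u);
  }
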
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll
index f2bd8745d1dc..21cb91c14723 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll
@@ -14,7 +14,7 @@ define amdgpu_ps float @raw_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__vgpr_vof
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%ret = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
@@ -34,7 +34,7 @@ define amdgpu_ps float @raw_buffer_atomic_add_i32_noret__vgpr_val__sgpr_rsrc__vg
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%ret = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
@@ -56,7 +56,7 @@ define amdgpu_ps <2 x float> @raw_buffer_atomic_add_i64__vgpr_val__sgpr_rsrc__vg
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_OFFEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 8 on custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_OFFEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store 8 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN]].sub0
; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN]].sub1
; CHECK: $vgpr0 = COPY [[COPY8]]
@@ -81,7 +81,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_i64_noret__vgpr_val__sgpr_rsrc__vgp
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_OFFEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 8 on custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_OFFEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store 8 on custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%ret = call i64 @llvm.amdgcn.raw.buffer.atomic.add.i64(i64 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@@ -121,7 +121,7 @@ define amdgpu_ps float @raw_buffer_atomic_add_i32__sgpr_val__vgpr_rsrc__sgpr_vof
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -170,7 +170,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_i32_noret__sgpr_val__vgpr_rsrc__sgp
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -195,7 +195,7 @@ define amdgpu_ps float @raw_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__vgpr_vof
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
+ ; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%voffset = add i32 %voffset.base, 4095
@@ -217,7 +217,7 @@ define amdgpu_ps float @raw_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__vgpr_vof
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 3, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%ret = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2)
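
The last hunk above shows the bitmask at work: the old trailing glc/slc pair
"0, 1, 1" becomes the single value 3 for an slc atomic that still returns its
result. Under the assumed bit values sketched earlier:

  // glc (bit 0) because the returning atomic must produce the pre-op value;
  // slc (bit 1) from the intrinsic's cachepolicy argument of 2.
  constexpr unsigned CPol = (1u << 0) | (1u << 1);
  static_assert(CPol == 3, "matches the updated check line");
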
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll
index a25855ed8c22..39c2a0ce688d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll
@@ -16,7 +16,7 @@ define amdgpu_ps float @raw_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__sgpr_
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0
; CHECK: $vgpr0 = COPY [[COPY8]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
@@ -40,7 +40,7 @@ define amdgpu_ps void @raw_buffer_atomic_cmpswap_i32_noret__vgpr_val__vgpr_cmp__
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0
; CHECK: S_ENDPGM 0
%ret = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
@@ -84,7 +84,7 @@ define amdgpu_ps float @raw_buffer_atomic_cmpswap_i32__sgpr_val__sgpr_cmp__vgpr_
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1
- ; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE4]], [[COPY10]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE4]], [[COPY10]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
@@ -137,7 +137,7 @@ define amdgpu_ps void @raw_buffer_atomic_cmpswap_i32_noret__sgpr_val__sgpr_cmp__
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1
- ; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE4]], [[COPY10]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE4]], [[COPY10]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
@@ -165,7 +165,7 @@ define amdgpu_ps float @raw_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__sgpr_
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 4095, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
+ ; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 4095, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0
; CHECK: $vgpr0 = COPY [[COPY8]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll
index 7d29afa40faf..c29f5cf8f977 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll
@@ -28,7 +28,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp
; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX90A: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX90A: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: S_ENDPGM 0
%ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@@ -59,7 +59,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp
; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX90A: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX90A: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
+ ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
; GFX90A: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4095
%ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@@ -89,7 +89,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp
; GFX90A: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX90A: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
+ ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
; GFX90A: S_ENDPGM 0
%ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
ret void
@@ -119,7 +119,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_v
; GFX90A: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX90A: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: S_ENDPGM 0
%ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0)
ret void
@@ -200,7 +200,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__sgp
; GFX90A: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; GFX90A: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; GFX90A: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX90A: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -284,7 +284,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__0_v
; GFX90A: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; GFX90A: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
; GFX90A: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX90A: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -322,7 +322,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp
; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX90A: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX90A: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
+ ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
; GFX90A: S_ENDPGM 0
%voffset = add i32 %voffset.base, 4095
%ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
@@ -342,7 +342,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp
; GFX908: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX908: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX908: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX908: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; GFX908: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX908: S_ENDPGM 0
; GFX90A-LABEL: name: raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc
; GFX90A: bb.1 (%ir-block.0):
@@ -355,7 +355,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp
; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX90A: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX90A: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 3, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: S_ENDPGM 0
%ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2)
ret void
@@ -386,7 +386,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__v
; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX90A: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX90A: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX90A: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; GFX90A: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: S_ENDPGM 0
%ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@@ -415,7 +415,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0
; GFX90A: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX90A: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX90A: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; GFX90A: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: S_ENDPGM 0
%ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0)
ret void
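
The fadd hunks also show how the rtn/no-rtn split interacts with the mask: for
the slc variant (cachepolicy argument 2), the GFX908 no-return form now carries
cpol 2 (slc only) while the GFX90A returning form carries cpol 3 (glc|slc,
since the returning encoding needs glc set). A hedged sketch of that selection,
with a hypothetical function name and the same assumed bit values:

  // Derive the cpol operand for a buffer atomic from the intrinsic's
  // cachepolicy argument; returning (rtn) forms additionally set glc.
  unsigned selectAtomicCPol(unsigned CachePolicyArg, bool IsRtn) {
    unsigned CPol = (CachePolicyArg & 2u) ? 2u /*slc*/ : 0u;
    if (IsRtn)
      CPol |= 1u; // glc: atomic returns the pre-op value
    return CPol;
  }

For the plain cachepolicy-0 cases this yields 1 for rtn forms and 0 otherwise,
which is exactly the "0, 1," immediate pattern in the unchanged-semantics
checks above.
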
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll
index ad616e43e400..ecd6d2085982 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll
@@ -14,7 +14,7 @@ define amdgpu_ps half @raw_buffer_load_format_f16__sgpr_rsrc__vgpr_voffset__sgpr
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
+ ; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_OFFEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; UNPACKED-LABEL: name: raw_buffer_load_format_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset
@@ -27,7 +27,7 @@ define amdgpu_ps half @raw_buffer_load_format_f16__sgpr_rsrc__vgpr_voffset__sgpr
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
+ ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]]
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call half @llvm.amdgcn.raw.buffer.load.format.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
@@ -45,7 +45,7 @@ define amdgpu_ps <2 x half> @raw_buffer_load_format_v2f16__sgpr_rsrc__vgpr_voffs
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: [[BUFFER_LOAD_FORMAT_D16_XY_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; PACKED: [[BUFFER_LOAD_FORMAT_D16_XY_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_OFFEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; UNPACKED-LABEL: name: raw_buffer_load_format_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset
@@ -58,7 +58,7 @@ define amdgpu_ps <2 x half> @raw_buffer_load_format_v2f16__sgpr_rsrc__vgpr_voffs
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub0
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub1
; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
@@ -93,7 +93,7 @@ define amdgpu_ps <4 x half> @raw_buffer_load_format_v4f16__sgpr_rsrc__vgpr_voffs
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
+ ; PACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub0
; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub1
; PACKED: $vgpr0 = COPY [[COPY6]]
@@ -109,7 +109,7 @@ define amdgpu_ps <4 x half> @raw_buffer_load_format_v4f16__sgpr_rsrc__vgpr_voffs
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
+ ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub0
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub1
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub2
@@ -169,7 +169,7 @@ define amdgpu_ps half @raw_buffer_load_format_f16__vgpr_rsrc__sgpr_voffset__vgpr
; PACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
+ ; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -209,7 +209,7 @@ define amdgpu_ps half @raw_buffer_load_format_f16__vgpr_rsrc__sgpr_voffset__vgpr
; UNPACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
+ ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -234,7 +234,7 @@ define amdgpu_ps <4 x half> @raw_buffer_load_format_v4f16__sgpr_rsrc__vgpr_voffs
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource" + 4095, align 1, addrspace 4)
+ ; PACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource" + 4095, align 1, addrspace 4)
; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub0
; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub1
; PACKED: $vgpr0 = COPY [[COPY6]]
@@ -250,7 +250,7 @@ define amdgpu_ps <4 x half> @raw_buffer_load_format_v4f16__sgpr_rsrc__vgpr_voffs
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource" + 4095, align 1, addrspace 4)
+ ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource" + 4095, align 1, addrspace 4)
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub0
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub1
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub2
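
On the load side the same collapse shows up as the run of seven zero
immediates after the soffset operand shrinking to four, the separate
cache-policy flag operands having been folded into the single cpol slot.
Going the other way, a printer can expand the mask back into textual flags;
a minimal self-contained sketch under the same assumed bit values (not the
actual printer code, just an illustration of the decode direction):

  #include <string>

  // Expand a cpol immediate into the asm flags it represents.
  std::string printCPol(unsigned CPol) {
    std::string Flags;
    if (CPol & 1u) Flags += " glc";
    if (CPol & 2u) Flags += " slc";
    if (CPol & 4u) Flags += " dlc";
    return Flags; // empty when the policy is the default 0
  }
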
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll
index 2a91b3689d9c..b8eb3ac678a3 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll
@@ -13,7 +13,7 @@ define amdgpu_ps float @raw_buffer_load_format_f32__sgpr_rsrc__vgpr_voffset__sgp
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
@@ -31,7 +31,7 @@ define amdgpu_ps <2 x float> @raw_buffer_load_format_v2f32__sgpr_rsrc__vgpr_voff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[BUFFER_LOAD_FORMAT_XY_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_FORMAT_XY_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XY_OFFEN]].sub0
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XY_OFFEN]].sub1
; CHECK: $vgpr0 = COPY [[COPY6]]
@@ -52,7 +52,7 @@ define amdgpu_ps <3 x float> @raw_buffer_load_format_v3f32__sgpr_rsrc__vgpr_voff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[BUFFER_LOAD_FORMAT_XYZ_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_FORMAT_XYZ_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub0
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub1
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub2
@@ -75,7 +75,7 @@ define amdgpu_ps <4 x float> @raw_buffer_load_format_v4f32__sgpr_rsrc__vgpr_voff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub0
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub1
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub2
@@ -121,7 +121,7 @@ define amdgpu_ps float @raw_buffer_load_format_f32__vgpr_rsrc__sgpr_voffset__vgp
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; CHECK: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -146,7 +146,7 @@ define amdgpu_ps <4 x float> @raw_buffer_load_format_v4f32__sgpr_rsrc__vgpr_voff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 4095, align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub0
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub1
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll
index a9e2e445d35f..ce835676f04d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll
@@ -14,7 +14,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
@@ -34,7 +34,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__sgpr_voffset__sgpr_soffs
; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr7
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
- ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
@@ -69,7 +69,7 @@ define amdgpu_ps float @raw_buffer_load_f32__vgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -114,7 +114,7 @@ define amdgpu_ps float @raw_buffer_load_f32__vgpr_rsrc__vgpr_voffset__vgpr_soffs
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -140,7 +140,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 1)
@@ -159,7 +159,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 2, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2)
@@ -178,7 +178,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 4, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 4)
@@ -197,7 +197,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 1, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 6, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 6)
@@ -216,7 +216,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 5, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 5)
@@ -235,7 +235,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, 1, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 7, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 7)
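
The six hunks above pin down the new cache_policy encoding: the old glc, slc, and
dlc immediates become bits 0, 1, and 2 of a single operand, so the intrinsic's
last argument (1, 2, 4, 6, 5, 7) reappears verbatim as the cpol value. A minimal
standalone C++ sketch of that folding, assuming only the bit assignments these
hunks imply (the enum and helper names here are illustrative, not LLVM's actual
API):

// Illustrative only: names are hypothetical, not LLVM's in-tree code.
// The bit assignments (glc=1, slc=2, dlc=4) are those implied by the
// cpol values 1, 2, 4, 6, 5 and 7 in the test hunks above.
#include <cassert>
#include <cstdint>

enum CPolBit : uint32_t { GLC = 1u << 0, SLC = 1u << 1, DLC = 1u << 2 };

constexpr uint32_t encodeCPol(bool Glc, bool Slc, bool Dlc) {
  return (Glc ? GLC : 0u) | (Slc ? SLC : 0u) | (Dlc ? DLC : 0u);
}

int main() {
  assert(encodeCPol(true,  false, false) == 1); // glc
  assert(encodeCPol(false, true,  false) == 2); // slc
  assert(encodeCPol(false, false, true)  == 4); // dlc
  assert(encodeCPol(false, true,  true)  == 6); // slc|dlc
  assert(encodeCPol(true,  false, true)  == 5); // glc|dlc
  assert(encodeCPol(true,  true,  true)  == 7); // glc|slc|dlc
  return 0;
}
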
@@ -254,7 +254,7 @@ define amdgpu_ps <2 x float> @raw_buffer_load_v2f32__sgpr_rsrc__vgpr_voffset__sg
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1
; CHECK: $vgpr0 = COPY [[COPY6]]
@@ -275,7 +275,7 @@ define amdgpu_ps <3 x float> @raw_buffer_load_v3f32__sgpr_rsrc__vgpr_voffset__sg
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[BUFFER_LOAD_DWORDX3_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_DWORDX3_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub0
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub1
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub2
@@ -298,7 +298,7 @@ define amdgpu_ps <4 x float> @raw_buffer_load_v4f32__sgpr_rsrc__vgpr_voffset__sg
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
@@ -323,7 +323,7 @@ define amdgpu_ps half @raw_buffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffse
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
@@ -341,7 +341,7 @@ define amdgpu_ps <2 x half> @raw_buffer_load_v2f16__sgpr_rsrc__vgpr_voffset__sgp
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call <2 x half> @llvm.amdgcn.raw.buffer.load.v2f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
@@ -365,7 +365,7 @@ define amdgpu_ps <4 x half> @raw_buffer_load_v4f16__sgpr_rsrc__vgpr_voffset__sgp
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1
; CHECK: $vgpr0 = COPY [[COPY6]]
@@ -386,7 +386,7 @@ define amdgpu_ps float @raw_buffer_load_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffse
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 1 from custom "BufferResource", addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 1 from custom "BufferResource", addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
@@ -406,7 +406,7 @@ define amdgpu_ps float @raw_buffer_load_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffse
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 1 from custom "BufferResource", addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 1 from custom "BufferResource", addrspace 4)
; CHECK: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[BUFFER_LOAD_UBYTE_OFFEN]], 0, 8, implicit $exec
; CHECK: $vgpr0 = COPY [[V_BFE_I32_e64_]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
@@ -444,7 +444,7 @@ define amdgpu_ps half @raw_buffer_load_f16__vgpr_rsrc__vgpr_voffset__sgpr_soffse
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -486,7 +486,7 @@ define amdgpu_ps float @raw_buffer_load_i8__vgpr_rsrc__vgpr_voffset__sgpr_soffse
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 1 from custom "BufferResource", addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 1 from custom "BufferResource", addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -512,7 +512,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vdpr_voffset__sgpr_soffs
; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[COPY4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[COPY4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 0, i32 %soffset, i32 0)
@@ -529,7 +529,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[COPY4]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4095, align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[COPY4]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
@@ -548,7 +548,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE]], [[COPY4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4096, align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE]], [[COPY4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 4096, i32 %soffset, i32 0)
@@ -566,7 +566,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 16, align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 16, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 16, align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%voffset = add i32 %voffset.base, 16
@@ -585,7 +585,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4095, align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%voffset = add i32 %voffset.base, 4095
@@ -607,7 +607,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: %10:vgpr_32, dead %15:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
- ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4096, align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%voffset = add i32 %voffset.base, 4096
@@ -626,7 +626,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
- ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0)
@@ -644,7 +644,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
- ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0)
@@ -664,7 +664,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16
; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
- ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%soffset = add i32 %soffset.base, 16
@@ -685,7 +685,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
- ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%soffset = add i32 %soffset.base, 4095
@@ -706,7 +706,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
- ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%soffset = add i32 %soffset.base, 4096
@@ -744,7 +744,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_ADD_I32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -790,7 +790,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %13, [[REG_SEQUENCE3]], [[COPY5]], 904, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 5000, align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %13, [[REG_SEQUENCE3]], [[COPY5]], 904, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 5000, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
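
Going the other way, printing or disassembling only has to mask the same three
bits back out of the single operand. Again a hypothetical sketch under the same
assumed bit layout, not the in-tree instruction printer:

#include <cstdint>
#include <iostream>
#include <string>

// Hypothetical decode of the combined cache-policy operand back into the
// textual flags, under the assumed bit layout glc=1, slc=2, dlc=4.
std::string decodeCPol(uint32_t CPol) {
  std::string S;
  if (CPol & 1) S += " glc";
  if (CPol & 2) S += " slc";
  if (CPol & 4) S += " dlc";
  return S;
}

int main() {
  std::cout << "cpol=5 prints as:" << decodeCPol(5) << '\n'; // " glc dlc"
  return 0;
}
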
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll
index 31724a1b5cd6..f40e52443ad0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll
@@ -14,7 +14,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
+ ; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f16
; PACKED: bb.1 (%ir-block.0):
@@ -27,7 +27,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
+ ; PACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@@ -44,7 +44,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_409
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource" + 4095, align 1, addrspace 4)
+ ; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource" + 4095, align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_4095__sgpr_soffset_f16
; PACKED: bb.1 (%ir-block.0):
@@ -56,7 +56,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_409
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: BUFFER_STORE_FORMAT_D16_X_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource" + 4095, align 1, addrspace 4)
+ ; PACKED: BUFFER_STORE_FORMAT_D16_X_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource" + 4095, align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.f16(half %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
ret void
@@ -78,7 +78,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY4]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
- ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16
; PACKED: bb.1 (%ir-block.0):
@@ -91,7 +91,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@@ -116,7 +116,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; UNPACKED: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; UNPACKED: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY5]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3
- ; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
+ ; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16
; PACKED: bb.1 (%ir-block.0):
@@ -131,7 +131,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
+ ; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@@ -173,7 +173,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse
; UNPACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY11]], implicit $exec
; UNPACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; UNPACKED: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
+ ; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -211,7 +211,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse
; PACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY9]], implicit $exec
; PACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; PACKED: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
+ ; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -240,7 +240,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY6]], [[COPY4]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
- ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4095
; PACKED: bb.1 (%ir-block.0):
@@ -253,7 +253,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
- ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0)
ret void
@@ -275,7 +275,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY6]], [[COPY4]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
- ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4096
; PACKED: bb.1 (%ir-block.0):
@@ -288,7 +288,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
- ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0)
ret void
@@ -310,7 +310,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY4]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
- ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 16, align 1, addrspace 4)
+ ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 16, align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_16
; PACKED: bb.1 (%ir-block.0):
@@ -323,7 +323,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 16, align 1, addrspace 4)
+ ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 16, align 1, addrspace 4)
; PACKED: S_ENDPGM 0
%voffset.add = add i32 %voffset, 16
call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@@ -346,7 +346,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY4]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
- ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
+ ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4095
; PACKED: bb.1 (%ir-block.0):
@@ -359,7 +359,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
+ ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
; PACKED: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4095
call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@@ -385,7 +385,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY4]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
- ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4)
+ ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4096
; PACKED: bb.1 (%ir-block.0):
@@ -401,7 +401,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; PACKED: %11:vgpr_32, dead %15:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
- ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4)
+ ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4)
; PACKED: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4096
call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@@ -448,7 +448,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse
; UNPACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY12]], implicit $exec
; UNPACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; UNPACKED: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource" + 4096, align 1, addrspace 4)
+ ; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource" + 4096, align 1, addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -489,7 +489,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse
; PACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY10]], implicit $exec
; PACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; PACKED: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource" + 4096, align 1, addrspace 4)
+ ; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource" + 4096, align 1, addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll
index 574c4efc5bca..37c8c8055b89 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll
@@ -14,7 +14,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: BUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@@ -31,7 +31,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_409
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: BUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
ret void
@@ -51,7 +51,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@@ -72,7 +72,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: BUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@@ -94,7 +94,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@@ -132,7 +132,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY11]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -159,7 +159,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
- ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0)
ret void
@@ -179,7 +179,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
- ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0)
ret void
@@ -199,7 +199,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource" + 16, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 16, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource" + 16, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 16
call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@@ -220,7 +220,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource" + 4095, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4095
call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@@ -244,7 +244,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: %13:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec
- ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource" + 4096, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4096
call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@@ -288,7 +288,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY12]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %15, [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 4096, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %15, [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll
index 43a27d0c33b0..3e0ee00bfe32 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll
@@ -15,7 +15,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@@ -36,7 +36,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__sgpr_val__sgpr_voffset__sgpr
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr8
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY7]], [[COPY8]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY7]], [[COPY8]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@@ -71,7 +71,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -103,7 +103,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY6]], implicit $exec
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -148,7 +148,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -173,7 +173,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 1)
ret void
@@ -191,7 +191,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2)
ret void
@@ -209,7 +209,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 3, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 3)
ret void
@@ -227,7 +227,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 4, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 4)
ret void
@@ -245,7 +245,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 1, 0, 1, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 6, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 6)
ret void
@@ -263,7 +263,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 5, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 5)
ret void
@@ -281,7 +281,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 1, 0, 1, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 7, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 7)
ret void
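
The run of tests above exercises the new operand one bit at a time: the intrinsic's final i32 argument (1, 2, 3, 4, 6, 5, 7) now survives into the MIR verbatim as the merged cache-policy operand, so bit 0 corresponds to glc, bit 1 to slc, and bit 2 to dlc. Below is a minimal C++ sketch of that decoding; the CPol namespace and enumerator names here are an assumption for illustration, inferred from the checked operand values rather than copied from the patch.

    #include <cstdint>
    #include <cstdio>

    // Assumed bit assignments, inferred from the checked operand values
    // above (glc -> 1, slc -> 2, glc|slc -> 3, dlc -> 4, slc|dlc -> 6, ...).
    namespace CPol {
    enum : uint32_t { GLC = 1u << 0, SLC = 1u << 1, DLC = 1u << 2 };
    } // namespace CPol

    int main() {
      for (uint32_t Cpol = 0; Cpol <= 7; ++Cpol)
        std::printf("cpol=%u glc=%u slc=%u dlc=%u\n", Cpol,
                    unsigned((Cpol & CPol::GLC) != 0),
                    unsigned((Cpol & CPol::SLC) != 0),
                    unsigned((Cpol & CPol::DLC) != 0));
      return 0; // e.g. cpol=6 decodes as slc+dlc, matching the i32 6 test above
    }

Under that reading, the tests for i32 1 through i32 7 cover every combination of the three flags in a single operand slot instead of three separate immediates.
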
@@ -301,7 +301,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@@ -322,7 +322,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: BUFFER_STORE_DWORDX3_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORDX3_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@@ -344,7 +344,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: BUFFER_STORE_DWORDX4_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORDX4_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@@ -362,7 +362,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: BUFFER_STORE_BYTE_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
+ ; CHECK: BUFFER_STORE_BYTE_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; CHECK: S_ENDPGM 0
%val.trunc = trunc i32 %val to i8
call void @llvm.amdgcn.raw.buffer.store.i8(i8 %val.trunc, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
@@ -381,7 +381,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%val.trunc = trunc i32 %val to i16
call void @llvm.amdgcn.raw.buffer.store.i16(i16 %val.trunc, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
@@ -400,7 +400,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@@ -418,7 +418,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@@ -438,7 +438,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@@ -474,7 +474,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY9]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -498,7 +498,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_v
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: BUFFER_STORE_DWORD_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
ret void
@@ -517,7 +517,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_v
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 4096, i32 %soffset, i32 0)
ret void
@@ -535,7 +535,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 16, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 16, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 16
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@@ -554,7 +554,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4095
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@@ -576,7 +576,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: %11:vgpr_32, dead %15:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4096
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@@ -595,7 +595,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0)
ret void
@@ -613,7 +613,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0)
ret void
@@ -631,7 +631,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 16, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 16, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 16
call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@@ -650,7 +650,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4095
call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@@ -672,7 +672,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: %11:vgpr_32, dead %15:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4096
call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@@ -711,7 +711,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %13, [[REG_SEQUENCE3]], [[COPY6]], 904, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 5000, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %13, [[REG_SEQUENCE3]], [[COPY6]], 904, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 5000, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -755,7 +755,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__5000_voffset__sgpr
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY6]], [[REG_SEQUENCE3]], [[COPY5]], 904, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 5000, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY6]], [[REG_SEQUENCE3]], [[COPY5]], 904, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 5000, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll
index 1c4775109ee0..056e373f9a99 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll
@@ -13,7 +13,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
+ ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]]
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; PACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset
@@ -26,7 +26,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
+ ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
@@ -44,7 +44,7 @@ define amdgpu_ps <2 x half> @raw_tbuffer_load_v2f16__sgpr_rsrc__vgpr_voffset__sg
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub0
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub1
; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
@@ -68,7 +68,7 @@ define amdgpu_ps <2 x half> @raw_tbuffer_load_v2f16__sgpr_rsrc__vgpr_voffset__sg
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_XY_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_XY_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_OFFEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call <2 x half> @llvm.amdgcn.raw.tbuffer.load.v2f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
@@ -92,7 +92,7 @@ define amdgpu_ps <4 x half> @raw_tbuffer_load_v4f16__sgpr_rsrc__vgpr_voffset__sg
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
+ ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub0
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub1
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub2
@@ -126,7 +126,7 @@ define amdgpu_ps <4 x half> @raw_tbuffer_load_v4f16__sgpr_rsrc__vgpr_voffset__sg
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
+ ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub0
; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub1
; PACKED: $vgpr0 = COPY [[COPY6]]
@@ -167,7 +167,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__vgpr_rsrc__sgpr_voffset__vgpr_soffs
; UNPACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
+ ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -207,7 +207,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__vgpr_rsrc__sgpr_voffset__vgpr_soffs
; PACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
+ ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -232,7 +232,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
+ ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]]
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; PACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc
@@ -245,7 +245,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
+ ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 1)
@@ -263,7 +263,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
+ ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 2, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]]
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; PACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc
@@ -276,7 +276,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
+ ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 2, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 2)
@@ -294,7 +294,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
+ ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 3, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]]
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; PACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc_glc
@@ -307,7 +307,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
+ ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 3, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 3)
@@ -325,7 +325,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
+ ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 4, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]]
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; PACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc
@@ -338,7 +338,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
+ ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 4, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 4)
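The operand rewrites in the hunks above all follow one pattern: the separate glc, slc, and dlc immediates collapse into a single bitmask operand, with glc contributing 1, slc contributing 2, and dlc contributing 4 (hence glc+slc becomes 3 in the slc_glc tests). Below is a minimal C++ sketch of that encoding, assuming only the bit values inferred from these test updates; the helper name encodeCachePolicy is illustrative and not part of the patch.

    #include <cassert>
    #include <cstdint>

    // Bit values inferred from the operand rewrites in the hunks above:
    // glc -> 1, slc -> 2, dlc -> 4.
    namespace CPol {
    enum : uint32_t { GLC = 1, SLC = 2, DLC = 4 };
    } // namespace CPol

    // Combine the former separate GLC/SLC/DLC immediates into one
    // cache-policy bitmask, as the updated CHECK lines suggest.
    static uint32_t encodeCachePolicy(bool glc, bool slc, bool dlc) {
      return (glc ? CPol::GLC : 0u) | (slc ? CPol::SLC : 0u) |
             (dlc ? CPol::DLC : 0u);
    }

    int main() {
      // Matches the rewrites above: glc-only -> 1, slc-only -> 2,
      // glc+slc -> 3, dlc-only -> 4.
      assert(encodeCachePolicy(true, false, false) == 1);
      assert(encodeCachePolicy(false, true, false) == 2);
      assert(encodeCachePolicy(true, true, false) == 3);
      assert(encodeCachePolicy(false, false, true) == 4);
      return 0;
    }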
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll
index 7472e3abca87..d09baeb278ab 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll
@@ -12,7 +12,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
@@ -30,7 +30,7 @@ define amdgpu_ps <2 x float> @raw_tbuffer_load_v2f32__sgpr_rsrc__vgpr_voffset__s
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[TBUFFER_LOAD_FORMAT_XY_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[TBUFFER_LOAD_FORMAT_XY_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_OFFEN]].sub0
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_OFFEN]].sub1
; CHECK: $vgpr0 = COPY [[COPY6]]
@@ -51,7 +51,7 @@ define amdgpu_ps <3 x float> @raw_tbuffer_load_v3f32__sgpr_rsrc__vgpr_voffset__s
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub0
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub1
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub2
@@ -74,7 +74,7 @@ define amdgpu_ps <4 x float> @raw_tbuffer_load_v4f32__sgpr_rsrc__vgpr_voffset__s
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub0
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub1
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub2
@@ -119,7 +119,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__vgpr_rsrc__sgpr_voffset__vgpr_soff
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
; CHECK: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U32_e64_]], [[S_AND_B32_]], implicit-def $scc
- ; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -144,7 +144,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 1)
@@ -162,7 +162,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 2, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 2)
@@ -180,7 +180,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 3, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 3)
@@ -198,7 +198,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 4, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 4)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll
index 7a181e849a60..206968e017e8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll
@@ -14,7 +14,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
+ ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset
; PACKED: bb.1 (%ir-block.0):
@@ -27,7 +27,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
+ ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
ret void
@@ -49,7 +49,7 @@ define amdgpu_ps void @raw_tbuffer_store_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_so
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
- ; UNPACKED: TBUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; UNPACKED: TBUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_tbuffer_store_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset
; PACKED: bb.1 (%ir-block.0):
@@ -62,7 +62,7 @@ define amdgpu_ps void @raw_tbuffer_store_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_so
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: TBUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; PACKED: TBUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
ret void
@@ -93,7 +93,7 @@ define amdgpu_ps void @raw_tbuffer_store_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_so
; UNPACKED: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; UNPACKED: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY1]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY1]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3
- ; UNPACKED: TBUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
+ ; UNPACKED: TBUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_tbuffer_store_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset
; PACKED: bb.1 (%ir-block.0):
@@ -108,7 +108,7 @@ define amdgpu_ps void @raw_tbuffer_store_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_so
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: TBUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
+ ; PACKED: TBUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
ret void
@@ -143,7 +143,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__sgpr_soff
; UNPACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
; UNPACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; UNPACKED: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
+ ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -179,7 +179,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__sgpr_soff
; PACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
; PACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; PACKED: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
+ ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -224,7 +224,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__vgpr_soff
; UNPACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
+ ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -263,7 +263,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__vgpr_soff
; PACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
+ ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -309,7 +309,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__sgpr_voffset__vgpr_soff
; UNPACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
+ ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -349,7 +349,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__sgpr_voffset__vgpr_soff
; PACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
+ ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -374,7 +374,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
+ ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc
; PACKED: bb.1 (%ir-block.0):
@@ -387,7 +387,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
+ ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 1)
ret void
@@ -405,7 +405,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
+ ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 2, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc
; PACKED: bb.1 (%ir-block.0):
@@ -418,7 +418,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
+ ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 2, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 2)
ret void
@@ -436,7 +436,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
+ ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 3, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc_glc
; PACKED: bb.1 (%ir-block.0):
@@ -449,7 +449,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
+ ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 3, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 3)
ret void
@@ -467,7 +467,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
+ ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 4, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc
; PACKED: bb.1 (%ir-block.0):
@@ -480,7 +480,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
+ ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 4, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 4)
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll
index 589351e55ed5..0f8f768cab44 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll
@@ -14,7 +14,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; UNPACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
+ ; UNPACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_tbuffer_store_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffset
; PACKED: bb.1 (%ir-block.0):
@@ -27,7 +27,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; PACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
+ ; PACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.i8(i8 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
ret void
@@ -62,7 +62,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__sgpr_soffs
; UNPACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
; UNPACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; UNPACKED: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; UNPACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
+ ; UNPACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -98,7 +98,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__sgpr_soffs
; PACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
; PACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; PACKED: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; PACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
+ ; PACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -143,7 +143,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__vgpr_soffs
; UNPACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; UNPACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
+ ; UNPACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -182,7 +182,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__vgpr_soffs
; PACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; PACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
+ ; PACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -228,7 +228,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__sgpr_voffset__vgpr_soffs
; UNPACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; UNPACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
+ ; UNPACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -268,7 +268,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__sgpr_voffset__vgpr_soffs
; PACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; PACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
+ ; PACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll
index 2228c2b714f2..9ead4cf92e3a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll
@@ -14,7 +14,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
ret void
@@ -35,7 +35,7 @@ define amdgpu_ps void @raw_tbuffer_store_v2f32__sgpr_rsrc__vgpr_voffset__sgpr_so
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: TBUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: TBUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
ret void
@@ -57,7 +57,7 @@ define amdgpu_ps void @raw_tbuffer_store_v3f32__sgpr_rsrc__vgpr_voffset__sgpr_so
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[COPY6]], %subreg.sub3
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: TBUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE]], [[COPY7]], [[REG_SEQUENCE1]], [[COPY8]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: TBUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE]], [[COPY7]], [[REG_SEQUENCE1]], [[COPY8]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
ret void
@@ -80,7 +80,7 @@ define amdgpu_ps void @raw_tbuffer_store_v4f32__sgpr_rsrc__vgpr_voffset__sgpr_so
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[COPY8]], [[REG_SEQUENCE1]], [[COPY9]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[COPY8]], [[REG_SEQUENCE1]], [[COPY9]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
ret void
@@ -100,7 +100,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__sgpr_voffset__sgpr_soff
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr7
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
- ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
ret void
@@ -135,7 +135,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__vgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 1, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -180,7 +180,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__vgpr_rsrc__vgpr_voffset__vgpr_soff
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; CHECK: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U32_e64_]], [[S_AND_B32_]], implicit-def $scc
- ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -226,7 +226,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__vgpr_rsrc__sgpr_voffset__vgpr_soff
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; CHECK: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U32_e64_]], [[S_AND_B32_]], implicit-def $scc
- ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -252,7 +252,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 1)
ret void
@@ -271,7 +271,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 2, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 2)
ret void
@@ -290,7 +290,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 3, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 3)
ret void
@@ -309,7 +309,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 4, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 4)
ret void
@@ -328,7 +328,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vdpr_voffset__sgpr_soff
; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 94, i32 0)
ret void
@@ -345,7 +345,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
+ ; CHECK: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 94, i32 0)
ret void
@@ -364,7 +364,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4)
+ ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 4096, i32 %soffset, i32 94, i32 0)
ret void
@@ -382,7 +382,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 16, align 1, addrspace 4)
+ ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 16, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset = add i32 %voffset.base, 16
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
@@ -401,7 +401,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
+ ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset = add i32 %voffset.base, 4095
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
@@ -423,7 +423,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
- ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[V_ADD_U32_e64_]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4)
+ ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[V_ADD_U32_e64_]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset = add i32 %voffset.base, 4096
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
@@ -442,7 +442,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
- ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 94, i32 0)
ret void
@@ -460,7 +460,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
- ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 94, i32 0)
ret void
@@ -480,7 +480,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16
; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY6]], [[S_MOV_B32_]], implicit-def $scc
- ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%soffset = add i32 %soffset.base, 16
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
@@ -501,7 +501,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY6]], [[S_MOV_B32_]], implicit-def $scc
- ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%soffset = add i32 %soffset.base, 4095
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
@@ -522,7 +522,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY6]], [[S_MOV_B32_]], implicit-def $scc
- ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%soffset = add i32 %soffset.base, 4096
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
@@ -560,7 +560,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -606,7 +606,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec
; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[V_ADD_U32_e64_]], [[REG_SEQUENCE3]], [[COPY6]], 904, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 5000, align 1, addrspace 4)
+ ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[V_ADD_U32_e64_]], [[REG_SEQUENCE3]], [[COPY6]], 904, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 5000, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
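For readers cross-checking the CHECK-line changes above: the separate glc/slc/dlc immediates collapse into one cache_policy value with glc in bit 0, slc in bit 1, and dlc in bit 2, which is why "0, 1" becomes 2 (slc only), "1, 1" becomes 3 (glc+slc), and a lone dlc becomes 4. Below is a minimal C++ sketch of that mapping; the CPol names and bit values are assumptions inferred from these tests, not quoted from the patch.

// Sketch under the assumption CPol::GLC=1, SLC=2, DLC=4 (matching the
// operand values seen in the updated CHECK lines above).
#include <cassert>
#include <cstdint>

namespace CPol { enum : uint8_t { GLC = 1, SLC = 2, DLC = 4 }; }

// Fold the three former boolean operands into the single cache_policy
// bitmask that the new instruction forms carry.
static uint8_t encodeCachePolicy(bool Glc, bool Slc, bool Dlc) {
  uint8_t Policy = 0;
  if (Glc) Policy |= CPol::GLC;
  if (Slc) Policy |= CPol::SLC;
  if (Dlc) Policy |= CPol::DLC;
  return Policy;
}

int main() {
  assert(encodeCachePolicy(false, true, false) == 2); // old "0, 1, 0, 0" slc case
  assert(encodeCachePolicy(true, true, false) == 3);  // old glc+slc case
  assert(encodeCachePolicy(false, false, true) == 4); // old dlc case
  return 0;
}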
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
index b642ba94d19a..8b2e2c043504 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
@@ -16,7 +16,7 @@ define amdgpu_ps i32 @s_buffer_load_i32(<4 x i32> inreg %rsrc, i32 inreg %soffse
; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load 4)
; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -30,7 +30,7 @@ define amdgpu_ps i32 @s_buffer_load_i32(<4 x i32> inreg %rsrc, i32 inreg %soffse
; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load 4)
; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -44,7 +44,7 @@ define amdgpu_ps i32 @s_buffer_load_i32(<4 x i32> inreg %rsrc, i32 inreg %soffse
; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load 4)
; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -63,7 +63,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_glc(<4 x i32> inreg %rsrc, i32 inreg %so
; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1, 0 :: (dereferenceable invariant load 4)
+ ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load 4)
; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -77,7 +77,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_glc(<4 x i32> inreg %rsrc, i32 inreg %so
; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1, 0 :: (dereferenceable invariant load 4)
+ ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load 4)
; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -91,7 +91,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_glc(<4 x i32> inreg %rsrc, i32 inreg %so
; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1, 0 :: (dereferenceable invariant load 4)
+ ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load 4)
; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -110,7 +110,7 @@ define amdgpu_ps <2 x i32> @s_buffer_load_v2i32(<4 x i32> inreg %rsrc, i32 inreg
; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX6: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 8, align 4)
+ ; GFX6: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load 8, align 4)
; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0
; GFX6: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1
; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
@@ -129,7 +129,7 @@ define amdgpu_ps <2 x i32> @s_buffer_load_v2i32(<4 x i32> inreg %rsrc, i32 inreg
; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX7: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 8, align 4)
+ ; GFX7: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load 8, align 4)
; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0
; GFX7: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1
; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
@@ -148,7 +148,7 @@ define amdgpu_ps <2 x i32> @s_buffer_load_v2i32(<4 x i32> inreg %rsrc, i32 inreg
; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX8: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 8, align 4)
+ ; GFX8: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load 8, align 4)
; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0
; GFX8: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1
; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
@@ -172,7 +172,7 @@ define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg
; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX6: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 12, align 4)
+ ; GFX6: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load 12, align 4)
; GFX6: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11
; GFX6: [[COPY5:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = COPY [[REG_SEQUENCE1]]
@@ -199,7 +199,7 @@ define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg
; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX7: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 12, align 4)
+ ; GFX7: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load 12, align 4)
; GFX7: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11
; GFX7: [[COPY5:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = COPY [[REG_SEQUENCE1]]
@@ -226,7 +226,7 @@ define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg
; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX8: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 12, align 4)
+ ; GFX8: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load 12, align 4)
; GFX8: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11
; GFX8: [[COPY5:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = COPY [[REG_SEQUENCE1]]
@@ -258,7 +258,7 @@ define amdgpu_ps <8 x i32> @s_buffer_load_v8i32(<4 x i32> inreg %rsrc, i32 inreg
; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX6: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 32, align 4)
+ ; GFX6: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load 32, align 4)
; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0
; GFX6: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1
; GFX6: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2
@@ -301,7 +301,7 @@ define amdgpu_ps <8 x i32> @s_buffer_load_v8i32(<4 x i32> inreg %rsrc, i32 inreg
; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX7: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 32, align 4)
+ ; GFX7: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load 32, align 4)
; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0
; GFX7: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1
; GFX7: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2
@@ -344,7 +344,7 @@ define amdgpu_ps <8 x i32> @s_buffer_load_v8i32(<4 x i32> inreg %rsrc, i32 inreg
; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX8: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 32, align 4)
+ ; GFX8: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load 32, align 4)
; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0
; GFX8: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1
; GFX8: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2
@@ -392,7 +392,7 @@ define amdgpu_ps <16 x i32> @s_buffer_load_v16i32(<4 x i32> inreg %rsrc, i32 inr
; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX6: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 64, align 4)
+ ; GFX6: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load 64, align 4)
; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0
; GFX6: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1
; GFX6: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2
@@ -467,7 +467,7 @@ define amdgpu_ps <16 x i32> @s_buffer_load_v16i32(<4 x i32> inreg %rsrc, i32 inr
; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX7: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 64, align 4)
+ ; GFX7: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load 64, align 4)
; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0
; GFX7: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1
; GFX7: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2
@@ -542,7 +542,7 @@ define amdgpu_ps <16 x i32> @s_buffer_load_v16i32(<4 x i32> inreg %rsrc, i32 inr
; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX8: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 64, align 4)
+ ; GFX8: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load 64, align 4)
; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0
; GFX8: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1
; GFX8: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2
@@ -622,7 +622,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1(<4 x i32> inreg %rsrc) {
; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
- ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4)
; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -636,7 +636,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1(<4 x i32> inreg %rsrc) {
; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
- ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4)
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -649,7 +649,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1(<4 x i32> inreg %rsrc) {
; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 0 :: (dereferenceable invariant load 4)
; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -667,7 +667,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_4(<4 x i32> inreg %rsrc) {
; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 1, 0 :: (dereferenceable invariant load 4)
+ ; GFX6: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 1 :: (dereferenceable invariant load 4)
; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -680,7 +680,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_4(<4 x i32> inreg %rsrc) {
; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 1, 0 :: (dereferenceable invariant load 4)
+ ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 1 :: (dereferenceable invariant load 4)
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -693,7 +693,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_4(<4 x i32> inreg %rsrc) {
; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 4, 1, 0 :: (dereferenceable invariant load 4)
+ ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 4, 1 :: (dereferenceable invariant load 4)
; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -712,7 +712,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_255(<4 x i32> inreg %rsrc) {
; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 255
- ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4)
; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -726,7 +726,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_255(<4 x i32> inreg %rsrc) {
; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 255
- ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4)
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -739,7 +739,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_255(<4 x i32> inreg %rsrc) {
; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load 4)
; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -757,7 +757,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_256(<4 x i32> inreg %rsrc) {
; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX6: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0 :: (dereferenceable invariant load 4)
; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -770,7 +770,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_256(<4 x i32> inreg %rsrc) {
; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0 :: (dereferenceable invariant load 4)
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -783,7 +783,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_256(<4 x i32> inreg %rsrc) {
; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 256, 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 256, 0 :: (dereferenceable invariant load 4)
; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -801,7 +801,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1020(<4 x i32> inreg %rsrc) {
; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX6: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load 4)
; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -814,7 +814,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1020(<4 x i32> inreg %rsrc) {
; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load 4)
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -827,7 +827,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1020(<4 x i32> inreg %rsrc) {
; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1020, 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1020, 0 :: (dereferenceable invariant load 4)
; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -846,7 +846,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1023(<4 x i32> inreg %rsrc) {
; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1023
- ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4)
; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -860,7 +860,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1023(<4 x i32> inreg %rsrc) {
; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1023
- ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4)
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -873,7 +873,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1023(<4 x i32> inreg %rsrc) {
; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1023, 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1023, 0 :: (dereferenceable invariant load 4)
; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -892,7 +892,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1024(<4 x i32> inreg %rsrc) {
; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
- ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4)
; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -905,7 +905,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1024(<4 x i32> inreg %rsrc) {
; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 256, 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 256, 0 :: (dereferenceable invariant load 4)
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -918,7 +918,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1024(<4 x i32> inreg %rsrc) {
; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1024, 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1024, 0 :: (dereferenceable invariant load 4)
; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -937,7 +937,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1025(<4 x i32> inreg %rsrc) {
; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1025
- ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4)
; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -951,7 +951,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1025(<4 x i32> inreg %rsrc) {
; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1025
- ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4)
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -964,7 +964,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1025(<4 x i32> inreg %rsrc) {
; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1025, 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1025, 0 :: (dereferenceable invariant load 4)
; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -983,7 +983,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg1(<4 x i32> inreg %desc) {
; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
- ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4)
; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -997,7 +997,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg1(<4 x i32> inreg %desc) {
; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
- ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4)
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -1011,7 +1011,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg1(<4 x i32> inreg %desc) {
; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
- ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4)
; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -1030,7 +1030,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg4(<4 x i32> inreg %desc) {
; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -4
- ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4)
; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -1043,7 +1043,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg4(<4 x i32> inreg %desc) {
; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741823, 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741823, 0 :: (dereferenceable invariant load 4)
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -1057,7 +1057,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg4(<4 x i32> inreg %desc) {
; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -4
- ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4)
; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -1076,7 +1076,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg8(<4 x i32> inreg %desc) {
; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -8
- ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4)
; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -1089,7 +1089,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg8(<4 x i32> inreg %desc) {
; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741822, 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741822, 0 :: (dereferenceable invariant load 4)
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -1103,7 +1103,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg8(<4 x i32> inreg %desc) {
; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -8
- ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4)
; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -1122,7 +1122,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit31(<4 x i32> inreg %desc) {
; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
- ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4)
; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -1135,7 +1135,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit31(<4 x i32> inreg %desc) {
; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 536870912, 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 536870912, 0 :: (dereferenceable invariant load 4)
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -1149,7 +1149,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit31(<4 x i32> inreg %desc) {
; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
- ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4)
; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -1168,7 +1168,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_bit30(<4 x i32> inreg %desc)
; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741824
- ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1, 0 :: (dereferenceable invariant load 4)
+ ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1 :: (dereferenceable invariant load 4)
; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -1181,7 +1181,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_bit30(<4 x i32> inreg %desc)
; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 268435456, 1, 0 :: (dereferenceable invariant load 4)
+ ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 268435456, 1 :: (dereferenceable invariant load 4)
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -1195,7 +1195,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_bit30(<4 x i32> inreg %desc)
; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741824
- ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1, 0 :: (dereferenceable invariant load 4)
+ ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1 :: (dereferenceable invariant load 4)
; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
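
For reference: the trailing "1" in the glc_bit30 hunks above is the new
combined cache_policy immediate rather than a standalone GLC operand. A
minimal C++ sketch of how such a bitmask could pack the former GLC/SLC/DLC
booleans (the bit assignments below are assumed for illustration only; the
authoritative enum is the one this patch adds to SIDefines.h):

#include <cstdint>

// Assumed bit assignments, for illustration only.
enum CachePolicyBits : uint32_t {
  CPOL_GLC = 1u << 0, // globally coherent
  CPOL_SLC = 1u << 1, // system level coherent
  CPOL_DLC = 1u << 2, // device level coherent
};

// Pack the three former boolean operands into the single cache-policy
// operand that now appears at the end of the loads above.
constexpr uint32_t encodeCachePolicy(bool GLC, bool SLC, bool DLC) {
  return (GLC ? CPOL_GLC : 0) | (SLC ? CPOL_SLC : 0) | (DLC ? CPOL_DLC : 0);
}

static_assert(encodeCachePolicy(true, false, false) == 1,
              "a glc-only s_buffer_load now carries a single '1' operand");

Under this encoding, encodeCachePolicy(/*GLC=*/true, /*SLC=*/false,
/*DLC=*/false) yields the ", 1 ::" operand seen in the GFX6/7/8 check
lines above, while the all-default case collapses to a single "0".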
@@ -1214,7 +1214,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit29(<4 x i32> inreg %desc) {
; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 536870912
- ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4)
; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -1227,7 +1227,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit29(<4 x i32> inreg %desc) {
; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 134217728, 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 134217728, 0 :: (dereferenceable invariant load 4)
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -1241,7 +1241,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit29(<4 x i32> inreg %desc) {
; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 536870912
- ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4)
; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -1260,7 +1260,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit21(<4 x i32> inreg %desc) {
; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2097152
- ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4)
; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -1273,7 +1273,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit21(<4 x i32> inreg %desc) {
; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 524288, 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 524288, 0 :: (dereferenceable invariant load 4)
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -1287,7 +1287,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit21(<4 x i32> inreg %desc) {
; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2097152
- ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4)
; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -1306,7 +1306,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit20(<4 x i32> inreg %desc) {
; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576
- ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4)
; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -1319,7 +1319,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit20(<4 x i32> inreg %desc) {
; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 262144, 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 262144, 0 :: (dereferenceable invariant load 4)
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -1333,7 +1333,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit20(<4 x i32> inreg %desc) {
; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576
- ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4)
; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -1352,7 +1352,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit20(<4 x i32> inreg %desc)
; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1048576
- ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4)
; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -1365,7 +1365,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit20(<4 x i32> inreg %desc)
; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073479680, 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073479680, 0 :: (dereferenceable invariant load 4)
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -1379,7 +1379,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit20(<4 x i32> inreg %desc)
; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1048576
- ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4)
; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -1398,7 +1398,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit19(<4 x i32> inreg %desc) {
; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 524288
- ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4)
; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -1411,7 +1411,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit19(<4 x i32> inreg %desc) {
; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 131072, 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 131072, 0 :: (dereferenceable invariant load 4)
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -1424,7 +1424,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit19(<4 x i32> inreg %desc) {
; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 524288, 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 524288, 0 :: (dereferenceable invariant load 4)
; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -1443,7 +1443,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit19(<4 x i32> inreg %desc)
; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -524288
- ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4)
; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -1456,7 +1456,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit19(<4 x i32> inreg %desc)
; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073610752, 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073610752, 0 :: (dereferenceable invariant load 4)
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -1470,7 +1470,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit19(<4 x i32> inreg %desc)
; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -524288
- ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4)
; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
@@ -1491,7 +1491,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset(<4 x i32> inreg %rsrc, i32
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset
@@ -1504,7 +1504,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset(<4 x i32> inreg %rsrc, i32
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset
@@ -1517,7 +1517,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset(<4 x i32> inreg %rsrc, i32
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
@@ -1535,7 +1535,7 @@ define amdgpu_ps <2 x float> @s_buffer_load_v2f32_vgpr_offset(<4 x i32> inreg %r
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 8, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 8, align 4)
; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0
; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1
; GFX6: $vgpr0 = COPY [[COPY5]]
@@ -1551,7 +1551,7 @@ define amdgpu_ps <2 x float> @s_buffer_load_v2f32_vgpr_offset(<4 x i32> inreg %r
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 8, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 8, align 4)
; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0
; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1
; GFX7: $vgpr0 = COPY [[COPY5]]
@@ -1567,7 +1567,7 @@ define amdgpu_ps <2 x float> @s_buffer_load_v2f32_vgpr_offset(<4 x i32> inreg %r
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 8, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 8, align 4)
; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0
; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1
; GFX8: $vgpr0 = COPY [[COPY5]]
@@ -1588,7 +1588,7 @@ define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %r
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
; GFX6: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
; GFX6: [[COPY5:%[0-9]+]]:vreg_128 = COPY [[DEF]]
; GFX6: [[COPY6:%[0-9]+]]:vreg_128 = COPY [[DEF]]
@@ -1611,7 +1611,7 @@ define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %r
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
; GFX7: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
; GFX7: [[COPY5:%[0-9]+]]:vreg_128 = COPY [[DEF]]
; GFX7: [[COPY6:%[0-9]+]]:vreg_128 = COPY [[DEF]]
@@ -1634,7 +1634,7 @@ define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %r
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
; GFX8: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
; GFX8: [[COPY5:%[0-9]+]]:vreg_128 = COPY [[DEF]]
; GFX8: [[COPY6:%[0-9]+]]:vreg_128 = COPY [[DEF]]
@@ -1662,7 +1662,7 @@ define amdgpu_ps <4 x float> @s_buffer_load_v4f32_vgpr_offset(<4 x i32> inreg %r
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
@@ -1682,7 +1682,7 @@ define amdgpu_ps <4 x float> @s_buffer_load_v4f32_vgpr_offset(<4 x i32> inreg %r
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
@@ -1702,7 +1702,7 @@ define amdgpu_ps <4 x float> @s_buffer_load_v4f32_vgpr_offset(<4 x i32> inreg %r
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
@@ -1727,8 +1727,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset(<4 x i32> inreg %r
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
@@ -1757,8 +1757,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset(<4 x i32> inreg %r
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
@@ -1787,8 +1787,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset(<4 x i32> inreg %r
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
@@ -1822,10 +1822,10 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset(<4 x i32> inreg
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 16, align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 48, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 48, align 4)
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
@@ -1870,10 +1870,10 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset(<4 x i32> inreg
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 16, align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 48, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 48, align 4)
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
@@ -1918,10 +1918,10 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset(<4 x i32> inreg
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 16, align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 48, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 48, align 4)
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
@@ -1971,7 +1971,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4092(<4 x i32> inreg %
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092
@@ -1984,7 +1984,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4092(<4 x i32> inreg %
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092
@@ -1997,7 +1997,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4092(<4 x i32> inreg %
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
%soffset = add i32 %soffset.base, 4092
@@ -2016,7 +2016,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4095(<4 x i32> inreg %
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095
@@ -2029,7 +2029,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4095(<4 x i32> inreg %
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095
@@ -2042,7 +2042,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4095(<4 x i32> inreg %
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
%soffset = add i32 %soffset.base, 4095
@@ -2061,7 +2061,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4096(<4 x i32> inreg %
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096
@@ -2074,7 +2074,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4096(<4 x i32> inreg %
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
- ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096
@@ -2087,7 +2087,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4096(<4 x i32> inreg %
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
- ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
%soffset = add i32 %soffset.base, 4096
@@ -2107,8 +2107,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4064(<4 x i32>
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
@@ -2137,8 +2137,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4064(<4 x i32>
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
@@ -2167,8 +2167,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4064(<4 x i32>
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
@@ -2204,8 +2204,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4068(<4 x i32>
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
@@ -2234,8 +2234,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4068(<4 x i32>
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
@@ -2264,8 +2264,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4068(<4 x i32>
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
@@ -2300,10 +2300,10 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4032(<4 x i3
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 16, align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 48, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 48, align 4)
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
@@ -2348,10 +2348,10 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4032(<4 x i3
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 16, align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 48, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 48, align 4)
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
@@ -2396,10 +2396,10 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4032(<4 x i3
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 16, align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 48, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 48, align 4)
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
@@ -2450,10 +2450,10 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4036(<4 x i3
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4036
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 16, align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 48, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 48, align 4)
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
@@ -2498,10 +2498,10 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4036(<4 x i3
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4036
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 16, align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 48, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 48, align 4)
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
@@ -2546,10 +2546,10 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4036(<4 x i3
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 16, align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 48, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 48, align 4)
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
@@ -2617,7 +2617,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg %
; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -2652,7 +2652,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg %
; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -2687,7 +2687,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg %
; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -2726,7 +2726,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> %
; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -2759,7 +2759,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> %
; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX7: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX7: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -2792,7 +2792,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> %
; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX8: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX8: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -2836,7 +2836,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> %
; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -2873,7 +2873,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> %
; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -2910,7 +2910,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> %
; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -2950,7 +2950,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc)
; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4 from unknown-address + 4095, align 1)
+ ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4 from unknown-address + 4095, align 1)
; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -2983,7 +2983,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc)
; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX7: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4 from unknown-address + 4095, align 1)
+ ; GFX7: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4 from unknown-address + 4095, align 1)
; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -3016,7 +3016,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc)
; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX8: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4 from unknown-address + 4095, align 1)
+ ; GFX8: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4 from unknown-address + 4095, align 1)
; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -3057,7 +3057,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc)
; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -3092,7 +3092,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc)
; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -3125,7 +3125,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc)
; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX8: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4 from unknown-address + 4096)
+ ; GFX8: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4 from unknown-address + 4096)
; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -3165,8 +3165,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> %
; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -3215,8 +3215,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> %
; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -3265,8 +3265,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> %
; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -3327,8 +3327,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> %
; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -3381,8 +3381,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> %
; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -3435,8 +3435,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> %
; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -3495,8 +3495,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> %
; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -3549,8 +3549,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> %
; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -3603,8 +3603,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> %
; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -3660,8 +3660,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000
; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -3711,8 +3711,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000
; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -3762,8 +3762,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000
; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 936, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 952, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 936, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 952, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -3819,8 +3819,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076
; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -3870,8 +3870,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076
; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -3921,8 +3921,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076
; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -3978,8 +3978,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080
; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -4029,8 +4029,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080
; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -4080,8 +4080,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080
; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -4136,8 +4136,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4
; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 4064, align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 4064, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 4064, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 4064, align 4)
; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -4186,8 +4186,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4
; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 4064, align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 4064, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 4064, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 4064, align 4)
; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -4236,8 +4236,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4
; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 4064, align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 4064, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 4064, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 4064, align 4)
; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -4277,7 +4277,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr(<4 x i32> inreg %
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX7-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr
@@ -4290,7 +4290,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr(<4 x i32> inreg %
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX8-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr
@@ -4303,7 +4303,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr(<4 x i32> inreg %
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
%offset = add i32 %offset.v, %offset.s
@@ -4322,7 +4322,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr(<4 x i32> inreg %
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX7-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr
@@ -4335,7 +4335,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr(<4 x i32> inreg %
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX8-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr
@@ -4348,7 +4348,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr(<4 x i32> inreg %
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
%offset = add i32 %offset.s, %offset.v
@@ -4370,7 +4370,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr_imm(<4 x i32> inr
; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
; GFX6: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX7-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm
@@ -4386,7 +4386,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr_imm(<4 x i32> inr
; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
; GFX7: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX8-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm
@@ -4402,7 +4402,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr_imm(<4 x i32> inr
; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
; GFX8: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
%offset.base = add i32 %offset.v, %offset.s
@@ -4425,7 +4425,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr_imm(<4 x i32> inr
; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
; GFX6: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX7-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm
@@ -4441,7 +4441,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr_imm(<4 x i32> inr
; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
; GFX7: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX8-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm
@@ -4457,7 +4457,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr_imm(<4 x i32> inr
; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
; GFX8: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
%offset.base = add i32 %offset.s, %offset.v
@@ -4480,7 +4480,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_sgpr_vgpr(<4 x i32> inr
; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX7-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr
@@ -4495,7 +4495,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_sgpr_vgpr(<4 x i32> inr
; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
; GFX7: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
- ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX8-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr
@@ -4510,7 +4510,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_sgpr_vgpr(<4 x i32> inr
; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
; GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
- ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
%offset.base = add i32 %offset.s, 1024
@@ -4533,7 +4533,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_vgpr_sgpr(<4 x i32> inr
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GFX6: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX7-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr
@@ -4549,7 +4549,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_vgpr_sgpr(<4 x i32> inr
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GFX7: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
- ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX8-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr
@@ -4565,7 +4565,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_vgpr_sgpr(<4 x i32> inr
; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GFX8: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
- ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
%offset.base = add i32 %offset.v, 1024
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll
index bfffdea59c06..3b2095fba134 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll
@@ -16,7 +16,7 @@ define amdgpu_ps float @struct_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__vgpr_
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
- ; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_BOTHEN_RTN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%ret = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
@@ -38,7 +38,7 @@ define amdgpu_ps float @struct_buffer_atomic_add_i32_noret__vgpr_val__sgpr_rsrc_
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
- ; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_BOTHEN_RTN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%ret = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
@@ -62,7 +62,7 @@ define amdgpu_ps <2 x float> @struct_buffer_atomic_add_i64__vgpr_val__sgpr_rsrc_
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
- ; CHECK: [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 8 on custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store 8 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN]].sub0
; CHECK: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN]].sub1
; CHECK: $vgpr0 = COPY [[COPY9]]
@@ -89,7 +89,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_i64_noret__vgpr_val__sgpr_rsrc__
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
- ; CHECK: [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 8 on custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store 8 on custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%ret = call i64 @llvm.amdgcn.struct.buffer.atomic.add.i64(i64 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
@@ -132,7 +132,7 @@ define amdgpu_ps float @struct_buffer_atomic_add_i32__sgpr_val__vgpr_rsrc__sgpr_
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
- ; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -184,7 +184,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_i32_noret__sgpr_val__vgpr_rsrc__
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
- ; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -212,7 +212,7 @@ define amdgpu_ps float @struct_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__vgpr_
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
- ; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 3, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_BOTHEN_RTN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%ret = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
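[Editor's note, not part of the committed diff: the hunk above shows how the new operand composes. The old separate glc/slc immediates ("..., 0, 1, 1, implicit $exec") become one value ("..., 0, 3, implicit $exec"), and the slc-only atomics later in this diff go from "..., 1," to "..., 2,". A minimal illustrative sketch of that fold, assuming the bit assignments GLC = 1, SLC = 2, DLC = 4 that these updated checks imply; the names below are for illustration only.]

// Illustrative sketch, not the committed code: folding the former
// glc/slc/dlc immediates into a single cache_policy bitmask.
#include <cassert>

namespace CPol {
enum : unsigned { GLC = 1, SLC = 2, DLC = 4 };
} // namespace CPol

static unsigned encodeCachePolicy(bool Glc, bool Slc, bool Dlc) {
  unsigned Policy = 0;
  if (Glc)
    Policy |= CPol::GLC;
  if (Slc)
    Policy |= CPol::SLC;
  if (Dlc)
    Policy |= CPol::DLC;
  return Policy;
}

int main() {
  // Matches the hunk above: "..., 0, 1, 1, implicit" -> "..., 0, 3, implicit"
  assert(encodeCachePolicy(true, true, false) == 3);
  // Matches the slc-only updates below: "..., 0, 1," -> "..., 0, 2,"
  assert(encodeCachePolicy(false, true, false) == 2);
}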
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll
index 0da9728ae476..f03efd59a324 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll
@@ -19,7 +19,7 @@ define amdgpu_ps float @struct_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__sg
; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0
; CHECK: $vgpr0 = COPY [[COPY9]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
@@ -45,7 +45,7 @@ define amdgpu_ps void @struct_buffer_atomic_cmpswap_noret_i32__vgpr_val__vgpr_cm
; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0
; CHECK: S_ENDPGM 0
%ret = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
@@ -92,7 +92,7 @@ define amdgpu_ps float @struct_buffer_atomic_cmpswap_i32__sgpr_val__sgpr_cmp__vg
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY11]], %subreg.sub0, [[COPY12]], %subreg.sub1
; CHECK: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
- ; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
@@ -148,7 +148,7 @@ define amdgpu_ps void @struct_buffer_atomic_cmpswap_i32_noret__sgpr_val__sgpr_cm
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY11]], %subreg.sub0, [[COPY12]], %subreg.sub1
; CHECK: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
- ; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
@@ -178,7 +178,7 @@ define amdgpu_ps float @struct_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__sg
; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 4095, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
+ ; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 4095, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0
; CHECK: $vgpr0 = COPY [[COPY9]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll
index 1f9f9af6c21b..a53e42bd3321 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll
@@ -32,7 +32,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__
; GFX90A: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX90A: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX90A: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
- ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: S_ENDPGM 0
%ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
@@ -67,7 +67,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__
; GFX90A: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX90A: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX90A: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
- ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 4095, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
+ ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 4095, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
; GFX90A: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4095
%ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset.add, i32 %soffset, i32 0)
@@ -99,7 +99,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__
; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX90A: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX90A: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
+ ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
; GFX90A: S_ENDPGM 0
%ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 4095, i32 %soffset, i32 0)
ret void
@@ -131,7 +131,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__
; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX90A: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX90A: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: S_ENDPGM 0
%ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
ret void
@@ -218,7 +218,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__
; GFX90A: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
; GFX90A: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; GFX90A: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
- ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX90A: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -306,7 +306,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__
; GFX90A: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; GFX90A: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; GFX90A: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX90A: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -334,7 +334,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__
; GFX908: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX908: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX908: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
- ; GFX908: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; GFX908: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX908: S_ENDPGM 0
; GFX90A-LABEL: name: struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc
; GFX90A: bb.1 (%ir-block.0):
@@ -349,7 +349,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__
; GFX90A: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX90A: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX90A: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
- ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 3, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: S_ENDPGM 0
%ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
ret void
@@ -367,7 +367,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__
; GFX908: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX908: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX908: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX908: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; GFX908: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX908: S_ENDPGM 0
; GFX90A-LABEL: name: struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset_slc
; GFX90A: bb.1 (%ir-block.0):
@@ -380,7 +380,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__
; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX90A: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX90A: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 3, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: S_ENDPGM 0
%ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 2)
ret void
@@ -415,7 +415,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc
; GFX90A: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX90A: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX90A: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
- ; GFX90A: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; GFX90A: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: S_ENDPGM 0
%ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
@@ -446,7 +446,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc
; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX90A: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX90A: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX90A: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; GFX90A: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: S_ENDPGM 0
%ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll
index 0039097ca142..d36e9951af82 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll
@@ -15,7 +15,7 @@ define amdgpu_ps half @struct_buffer_load_format_f16__sgpr_rsrc__vgpr_vindex__vg
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
+ ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]]
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; PACKED-LABEL: name: struct_buffer_load_format_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
@@ -30,7 +30,7 @@ define amdgpu_ps half @struct_buffer_load_format_f16__sgpr_rsrc__vgpr_vindex__vg
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
+ ; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call half @llvm.amdgcn.struct.buffer.load.format.f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
@@ -50,7 +50,7 @@ define amdgpu_ps <2 x half> @struct_buffer_load_format_v2f16__sgpr_rsrc__vgpr_vi
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN]].sub0
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN]].sub1
; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
@@ -76,7 +76,7 @@ define amdgpu_ps <2 x half> @struct_buffer_load_format_v2f16__sgpr_rsrc__vgpr_vi
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; PACKED: [[BUFFER_LOAD_FORMAT_D16_XY_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; PACKED: [[BUFFER_LOAD_FORMAT_D16_XY_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_BOTHEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call <2 x half> @llvm.amdgcn.struct.buffer.load.format.v2f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
@@ -102,7 +102,7 @@ define amdgpu_ps <4 x half> @struct_buffer_load_format_v4f16__sgpr_rsrc__vgpr_vi
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
+ ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub1
; UNPACKED: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub2
@@ -138,7 +138,7 @@ define amdgpu_ps <4 x half> @struct_buffer_load_format_v4f16__sgpr_rsrc__vgpr_vi
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; PACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
+ ; PACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0
; PACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub1
; PACKED: $vgpr0 = COPY [[COPY7]]
@@ -183,7 +183,7 @@ define amdgpu_ps <4 x half> @struct_buffer_load_format_v4f16__vpr_rsrc__sgpr_vin
; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; UNPACKED: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
- ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
+ ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -247,7 +247,7 @@ define amdgpu_ps <4 x half> @struct_buffer_load_format_v4f16__vpr_rsrc__sgpr_vin
; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; PACKED: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
- ; PACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
+ ; PACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -277,7 +277,7 @@ define amdgpu_ps half @struct_buffer_load_format_f16__sgpr_rsrc__vgpr_vindex__vg
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource" + 4095, align 1, addrspace 4)
+ ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource" + 4095, align 1, addrspace 4)
; UNPACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]]
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; PACKED-LABEL: name: struct_buffer_load_format_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset_voffsset_add_4095
@@ -292,7 +292,7 @@ define amdgpu_ps half @struct_buffer_load_format_f16__sgpr_rsrc__vgpr_vindex__vg
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource" + 4095, align 1, addrspace 4)
+ ; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource" + 4095, align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%voffset = add i32 %voffset.base, 4095
@@ -313,7 +313,7 @@ define amdgpu_ps half @struct_buffer_load_format_i16__sgpr_rsrc__vgpr_vindex__vg
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
+ ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]]
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; PACKED-LABEL: name: struct_buffer_load_format_i16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
@@ -328,7 +328,7 @@ define amdgpu_ps half @struct_buffer_load_format_i16__sgpr_rsrc__vgpr_vindex__vg
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
+ ; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call i16 @llvm.amdgcn.struct.buffer.load.format.i16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll
index 27fe9ba96de9..f8f4745037e6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll
@@ -15,7 +15,7 @@ define amdgpu_ps float @struct_buffer_load_format_f32__sgpr_rsrc__vgpr_vindex__v
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; CHECK: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_BOTHEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
@@ -35,7 +35,7 @@ define amdgpu_ps <2 x float> @struct_buffer_load_format_v2f32__sgpr_rsrc__vgpr_v
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; CHECK: [[BUFFER_LOAD_FORMAT_XY_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_FORMAT_XY_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XY_BOTHEN]].sub0
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XY_BOTHEN]].sub1
; CHECK: $vgpr0 = COPY [[COPY7]]
@@ -58,7 +58,7 @@ define amdgpu_ps <3 x float> @struct_buffer_load_format_v3f32__sgpr_rsrc__vgpr_v
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; CHECK: [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub0
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub1
; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub2
@@ -83,7 +83,7 @@ define amdgpu_ps <4 x float> @struct_buffer_load_format_v4f32__sgpr_rsrc__vgpr_v
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; CHECK: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub1
; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub2
@@ -132,7 +132,7 @@ define amdgpu_ps <4 x float> @struct_buffer_load_format_v4f32__vpr_rsrc__sgpr_vi
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
- ; CHECK: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -166,7 +166,7 @@ define amdgpu_ps float @struct_buffer_load_format_f32__sgpr_rsrc__vgpr_vindex__v
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; CHECK: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4095, align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_BOTHEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%voffset = add i32 %voffset.base, 4095
@@ -187,7 +187,7 @@ define amdgpu_ps float @struct_buffer_load_format_i32__sgpr_rsrc__vgpr_vindex__v
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; CHECK: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_BOTHEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll
index 6fcb30ac51e9..60a6f8595cbe 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll
@@ -16,7 +16,7 @@ define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vof
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
@@ -37,7 +37,7 @@ define amdgpu_ps <2 x float> @struct_buffer_load_v2f32__sgpr_rsrc__vgpr_vindex__
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; CHECK: [[BUFFER_LOAD_DWORDX2_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_DWORDX2_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub0
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub1
; CHECK: $vgpr0 = COPY [[COPY7]]
@@ -61,7 +61,7 @@ define amdgpu_ps <3 x float> @struct_buffer_load_v3f32__sgpr_rsrc__vgpr_vindex__
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; CHECK: [[BUFFER_LOAD_DWORDX3_BOTHEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_DWORDX3_BOTHEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_BOTHEN]].sub0
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_BOTHEN]].sub1
; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_BOTHEN]].sub2
@@ -87,7 +87,7 @@ define amdgpu_ps <4 x float> @struct_buffer_load_v4f32__sgpr_rsrc__vgpr_vindex__
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; CHECK: [[BUFFER_LOAD_DWORDX4_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_DWORDX4_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_BOTHEN]].sub0
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_BOTHEN]].sub1
; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_BOTHEN]].sub2
@@ -116,7 +116,7 @@ define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vof
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1
- ; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 0, i32 %voffset, i32 %soffset, i32 0)
@@ -137,7 +137,7 @@ define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vof
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4095, align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%voffset = add i32 %voffset.base, 4095
@@ -158,7 +158,7 @@ define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vof
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 64
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 64, i32 0)
@@ -200,7 +200,7 @@ define amdgpu_ps float @struct_buffer_load_f32__vgpr_rsrc__sgpr_vindex__sgpr_vof
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
- ; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -227,7 +227,7 @@ define amdgpu_ps float @struct_buffer_load_i8_zext__sgpr_rsrc__vgpr_vindex__vgpr
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; CHECK: [[BUFFER_LOAD_UBYTE_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 1 from custom "BufferResource", addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_UBYTE_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 1 from custom "BufferResource", addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_BOTHEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
@@ -249,7 +249,7 @@ define amdgpu_ps float @struct_buffer_load_i8_sext__sgpr_rsrc__vgpr_vindex__vgpr
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; CHECK: [[BUFFER_LOAD_UBYTE_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 1 from custom "BufferResource", addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_UBYTE_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 1 from custom "BufferResource", addrspace 4)
; CHECK: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[BUFFER_LOAD_UBYTE_BOTHEN]], 0, 8, implicit $exec
; CHECK: $vgpr0 = COPY [[V_BFE_I32_e64_]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
@@ -272,7 +272,7 @@ define amdgpu_ps float @struct_buffer_load_i16_zext__sgpr_rsrc__vgpr_vindex__vgp
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; CHECK: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_BOTHEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call i16 @llvm.amdgcn.struct.buffer.load.i16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
@@ -294,7 +294,7 @@ define amdgpu_ps float @struct_buffer_load_i16_sext__sgpr_rsrc__vgpr_vindex__vgp
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; CHECK: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[BUFFER_LOAD_USHORT_BOTHEN]], 0, 16, implicit $exec
; CHECK: $vgpr0 = COPY [[V_BFE_I32_e64_]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
@@ -318,7 +318,7 @@ define amdgpu_ps half @struct_buffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voff
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; CHECK: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_BOTHEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
@@ -339,7 +339,7 @@ define amdgpu_ps <2 x half> @struct_buffer_load_v2f16__sgpr_rsrc__vgpr_vindex__v
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call <2 x half> @llvm.amdgcn.struct.buffer.load.v2f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
@@ -366,7 +366,7 @@ define amdgpu_ps <4 x half> @struct_buffer_load_v4f16__sgpr_rsrc__vgpr_vindex__v
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; CHECK: [[BUFFER_LOAD_DWORDX2_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_DWORDX2_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub0
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub1
; CHECK: $vgpr0 = COPY [[COPY7]]
@@ -390,7 +390,7 @@ define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vof
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 1)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll
index 3712975a4995..701531d328ce 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll
@@ -16,7 +16,7 @@ define amdgpu_ps void @struct_buffer_store_format_f16__vgpr_val__sgpr_rsrc__vgpr
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; UNPACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
- ; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
+ ; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: struct_buffer_store_format_f16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
; PACKED: bb.1 (%ir-block.0):
@@ -31,7 +31,7 @@ define amdgpu_ps void @struct_buffer_store_format_f16__vgpr_val__sgpr_rsrc__vgpr
; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
- ; PACKED: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
+ ; PACKED: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.format.f16(half %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
@@ -55,7 +55,7 @@ define amdgpu_ps void @struct_buffer_store_format_v2f16__vgpr_val__sgpr_rsrc__vg
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
; UNPACKED: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
- ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_BOTHEN_exact [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_BOTHEN_exact [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: struct_buffer_store_format_v2f16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
; PACKED: bb.1 (%ir-block.0):
@@ -70,7 +70,7 @@ define amdgpu_ps void @struct_buffer_store_format_v2f16__vgpr_val__sgpr_rsrc__vg
; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
- ; PACKED: BUFFER_STORE_FORMAT_D16_XY_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; PACKED: BUFFER_STORE_FORMAT_D16_XY_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
@@ -103,7 +103,7 @@ define amdgpu_ps void @struct_buffer_store_format_v4f16__vgpr_val__sgpr_rsrc__vg
; UNPACKED: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY10]], [[COPY1]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY1]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3
; UNPACKED: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
- ; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_BOTHEN_exact [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
+ ; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_BOTHEN_exact [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: struct_buffer_store_format_v4f16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
; PACKED: bb.1 (%ir-block.0):
@@ -120,7 +120,7 @@ define amdgpu_ps void @struct_buffer_store_format_v4f16__vgpr_val__sgpr_rsrc__vg
; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; PACKED: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
- ; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
+ ; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.format.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
@@ -162,7 +162,7 @@ define amdgpu_ps void @struct_buffer_store_format_f16__sgpr_val__vgpr_rsrc__sgpr
; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; UNPACKED: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
- ; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
+ ; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -206,7 +206,7 @@ define amdgpu_ps void @struct_buffer_store_format_f16__sgpr_val__vgpr_rsrc__sgpr
; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; PACKED: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
- ; PACKED: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
+ ; PACKED: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -233,7 +233,7 @@ define amdgpu_ps void @struct_buffer_store_format_i16__vgpr_val__sgpr_rsrc__vgpr
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; UNPACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
- ; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
+ ; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: struct_buffer_store_format_i16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
; PACKED: bb.1 (%ir-block.0):
@@ -248,7 +248,7 @@ define amdgpu_ps void @struct_buffer_store_format_i16__vgpr_val__sgpr_rsrc__vgpr
; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
- ; PACKED: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
+ ; PACKED: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.format.i16(i16 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll
index 7a540d859a9d..892cc82237c9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll
@@ -15,7 +15,7 @@ define amdgpu_ps void @struct_buffer_store_format_f32__vgpr_val__sgpr_rsrc__vgpr
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
- ; CHECK: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.format.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
@@ -37,7 +37,7 @@ define amdgpu_ps void @struct_buffer_store_format_v2f32__vgpr_val__sgpr_rsrc__vg
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
- ; CHECK: BUFFER_STORE_FORMAT_XY_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_FORMAT_XY_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
@@ -60,7 +60,7 @@ define amdgpu_ps void @struct_buffer_store_format_v3f32__vgpr_val__sgpr_rsrc__vg
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
- ; CHECK: BUFFER_STORE_FORMAT_XYZ_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY9]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_FORMAT_XYZ_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.format.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
@@ -84,7 +84,7 @@ define amdgpu_ps void @struct_buffer_store_format_v4f32__vgpr_val__sgpr_rsrc__vg
; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr5
; CHECK: [[COPY10:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1
- ; CHECK: BUFFER_STORE_FORMAT_XYZW_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY10]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_FORMAT_XYZW_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
@@ -126,7 +126,7 @@ define amdgpu_ps void @struct_buffer_store_format_f32__sgpr_val__vgpr_rsrc__sgpr
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
- ; CHECK: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -153,7 +153,7 @@ define amdgpu_ps void @struct_buffer_store_format_i32__vgpr_val__sgpr_rsrc__vgpr
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
- ; CHECK: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.format.i32(i32 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll
index 9dede257a02c..b1de301e9abb 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll
@@ -17,7 +17,7 @@ define amdgpu_ps void @struct_buffer_store_f32_sgpr_rsrc__vgpr_val__vgpr_vindex_
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
- ; CHECK: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
@@ -39,7 +39,7 @@ define amdgpu_ps void @struct_buffer_store_v2f32_sgpr_rsrc__vgpr_val__vgpr_vinde
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
- ; CHECK: BUFFER_STORE_DWORDX2_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORDX2_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
@@ -62,7 +62,7 @@ define amdgpu_ps void @struct_buffer_store_v3f32_sgpr_rsrc__vgpr_val__vgpr_vinde
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
- ; CHECK: BUFFER_STORE_DWORDX3_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY9]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORDX3_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
@@ -86,7 +86,7 @@ define amdgpu_ps void @struct_buffer_store_v4f32_sgpr_rsrc__vgpr_val__vgpr_vinde
; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr5
; CHECK: [[COPY10:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1
- ; CHECK: BUFFER_STORE_DWORDX4_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY10]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORDX4_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
@@ -132,7 +132,7 @@ define amdgpu_ps void @struct_buffer_store_v4f32_vgpr_rsrc__sgpr_val__sgpr_vinde
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY10]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY12]], %subreg.sub0, [[COPY13]], %subreg.sub1
- ; CHECK: BUFFER_STORE_DWORDX4_BOTHEN_exact [[COPY11]], [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORDX4_BOTHEN_exact [[COPY11]], [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -159,7 +159,7 @@ define amdgpu_ps void @struct_buffer_store_i8_sgpr_rsrc__vgpr_val__vgpr_vindex__
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
- ; CHECK: BUFFER_STORE_BYTE_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
+ ; CHECK: BUFFER_STORE_BYTE_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; CHECK: S_ENDPGM 0
%val.trunc = trunc i32 %val to i8
call void @llvm.amdgcn.struct.buffer.store.i8(i8 %val.trunc, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
@@ -180,7 +180,7 @@ define amdgpu_ps void @struct_buffer_store_i16_sgpr_rsrc__vgpr_val__vgpr_vindex_
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
- ; CHECK: BUFFER_STORE_SHORT_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_SHORT_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%val.trunc = trunc i32 %val to i16
call void @llvm.amdgcn.struct.buffer.store.i16(i16 %val.trunc, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
@@ -201,7 +201,7 @@ define amdgpu_ps void @struct_buffer_store_f32_sgpr_rsrc__vgpr_val__vgpr_vindex_
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
- ; CHECK: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 1)
ret void
@@ -221,7 +221,7 @@ define amdgpu_ps void @struct_buffer_store_v2f16_sgpr_rsrc__vgpr_val__vgpr_vinde
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
- ; CHECK: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
@@ -249,7 +249,7 @@ define amdgpu_ps void @struct_buffer_store_v4f16_sgpr_rsrc__vgpr_val__vgpr_vinde
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
- ; CHECK: BUFFER_STORE_DWORDX2_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORDX2_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll
index c0049efda5ac..a9296f006167 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll
@@ -16,7 +16,7 @@ define amdgpu_ps half @struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_vof
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
+ ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; UNPACKED-LABEL: name: struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
@@ -31,7 +31,7 @@ define amdgpu_ps half @struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_vof
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
+ ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]]
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call half @llvm.amdgcn.struct.tbuffer.load.f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 78, i32 0)
@@ -51,7 +51,7 @@ define amdgpu_ps <2 x half> @struct_tbuffer_load_v2f16__sgpr_rsrc__vgpr_vindex__
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_XY_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_XY_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_BOTHEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; UNPACKED-LABEL: name: struct_tbuffer_load_v2f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
@@ -66,7 +66,7 @@ define amdgpu_ps <2 x half> @struct_tbuffer_load_v2f16__sgpr_rsrc__vgpr_vindex__
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN]].sub0
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN]].sub1
; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
@@ -103,7 +103,7 @@ define amdgpu_ps <4 x half> @struct_tbuffer_load_v4f16__sgpr_rsrc__vgpr_vindex__
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
+ ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0
; PACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub1
; PACKED: $vgpr0 = COPY [[COPY7]]
@@ -121,7 +121,7 @@ define amdgpu_ps <4 x half> @struct_tbuffer_load_v4f16__sgpr_rsrc__vgpr_vindex__
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
+ ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub1
; UNPACKED: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub2
@@ -163,7 +163,7 @@ define amdgpu_ps half @struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_vof
; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1
- ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
+ ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; UNPACKED-LABEL: name: struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset_vindex0
@@ -179,7 +179,7 @@ define amdgpu_ps half @struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_vof
; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1
- ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
+ ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]]
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call half @llvm.amdgcn.struct.tbuffer.load.f16(<4 x i32> %rsrc, i32 0, i32 %voffset, i32 %soffset, i32 78, i32 0)
@@ -220,7 +220,7 @@ define amdgpu_ps <4 x half> @struct_tbuffer_load_v4f16__vgpr_rsrc__sgpr_vindex__
; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; PACKED: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
- ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
+ ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -266,7 +266,7 @@ define amdgpu_ps <4 x half> @struct_tbuffer_load_v4f16__vgpr_rsrc__sgpr_vindex__
; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; UNPACKED: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
- ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
+ ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -314,7 +314,7 @@ define amdgpu_ps half @struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_vof
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource" + 4095, align 1, addrspace 4)
+ ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource" + 4095, align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; UNPACKED-LABEL: name: struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset_voffset_add4095
@@ -329,7 +329,7 @@ define amdgpu_ps half @struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_vof
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource" + 4095, align 1, addrspace 4)
+ ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource" + 4095, align 1, addrspace 4)
; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]]
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%voffset = add i32 %voffset.base, 4095
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll
index fd7bac4a6721..ca7eb249124d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll
@@ -15,7 +15,7 @@ define amdgpu_ps float @struct_tbuffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vo
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; CHECK: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_BOTHEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 78, i32 0)
@@ -35,7 +35,7 @@ define amdgpu_ps <2 x float> @struct_tbuffer_load_v2f32__sgpr_rsrc__vgpr_vindex_
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; CHECK: [[TBUFFER_LOAD_FORMAT_XY_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[TBUFFER_LOAD_FORMAT_XY_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_BOTHEN]].sub0
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_BOTHEN]].sub1
; CHECK: $vgpr0 = COPY [[COPY7]]
@@ -58,7 +58,7 @@ define amdgpu_ps <3 x float> @struct_tbuffer_load_v3f32__sgpr_rsrc__vgpr_vindex_
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; CHECK: [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub0
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub1
; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub2
@@ -83,7 +83,7 @@ define amdgpu_ps <4 x float> @struct_tbuffer_load_v4f32__sgpr_rsrc__vgpr_vindex_
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; CHECK: [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub1
; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub2
@@ -111,7 +111,7 @@ define amdgpu_ps float @struct_tbuffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vo
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1
- ; CHECK: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_BOTHEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> %rsrc, i32 0, i32 %voffset, i32 %soffset, i32 78, i32 0)
@@ -152,7 +152,7 @@ define amdgpu_ps <4 x float> @struct_tbuffer_load_v4f32__vgpr_rsrc__sgpr_vindex_
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
- ; CHECK: [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -186,7 +186,7 @@ define amdgpu_ps float @struct_tbuffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vo
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; CHECK: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4095, align 1, addrspace 4)
+ ; CHECK: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_BOTHEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%voffset = add i32 %voffset.base, 4095
diff --git a/llvm/test/CodeGen/AMDGPU/SRSRC-GIT-clobber-check.mir b/llvm/test/CodeGen/AMDGPU/SRSRC-GIT-clobber-check.mir
index a897f991a005..4a68ca617bb1 100644
--- a/llvm/test/CodeGen/AMDGPU/SRSRC-GIT-clobber-check.mir
+++ b/llvm/test/CodeGen/AMDGPU/SRSRC-GIT-clobber-check.mir
@@ -24,7 +24,7 @@ body: |
; CHECK: $sgpr1 = COPY killed $sgpr5
; CHECK: $sgpr4_sgpr5 = S_GETPC_B64
; CHECK: $sgpr4 = S_MOV_B32 $sgpr8
- ; CHECK: $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM $sgpr4_sgpr5, 0, 0, 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7 :: (dereferenceable invariant load 16, align 4, addrspace 4)
+ ; CHECK: $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM $sgpr4_sgpr5, 0, 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7 :: (dereferenceable invariant load 16, align 4, addrspace 4)
bb.0:
successors: %bb.1, %bb.2
liveins: $sgpr0
@@ -39,7 +39,7 @@ body: |
bb.1:
renamable $vgpr0 = V_MOV_B32_e32 1065353216, implicit $exec
- BUFFER_STORE_DWORD_OFFEN killed renamable $vgpr0, undef renamable $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
+ BUFFER_STORE_DWORD_OFFEN killed renamable $vgpr0, undef renamable $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
bb.2:
S_ENDPGM 0
diff --git a/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx908.mir b/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx908.mir
index 5dea09365ca6..fe9e582b073a 100644
--- a/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx908.mir
+++ b/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx908.mir
@@ -14,9 +14,9 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vgpr_32 = COPY $vgpr2
- %2:vreg_64 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX2 %0, %2, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
+ %2:vreg_64 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX2 %0, %2, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
@@ -33,9 +33,9 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vgpr_32 = COPY $vgpr2
- %2:vreg_96 = GLOBAL_LOAD_DWORDX3 %0, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX3 %0, %2, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
+ %2:vreg_96 = GLOBAL_LOAD_DWORDX3 %0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX3 %0, %2, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
@@ -52,9 +52,9 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vgpr_32 = COPY $vgpr2
- %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
+ %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
@@ -71,9 +71,9 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vgpr_32 = COPY $vgpr2
- %2:vreg_160 = IMAGE_LOAD_V5_V1 %1, undef %3:sgpr_256, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
+ %2:vreg_160 = IMAGE_LOAD_V5_V1 %1, undef %3:sgpr_256, 0, 0, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
@@ -92,9 +92,9 @@ body: |
%1:vgpr_32 = COPY $vgpr2
%3:sgpr_256 = IMPLICIT_DEF
%2:vreg_256 = COPY %3:sgpr_256
- %4:vreg_128 = IMAGE_SAMPLE_C_CL_O_V4_V8 %2, %3:sgpr_256, undef %5:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
+ %4:vreg_128 = IMAGE_SAMPLE_C_CL_O_V4_V8 %2, %3:sgpr_256, undef %5:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
@@ -112,11 +112,11 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vgpr_32 = COPY $vgpr2
%2:vreg_512 = IMPLICIT_DEF
- GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
@@ -134,15 +134,15 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vgpr_32 = COPY $vgpr2
%2:vreg_1024 = IMPLICIT_DEF
- GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2.sub16_sub17_sub18_sub19, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2.sub20_sub21_sub22_sub23, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2.sub24_sub25_sub26_sub27, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2.sub28_sub29_sub30_sub31, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub16_sub17_sub18_sub19, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub20_sub21_sub22_sub23, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub24_sub25_sub26_sub27, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub28_sub29_sub30_sub31, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
@@ -160,9 +160,9 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%3:areg_64 = IMPLICIT_DEF
%2:vreg_64 = COPY %3:areg_64
- GLOBAL_STORE_DWORDX2 %0, %2, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX2 %0, %2, 0, 0, implicit $exec
%1:vgpr_32 = COPY $agpr0
- GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
@@ -180,9 +180,9 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%3:areg_128 = IMPLICIT_DEF
%2:vreg_128 = COPY %3:areg_128
- GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, implicit $exec
%1:vgpr_32 = COPY $agpr0
- GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
@@ -200,12 +200,12 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%3:areg_512 = IMPLICIT_DEF
%2:vreg_512 = COPY %3:areg_512
- GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, implicit $exec
%1:vgpr_32 = COPY $agpr0
- GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
@@ -223,14 +223,14 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%3:areg_1024 = IMPLICIT_DEF
%2:vreg_1024 = COPY %3:areg_1024
- GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2.sub16_sub17_sub18_sub19, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2.sub20_sub21_sub22_sub23, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2.sub24_sub25_sub26_sub27, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2.sub28_sub29_sub30_sub31, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub16_sub17_sub18_sub19, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub20_sub21_sub22_sub23, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub24_sub25_sub26_sub27, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub28_sub29_sub30_sub31, 0, 0, implicit $exec
%1:vgpr_32 = COPY $agpr0
- GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
diff --git a/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx90a.mir b/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx90a.mir
index fc660e22c044..afa1d452bc50 100644
--- a/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx90a.mir
+++ b/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx90a.mir
@@ -16,9 +16,9 @@ body: |
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
%1:vgpr_32 = COPY $vgpr2
- %2:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX2 %0, %2, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
+ %2:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX2 %0, %2, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
@@ -35,9 +35,9 @@ body: |
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
%1:vgpr_32 = COPY $vgpr2
- %2:vreg_96_align2 = GLOBAL_LOAD_DWORDX3 %0, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX3 %0, %2, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
+ %2:vreg_96_align2 = GLOBAL_LOAD_DWORDX3 %0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX3 %0, %2, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
@@ -54,9 +54,9 @@ body: |
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
%1:vgpr_32 = COPY $vgpr2
- %2:vreg_128_align2 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
+ %2:vreg_128_align2 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
@@ -73,9 +73,9 @@ body: |
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
%1:vgpr_32 = COPY $vgpr2
- %2:vreg_160_align2 = IMAGE_LOAD_V5_V1 %1, undef %3:sgpr_256, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
+ %2:vreg_160_align2 = IMAGE_LOAD_V5_V1 %1, undef %3:sgpr_256, 0, 0, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
@@ -94,9 +94,9 @@ body: |
%1:vgpr_32 = COPY $vgpr2
%3:sgpr_256 = IMPLICIT_DEF
%2:vreg_256_align2 = COPY %3:sgpr_256
- %4:vreg_128_align2 = IMAGE_SAMPLE_C_CL_O_V4_V8 %2, %3:sgpr_256, undef %5:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
+ %4:vreg_128_align2 = IMAGE_SAMPLE_C_CL_O_V4_V8 %2, %3:sgpr_256, undef %5:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
@@ -114,11 +114,11 @@ body: |
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
%1:vgpr_32 = COPY $vgpr2
%2:vreg_512_align2 = IMPLICIT_DEF
- GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
@@ -136,15 +136,15 @@ body: |
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
%1:vgpr_32 = COPY $vgpr2
%2:vreg_1024_align2 = IMPLICIT_DEF
- GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2.sub16_sub17_sub18_sub19, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2.sub20_sub21_sub22_sub23, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2.sub24_sub25_sub26_sub27, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2.sub28_sub29_sub30_sub31, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub16_sub17_sub18_sub19, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub20_sub21_sub22_sub23, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub24_sub25_sub26_sub27, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub28_sub29_sub30_sub31, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
@@ -162,9 +162,9 @@ body: |
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
%3:areg_64 = IMPLICIT_DEF
%2:vreg_64_align2 = COPY %3:areg_64
- GLOBAL_STORE_DWORDX2 %0, %2, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX2 %0, %2, 0, 0, implicit $exec
%1:vgpr_32 = COPY $agpr0
- GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
@@ -182,9 +182,9 @@ body: |
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
%3:areg_128 = IMPLICIT_DEF
%2:vreg_128_align2 = COPY %3:areg_128
- GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, implicit $exec
%1:vgpr_32 = COPY $agpr0
- GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
@@ -202,12 +202,12 @@ body: |
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
%3:areg_512 = IMPLICIT_DEF
%2:vreg_512_align2 = COPY %3:areg_512
- GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, implicit $exec
%1:vgpr_32 = COPY $agpr0
- GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
@@ -225,14 +225,14 @@ body: |
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
%3:areg_1024 = IMPLICIT_DEF
%2:vreg_1024_align2 = COPY %3:areg_1024
- GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2.sub16_sub17_sub18_sub19, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2.sub20_sub21_sub22_sub23, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2.sub24_sub25_sub26_sub27, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2.sub28_sub29_sub30_sub31, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub16_sub17_sub18_sub19, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub20_sub21_sub22_sub23, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub24_sub25_sub26_sub27, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2.sub28_sub29_sub30_sub31, 0, 0, implicit $exec
%1:vgpr_32 = COPY $agpr0
- GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll b/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll
index be819ceb88cc..22476fec3e86 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll
@@ -4,7 +4,7 @@
; RUN: llc -march=amdgcn -verify-machineinstrs -stop-after=amdgpu-isel -o - %s | FileCheck -check-prefix=GCN %s
; GCN: %[[OFFSET:[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @DescriptorBuffer
-; GCN: %{{[0-9]+}}:sgpr_128 = S_LOAD_DWORDX4_SGPR killed %{{[0-9]+}}, killed %[[OFFSET]], 0, 0 :: (invariant load 16 from %ir.13, addrspace 4)
+; GCN: %{{[0-9]+}}:sgpr_128 = S_LOAD_DWORDX4_SGPR killed %{{[0-9]+}}, killed %[[OFFSET]], 0 :: (invariant load 16 from %ir.13, addrspace 4)
define amdgpu_cs void @test_load_zext(i32 inreg %0, i32 inreg %1, i32 inreg %resNode0, i32 inreg %resNode1, <3 x i32> inreg %2, i32 inreg %3, <3 x i32> %4) local_unnamed_addr #2 {
.entry:
diff --git a/llvm/test/CodeGen/AMDGPU/branch-relaxation-debug-info.mir b/llvm/test/CodeGen/AMDGPU/branch-relaxation-debug-info.mir
index 7a286bf3aef9..1a42f801b377 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-relaxation-debug-info.mir
+++ b/llvm/test/CodeGen/AMDGPU/branch-relaxation-debug-info.mir
@@ -83,14 +83,14 @@ body: |
successors: %bb.1(0x40000000), %bb.4(0x40000000)
liveins: $sgpr4_sgpr5
- renamable $sgpr6_sgpr7 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0, 0 :: (dereferenceable invariant load 8 from %ir.arg.kernarg.offset.cast, align 16, addrspace 4)
- renamable $sgpr4 = S_LOAD_DWORD_IMM killed renamable $sgpr4_sgpr5, 8, 0, 0 :: (dereferenceable invariant load 4 from %ir.arg1.kernarg.offset.cast, align 8, addrspace 4)
+ renamable $sgpr6_sgpr7 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load 8 from %ir.arg.kernarg.offset.cast, align 16, addrspace 4)
+ renamable $sgpr4 = S_LOAD_DWORD_IMM killed renamable $sgpr4_sgpr5, 8, 0 :: (dereferenceable invariant load 4 from %ir.arg1.kernarg.offset.cast, align 8, addrspace 4)
S_WAITCNT 49279
renamable $vgpr0 = nofpexcept V_MUL_F32_e64 0, killed $sgpr4, 0, $sgpr4, 0, 0, implicit $mode, implicit $exec
DBG_VALUE renamable $sgpr6_sgpr7, $noreg, !11, !DIExpression(DW_OP_plus_uconst, 12, DW_OP_stack_value), debug-location !12
$vgpr1 = V_MOV_B32_e32 $sgpr6, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr6_sgpr7
$vgpr2 = V_MOV_B32_e32 $sgpr7, implicit $exec, implicit killed $sgpr6_sgpr7, implicit $exec
- GLOBAL_STORE_DWORD killed renamable $vgpr1_vgpr2, renamable $vgpr0, 12, 0, 0, 0, 0, implicit $exec, debug-location !12 :: (store 4 into %ir.tmp2, addrspace 1)
+ GLOBAL_STORE_DWORD killed renamable $vgpr1_vgpr2, renamable $vgpr0, 12, 0, implicit $exec, debug-location !12 :: (store 4 into %ir.tmp2, addrspace 1)
renamable $sgpr4 = S_MOV_B32 8388608
renamable $sgpr4_sgpr5 = nofpexcept V_CMP_GT_F32_e64 0, killed $sgpr4, 0, killed $vgpr0, 0, implicit $mode, implicit $exec
renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
@@ -102,7 +102,7 @@ body: |
renamable $sgpr4_sgpr5 = IMPLICIT_DEF
$vgpr0 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr4_sgpr5
$vgpr1 = V_MOV_B32_e32 $sgpr5, implicit $exec, implicit killed $sgpr4_sgpr5, implicit $exec
- renamable $vgpr0 = GLOBAL_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`, addrspace 1)
+ renamable $vgpr0 = GLOBAL_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`, addrspace 1)
renamable $sgpr4 = S_MOV_B32 2139095040
S_WAITCNT 3952
renamable $sgpr4_sgpr5 = nofpexcept V_CMP_NEQ_F32_e64 0, killed $sgpr4, 0, killed $vgpr0, 0, implicit $mode, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/break-smem-soft-clauses.mir b/llvm/test/CodeGen/AMDGPU/break-smem-soft-clauses.mir
index acc47a065cc0..62a1d08e1736 100644
--- a/llvm/test/CodeGen/AMDGPU/break-smem-soft-clauses.mir
+++ b/llvm/test/CodeGen/AMDGPU/break-smem-soft-clauses.mir
@@ -8,9 +8,9 @@ name: trivial_smem_clause_load_smrd4_x1
body: |
bb.0:
; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x1
- ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: S_ENDPGM 0
- $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
S_ENDPGM 0
...
---
@@ -20,11 +20,11 @@ name: trivial_smem_clause_load_smrd4_x2
body: |
bb.0:
; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x2
- ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
- ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
+ ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
; GCN-NEXT: S_ENDPGM 0
- $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
- $sgpr1 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
+ $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ $sgpr1 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
S_ENDPGM 0
...
---
@@ -34,13 +34,13 @@ name: trivial_smem_clause_load_smrd4_x3
body: |
bb.0:
; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x3
- ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
- ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0, 0
- ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
+ ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
+ ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0
+ ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
; GCN-NEXT: S_ENDPGM 0
- $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
- $sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0, 0
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
+ $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
+ $sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
S_ENDPGM 0
...
---
@@ -50,15 +50,15 @@ name: trivial_smem_clause_load_smrd4_x4
body: |
bb.0:
; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x4
- ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
- ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr8_sgpr9, 0, 0, 0
- ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
- ; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_IMM $sgpr16_sgpr17, 0, 0, 0
+ ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
+ ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr8_sgpr9, 0, 0
+ ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
+ ; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_IMM $sgpr16_sgpr17, 0, 0
; GCN-NEXT: S_ENDPGM 0
- $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
- $sgpr1 = S_LOAD_DWORD_IMM $sgpr8_sgpr9, 0, 0, 0
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
- $sgpr3 = S_LOAD_DWORD_IMM $sgpr16_sgpr17, 0, 0, 0
+ $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
+ $sgpr1 = S_LOAD_DWORD_IMM $sgpr8_sgpr9, 0, 0
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
+ $sgpr3 = S_LOAD_DWORD_IMM $sgpr16_sgpr17, 0, 0
S_ENDPGM 0
...
---
@@ -67,11 +67,11 @@ name: trivial_smem_clause_load_smrd4_x2_sameptr
body: |
bb.0:
; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x2_sameptr
- ; GCN: $sgpr12 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
- ; GCN-NEXT: $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ ; GCN: $sgpr12 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ ; GCN-NEXT: $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: S_ENDPGM 0
- $sgpr12 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
- $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ $sgpr12 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
S_ENDPGM 0
...
---
@@ -81,9 +81,9 @@ name: smrd_load4_overwrite_ptr_lo
body: |
bb.0:
; GCN-LABEL: name: smrd_load4_overwrite_ptr_lo
- ; GCN: $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ ; GCN: $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: S_ENDPGM 0
- $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
S_ENDPGM 0
...
---
@@ -93,9 +93,9 @@ name: smrd_load4_overwrite_ptr_hi
body: |
bb.0:
; GCN-LABEL: name: smrd_load4_overwrite_ptr_hi
- ; GCN: $sgpr11 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ ; GCN: $sgpr11 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: S_ENDPGM 0
- $sgpr11 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ $sgpr11 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
S_ENDPGM 0
...
---
@@ -105,9 +105,9 @@ name: smrd_load8_overwrite_ptr
body: |
bb.0:
; GCN-LABEL: name: smrd_load8_overwrite_ptr
- ; GCN: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
+ ; GCN: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: S_ENDPGM 0
- $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
+ $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
S_ENDPGM 0
...
---
@@ -119,46 +119,46 @@ name: break_smem_clause_at_max_smem_clause_size_smrd_load4
body: |
bb.0:
; GCN-LABEL: name: break_smem_clause_at_max_smem_clause_size_smrd_load4
- ; GCN: $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
- ; GCN-NEXT: $sgpr14 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
- ; GCN-NEXT: $sgpr15 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
- ; GCN-NEXT: $sgpr16 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
- ; GCN-NEXT: $sgpr17 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
- ; GCN-NEXT: $sgpr18 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
- ; GCN-NEXT: $sgpr19 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
- ; GCN-NEXT: $sgpr20 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
- ; GCN-NEXT: $sgpr21 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
- ; GCN-NEXT: $sgpr22 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
- ; GCN-NEXT: $sgpr23 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
- ; GCN-NEXT: $sgpr24 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
- ; GCN-NEXT: $sgpr25 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
- ; GCN-NEXT: $sgpr26 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
- ; GCN-NEXT: $sgpr27 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
- ; GCN-NEXT: $sgpr28 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
- ; GCN-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr30_sgpr31, 0, 0, 0
+ ; GCN: $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ ; GCN-NEXT: $sgpr14 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ ; GCN-NEXT: $sgpr15 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ ; GCN-NEXT: $sgpr16 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ ; GCN-NEXT: $sgpr17 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ ; GCN-NEXT: $sgpr18 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ ; GCN-NEXT: $sgpr19 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ ; GCN-NEXT: $sgpr20 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ ; GCN-NEXT: $sgpr21 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ ; GCN-NEXT: $sgpr22 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ ; GCN-NEXT: $sgpr23 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ ; GCN-NEXT: $sgpr24 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ ; GCN-NEXT: $sgpr25 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ ; GCN-NEXT: $sgpr26 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ ; GCN-NEXT: $sgpr27 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ ; GCN-NEXT: $sgpr28 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ ; GCN-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr30_sgpr31, 0, 0
; GCN-NEXT: $sgpr0 = S_MOV_B32 $sgpr0, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28
; GCN-NEXT: S_ENDPGM 0
- $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
- $sgpr14 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
- $sgpr15 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
- $sgpr16 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
-
- $sgpr17 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
- $sgpr18 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
- $sgpr19 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
- $sgpr20 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
-
- $sgpr21 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
- $sgpr22 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
- $sgpr23 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
- $sgpr24 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
-
- $sgpr25 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
- $sgpr26 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
- $sgpr27 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
- $sgpr28 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
-
- $sgpr0 = S_LOAD_DWORD_IMM $sgpr30_sgpr31, 0, 0, 0
+ $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ $sgpr14 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ $sgpr15 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ $sgpr16 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+
+ $sgpr17 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ $sgpr18 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ $sgpr19 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ $sgpr20 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+
+ $sgpr21 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ $sgpr22 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ $sgpr23 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ $sgpr24 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+
+ $sgpr25 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ $sgpr26 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ $sgpr27 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ $sgpr28 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+
+ $sgpr0 = S_LOAD_DWORD_IMM $sgpr30_sgpr31, 0, 0
$sgpr0 = S_MOV_B32 $sgpr0, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28
S_ENDPGM 0
...
@@ -169,12 +169,12 @@ name: break_smem_clause_simple_load_smrd4_lo_ptr
body: |
bb.0:
; GCN-LABEL: name: break_smem_clause_simple_load_smrd4_lo_ptr
- ; GCN: $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ ; GCN: $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; XNACK-NEXT: S_NOP 0
- ; GCN-NEXT: $sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
+ ; GCN-NEXT: $sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
; GCN-NEXT: S_ENDPGM 0
- $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
- $sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
+ $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ $sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
S_ENDPGM 0
...
---
@@ -184,11 +184,11 @@ name: break_smem_clause_simple_load_smrd4_hi_ptr
body: |
bb.0:
; GCN-LABEL: name: break_smem_clause_simple_load_smrd4_hi_ptr
- ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
- ; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
+ ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ ; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
; GCN-NEXT: S_ENDPGM 0
- $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
- $sgpr3 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
+ $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ $sgpr3 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
S_ENDPGM 0
...
---
@@ -198,12 +198,12 @@ name: break_smem_clause_simple_load_smrd8_ptr
body: |
bb.0:
; GCN-LABEL: name: break_smem_clause_simple_load_smrd8_ptr
- ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
+ ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
; XNACK-NEXT: S_NOP 0
- ; GCN-NEXT: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0, 0
+ ; GCN-NEXT: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
; GCN-NEXT: S_ENDPGM 0
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
- $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0, 0
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
+ $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
S_ENDPGM 0
...
---
@@ -213,11 +213,11 @@ name: break_smem_clause_simple_load_smrd16_ptr
body: |
bb.0:
; GCN-LABEL: name: break_smem_clause_simple_load_smrd16_ptr
- ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
- ; GCN-NEXT: $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM $sgpr6_sgpr7, 0, 0, 0
+ ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
+ ; GCN-NEXT: $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM $sgpr6_sgpr7, 0, 0
; GCN-NEXT: S_ENDPGM 0
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
- $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM $sgpr6_sgpr7, 0, 0, 0
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
+ $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM $sgpr6_sgpr7, 0, 0
S_ENDPGM 0
...
---
@@ -228,16 +228,16 @@ body: |
; GCN-LABEL: name: break_smem_clause_block_boundary_load_smrd8_ptr
; GCN: bb.0:
; GCN: successors: %bb.1(0x80000000)
- ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
+ ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
; GCN: bb.1:
; XNACK-NEXT: S_NOP 0
- ; GCN-NEXT: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0, 0
+ ; GCN-NEXT: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
; GCN-NEXT: S_ENDPGM 0
bb.0:
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
bb.1:
- $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0, 0
+ $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
S_ENDPGM 0
...
---
@@ -248,11 +248,11 @@ name: break_smem_clause_store_load_into_ptr_smrd4
body: |
bb.0:
; GCN-LABEL: name: break_smem_clause_store_load_into_ptr_smrd4
- ; GCN: S_STORE_DWORD_IMM $sgpr16, $sgpr10_sgpr11, 0, 0, 0
- ; GCN-NEXT: $sgpr12 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
+ ; GCN: S_STORE_DWORD_IMM $sgpr16, $sgpr10_sgpr11, 0, 0
+ ; GCN-NEXT: $sgpr12 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
; GCN-NEXT: S_ENDPGM 0
- S_STORE_DWORD_IMM $sgpr16, $sgpr10_sgpr11, 0, 0, 0
- $sgpr12 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
+ S_STORE_DWORD_IMM $sgpr16, $sgpr10_sgpr11, 0, 0
+ $sgpr12 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
S_ENDPGM 0
...
---
@@ -264,11 +264,11 @@ name: break_smem_clause_store_load_into_data_smrd4
body: |
bb.0:
; GCN-LABEL: name: break_smem_clause_store_load_into_data_smrd4
- ; GCN: S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0, 0
- ; GCN-NEXT: $sgpr8 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
+ ; GCN: S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0
+ ; GCN-NEXT: $sgpr8 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
; GCN-NEXT: S_ENDPGM 0
- S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0, 0
- $sgpr8 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
+ S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0
+ $sgpr8 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
S_ENDPGM 0
...
---
@@ -278,13 +278,13 @@ name: valu_inst_breaks_smem_clause
body: |
bb.0:
; GCN-LABEL: name: valu_inst_breaks_smem_clause
- ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $vgpr8 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
+ ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
; GCN-NEXT: S_ENDPGM 0
- $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$vgpr8 = V_MOV_B32_e32 0, implicit $exec
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
S_ENDPGM 0
...
---
@@ -294,13 +294,13 @@ name: salu_inst_breaks_smem_clause
body: |
bb.0:
; GCN-LABEL: name: salu_inst_breaks_smem_clause
- ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr8 = S_MOV_B32 0
- ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
+ ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
; GCN-NEXT: S_ENDPGM 0
- $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$sgpr8 = S_MOV_B32 0
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
S_ENDPGM 0
...
---
@@ -309,13 +309,13 @@ name: ds_inst_breaks_smem_clause
body: |
bb.0:
; GCN-LABEL: name: ds_inst_breaks_smem_clause
- ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec
- ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
+ ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
; GCN-NEXT: S_ENDPGM 0
- $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
+ $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
S_ENDPGM 0
...
---
@@ -325,13 +325,13 @@ name: flat_inst_breaks_smem_clause
body: |
bb.0:
; GCN-LABEL: name: flat_inst_breaks_smem_clause
- ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
- ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
+ ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
; GCN-NEXT: S_ENDPGM 0
- $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
- $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
+ $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
S_ENDPGM 0
...
---
@@ -341,11 +341,11 @@ name: implicit_use_breaks_smem_clause
body: |
bb.0:
; GCN-LABEL: name: implicit_use_breaks_smem_clause
- ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0, implicit $sgpr12_sgpr13
+ ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, implicit $sgpr12_sgpr13
; XNACK-NEXT: S_NOP 0
- ; GCN-NEXT: $sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM $sgpr6_sgpr7, 0, 0, 0
+ ; GCN-NEXT: $sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM $sgpr6_sgpr7, 0, 0
; GCN-NEXT: S_ENDPGM 0
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0, implicit $sgpr12_sgpr13
- $sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM $sgpr6_sgpr7, 0, 0, 0
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, implicit $sgpr12_sgpr13
+ $sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM $sgpr6_sgpr7, 0, 0
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/break-vmem-soft-clauses.mir b/llvm/test/CodeGen/AMDGPU/break-vmem-soft-clauses.mir
index 5def5f9382a2..2c6f8b88922c 100644
--- a/llvm/test/CodeGen/AMDGPU/break-vmem-soft-clauses.mir
+++ b/llvm/test/CodeGen/AMDGPU/break-vmem-soft-clauses.mir
@@ -9,10 +9,10 @@ name: trivial_clause_load_flat4_x1
body: |
bb.0:
; GCN-LABEL: name: trivial_clause_load_flat4_x1
- ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
- $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -22,12 +22,12 @@ name: trivial_clause_load_flat4_x2
body: |
bb.0:
; GCN-LABEL: name: trivial_clause_load_flat4_x2
- ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
- $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr1 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr1 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -37,14 +37,14 @@ name: trivial_clause_load_flat4_x3
body: |
bb.0:
; GCN-LABEL: name: trivial_clause_load_flat4_x3
- ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr7_vgpr8, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr5_vgpr6, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr7_vgpr8, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
- $vgpr0 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr1 = FLAT_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr2 = FLAT_LOAD_DWORD $vgpr7_vgpr8, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr1 = FLAT_LOAD_DWORD $vgpr5_vgpr6, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr2 = FLAT_LOAD_DWORD $vgpr7_vgpr8, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -54,16 +54,16 @@ name: trivial_clause_load_flat4_x4
body: |
bb.0:
; GCN-LABEL: name: trivial_clause_load_flat4_x4
- ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr8_vgpr9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr10_vgpr11, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr10_vgpr11, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
- $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr1 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr2 = FLAT_LOAD_DWORD $vgpr8_vgpr9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr3 = FLAT_LOAD_DWORD $vgpr10_vgpr11, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr1 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr2 = FLAT_LOAD_DWORD $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr3 = FLAT_LOAD_DWORD $vgpr10_vgpr11, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -73,12 +73,12 @@ name: trivial_clause_load_flat4_x2_sameptr
body: |
bb.0:
; GCN-LABEL: name: trivial_clause_load_flat4_x2_sameptr
- ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
- $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -88,10 +88,10 @@ name: flat_load4_overwrite_ptr_lo
body: |
bb.0:
; GCN-LABEL: name: flat_load4_overwrite_ptr_lo
- ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
- $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -101,10 +101,10 @@ name: flat_load4_overwrite_ptr_hi
body: |
bb.0:
; GCN-LABEL: name: flat_load4_overwrite_ptr_hi
- ; GCN: $vgpr1 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $vgpr1 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
- $vgpr1 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr1 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -114,10 +114,10 @@ name: flat_load8_overwrite_ptr
body: |
bb.0:
; GCN-LABEL: name: flat_load8_overwrite_ptr
- ; GCN: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
- $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -130,48 +130,48 @@ name: break_clause_at_max_clause_size_flat_load4
body: |
bb.0:
; GCN-LABEL: name: break_clause_at_max_clause_size_flat_load4
- ; GCN: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr5 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr6 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr7 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr8 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr9 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr12 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr13 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr14 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr15 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr16 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr17 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr5 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr6 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr7 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr8 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr9 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr12 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr13 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr14 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr15 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr16 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr17 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; XNACK-NEXT: S_NOP 0
- ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $sgpr0 = S_MOV_B32 $sgpr0, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18
; GCN-NEXT: S_ENDPGM 0
- $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr5 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr5 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr

- $vgpr6 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr7 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr8 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr9 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr6 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr7 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr8 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr9 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr

- $vgpr10 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr11 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr12 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr13 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr10 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr11 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr12 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr13 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr

- $vgpr14 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr15 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr16 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr17 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr14 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr15 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr16 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr17 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr

- $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
$sgpr0 = S_MOV_B32 $sgpr0, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18
S_ENDPGM 0
...
@@ -182,13 +182,13 @@ name: break_clause_simple_load_flat4_lo_ptr
body: |
bb.0:
; GCN-LABEL: name: break_clause_simple_load_flat4_lo_ptr
- ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; XNACK-NEXT: S_NOP 0
- ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
- $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -198,13 +198,13 @@ name: break_clause_simple_load_flat4_hi_ptr
body: |
bb.0:
; GCN-LABEL: name: break_clause_simple_load_flat4_hi_ptr
- ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; XNACK-NEXT: S_NOP 0
- ; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
- $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr3 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr3 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -214,13 +214,13 @@ name: break_clause_simple_load_flat8_ptr
body: |
bb.0:
; GCN-LABEL: name: break_clause_simple_load_flat8_ptr
- ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; XNACK-NEXT: S_NOP 0
- ; GCN-NEXT: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
- $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -231,12 +231,12 @@ name: break_clause_simple_load_flat16_ptr
body: |
bb.0:
; GCN-LABEL: name: break_clause_simple_load_flat16_ptr
- ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; XNACK-NEXT: S_NOP 0
- ; GCN-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = FLAT_LOAD_DWORDX4 $vgpr6_vgpr7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = FLAT_LOAD_DWORDX4 $vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
- $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr2_vgpr3_vgpr4_vgpr5 = FLAT_LOAD_DWORDX4 $vgpr6_vgpr7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr2_vgpr3_vgpr4_vgpr5 = FLAT_LOAD_DWORDX4 $vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -251,17 +251,17 @@ body: |
; GCN-LABEL: name: break_clause_block_boundary_load_flat8_ptr
; GCN: bb.0:
; GCN-NEXT: successors: %bb.1(0x80000000)
- ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; GCN: bb.1:
; XNACK-NEXT: S_NOP 0
- ; GCN-NEXT: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
bb.0:
- $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
bb.1:
- $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -272,12 +272,12 @@ name: break_clause_store_load_into_ptr_flat4
body: |
bb.0:
; GCN-LABEL: name: break_clause_store_load_into_ptr_flat4
- ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
- FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -289,12 +289,12 @@ name: break_clause_store_load_into_data_flat4
body: |
bb.0:
; GCN-LABEL: name: break_clause_store_load_into_data_flat4
- ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
- FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -305,14 +305,14 @@ name: valu_inst_breaks_clause
body: |
bb.0:
; GCN-LABEL: name: valu_inst_breaks_clause
- ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr8 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
- $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
$vgpr8 = V_MOV_B32_e32 0, implicit $exec
- $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -323,14 +323,14 @@ name: salu_inst_breaks_clause
body: |
bb.0:
; GCN-LABEL: name: salu_inst_breaks_clause
- ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $sgpr8 = S_MOV_B32 0
- ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
- $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
$sgpr8 = S_MOV_B32 0
- $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -340,14 +340,14 @@ name: ds_inst_breaks_clause
body: |
bb.0:
; GCN-LABEL: name: ds_inst_breaks_clause
- ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec
- ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
- $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
$vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec
- $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -357,14 +357,14 @@ name: smrd_inst_breaks_clause
body: |
bb.0:
; GCN-LABEL: name: smrd_inst_breaks_clause
- ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $sgpr8 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
- ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $sgpr8 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
+ ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
- $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- $sgpr8 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
- $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
+ $sgpr8 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
+ $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -374,13 +374,13 @@ name: implicit_use_breaks_clause
body: |
bb.0:
; GCN-LABEL: name: implicit_use_breaks_clause
- ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr4_vgpr5
+ ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr4_vgpr5
; XNACK-NEXT: S_NOP 0
- ; GCN-NEXT: $vgpr4_vgpr5 = FLAT_LOAD_DWORDX2 $vgpr6_vgpr7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr4_vgpr5 = FLAT_LOAD_DWORDX2 $vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
- $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr4_vgpr5
- $vgpr4_vgpr5 = FLAT_LOAD_DWORDX2 $vgpr6_vgpr7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr4_vgpr5
+ $vgpr4_vgpr5 = FLAT_LOAD_DWORDX2 $vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -389,12 +389,12 @@ name: trivial_clause_load_mubuf4_x2
body: |
bb.0:
; GCN-LABEL: name: trivial_clause_load_mubuf4_x2
- ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
; GCN-NEXT: S_ENDPGM 0
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+ $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
---
@@ -403,13 +403,13 @@ name: break_clause_simple_load_mubuf_offen_ptr
body: |
bb.0:
; GCN-LABEL: name: break_clause_simple_load_mubuf_offen_ptr
- ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
; XNACK-NEXT: S_NOP 0
- ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
; GCN-NEXT: S_ENDPGM 0
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+ $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
---
@@ -420,11 +420,11 @@ name: mubuf_load4_overwrite_ptr
body: |
bb.0:
; GCN-LABEL: name: mubuf_load4_overwrite_ptr
- ; GCN: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
; GCN-NEXT: S_ENDPGM 0
- $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
$vgpr1 = V_MOV_B32_e32 0, implicit $exec
$vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
S_ENDPGM 0
@@ -437,13 +437,13 @@ name: break_clause_flat_load_mubuf_load
body: |
bb.0:
; GCN-LABEL: name: break_clause_flat_load_mubuf_load
- ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; XNACK-NEXT: S_NOP 0
- ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
; GCN-NEXT: S_ENDPGM 0
- $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
# Break a clause from interference between mubuf and flat instructions
@@ -458,8 +458,8 @@ name: break_clause_mubuf_load_flat_load
body: |
bb.0:
- $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
@@ -470,13 +470,13 @@ name: break_clause_atomic_rtn_into_ptr_flat4
body: |
bb.0:
; GCN-LABEL: name: break_clause_atomic_rtn_into_ptr_flat4
- ; GCN: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
; XNACK-NEXT: S_NOP 0
- ; GCN-NEXT: $vgpr4 = FLAT_ATOMIC_ADD_RTN $vgpr5_vgpr6, $vgpr7, 0, 1, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr4 = FLAT_ATOMIC_ADD_RTN $vgpr5_vgpr6, $vgpr7, 0, 1, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
- $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr4 = FLAT_ATOMIC_ADD_RTN $vgpr5_vgpr6, $vgpr7, 0, 1, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr4 = FLAT_ATOMIC_ADD_RTN $vgpr5_vgpr6, $vgpr7, 0, 1, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -485,12 +485,12 @@ name: break_clause_atomic_nortn_ptr_load_flat4
body: |
bb.0:
; GCN-LABEL: name: break_clause_atomic_nortn_ptr_load_flat4
- ; GCN: FLAT_ATOMIC_ADD $vgpr0_vgpr1, $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: FLAT_ATOMIC_ADD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
- FLAT_ATOMIC_ADD $vgpr0_vgpr1, $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr2 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_ATOMIC_ADD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr2 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -500,13 +500,13 @@ name: break_clause_atomic_rtn_into_ptr_mubuf4
body: |
bb.0:
; GCN-LABEL: name: break_clause_atomic_rtn_into_ptr_mubuf4
- ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
; XNACK-NEXT: S_NOP 0
- ; GCN-NEXT: $vgpr2 = BUFFER_ATOMIC_ADD_OFFEN_RTN $vgpr2, $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 1, 0, implicit $exec
+ ; GCN-NEXT: $vgpr2 = BUFFER_ATOMIC_ADD_OFFEN_RTN $vgpr2, $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 1, implicit $exec
; GCN-NEXT: S_ENDPGM 0
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr2 = BUFFER_ATOMIC_ADD_OFFEN_RTN $vgpr2, $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 1, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+ $vgpr2 = BUFFER_ATOMIC_ADD_OFFEN_RTN $vgpr2, $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 1, implicit $exec
S_ENDPGM 0
...
---
@@ -517,11 +517,11 @@ body: |
bb.0:
; GCN-LABEL: name: break_clause_atomic_nortn_ptr_load_mubuf4
; GCN: BUFFER_ATOMIC_ADD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, implicit $exec
- ; GCN-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
; GCN-NEXT: S_ENDPGM 0
BUFFER_ATOMIC_ADD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, implicit $exec
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
---
@@ -532,11 +532,11 @@ name: no_break_clause_mubuf_load_novaddr
body: |
bb.0:
; GCN-LABEL: name: no_break_clause_mubuf_load_novaddr
- ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
; GCN-NEXT: S_ENDPGM 0
- $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+ $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
---
@@ -546,16 +546,16 @@ name: mix_load_store_clause
body: |
bb.0:
; GCN-LABEL: name: mix_load_store_clause
- ; GCN: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; XNACK-NEXT: S_NOP 0
- ; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr6, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr6, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr

- FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr6, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr6, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -565,15 +565,15 @@ name: mix_load_store_clause_same_address
body: |
bb.0:
; GCN-LABEL: name: mix_load_store_clause_same_address
- ; GCN: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; XNACK-NEXT: S_NOP 0
- ; GCN-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
- FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll b/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll
index eea193145b62..2c715c6faa67 100644
--- a/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll
+++ b/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll
@@ -9,27 +9,27 @@ define amdgpu_cs void @mmo_offsets0(<4 x i32> addrspace(6)* inreg noalias derefe
; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GCN: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1
- ; GCN: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM killed [[REG_SEQUENCE]], 0, 0, 0 :: (dereferenceable invariant load 16 from %ir.arg0, addrspace 6)
- ; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 16, align 1, addrspace 4)
+ ; GCN: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM killed [[REG_SEQUENCE]], 0, 0 :: (dereferenceable invariant load 16 from %ir.arg0, addrspace 6)
+ ; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 16, align 1, addrspace 4)
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
- ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
- ; GCN: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
- ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
+ ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
+ ; GCN: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
+ ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
- ; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 32, align 1, addrspace 4)
- ; GCN: BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
- ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
- ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
+ ; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 32, align 1, addrspace 4)
+ ; GCN: BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
+ ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
+ ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
- ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 48, align 1, addrspace 4)
- ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
- ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
- ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
+ ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 48, align 1, addrspace 4)
+ ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
+ ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
+ ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
- ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 64, align 1, addrspace 4)
- ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
- ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
- ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
+ ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 64, align 1, addrspace 4)
+ ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
+ ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
+ ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
; GCN: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 80, align 1, addrspace 4)
; GCN: BUFFER_ATOMIC_ADD_OFFEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
@@ -38,19 +38,19 @@ define amdgpu_cs void @mmo_offsets0(<4 x i32> addrspace(6)* inreg noalias derefe
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
; GCN: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY]], %subreg.sub1
; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 96, align 1, addrspace 4)
+ ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 96, align 1, addrspace 4)
; GCN: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[DEF]]
; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0
; GCN: [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[DEF1]]
; GCN: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]].sub0
; GCN: [[DEF2:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[DEF2]]
; GCN: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY6]].sub0
; GCN: [[DEF3:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[DEF3]]
; GCN: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY8]].sub0
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
@@ -60,23 +60,23 @@ define amdgpu_cs void @mmo_offsets0(<4 x i32> addrspace(6)* inreg noalias derefe
; GCN: BUFFER_ATOMIC_ADD_F32_IDXEN [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_ATOMIC_ADD_F32_IDXEN [[V_MOV_B32_e32_1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
- ; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 128, align 1, addrspace 4)
+ ; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 128, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 128, align 1, addrspace 4)
; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 64
- ; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_1]], 64, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 128, align 1, addrspace 4)
+ ; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_1]], 64, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 128, align 1, addrspace 4)
; GCN: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 128
- ; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 128, align 1, addrspace 4)
- ; GCN: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
+ ; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 128, align 1, addrspace 4)
+ ; GCN: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[COPY]]
- ; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[COPY10]], 128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
+ ; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[COPY10]], 128, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
- ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 144, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 144, align 1, addrspace 4)
+ ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 144, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 144, align 1, addrspace 4)
; GCN: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 72
- ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_3]], 72, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 144, align 1, addrspace 4)
+ ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_3]], 72, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 144, align 1, addrspace 4)
; GCN: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 144
- ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 144, align 1, addrspace 4)
- ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
+ ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 144, align 1, addrspace 4)
+ ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[COPY]]
- ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[COPY11]], 144, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
+ ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[COPY11]], 144, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
; GCN: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 160, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 160, align 1, addrspace 4)
; GCN: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 80
@@ -88,77 +88,77 @@ define amdgpu_cs void @mmo_offsets0(<4 x i32> addrspace(6)* inreg noalias derefe
; GCN: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[COPY12]], 160, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
; GCN: [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 176, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 176, align 1, addrspace 4)
+ ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 176, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 176, align 1, addrspace 4)
; GCN: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[DEF4]]
; GCN: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY13]].sub0
; GCN: [[S_MOV_B32_7:%[0-9]+]]:sreg_32 = S_MOV_B32 88
; GCN: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_7]], 88, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 176, align 1, addrspace 4)
+ ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_7]], 88, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 176, align 1, addrspace 4)
; GCN: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[DEF5]]
; GCN: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY15]].sub0
; GCN: [[S_MOV_B32_8:%[0-9]+]]:sreg_32 = S_MOV_B32 176
; GCN: [[DEF6:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_8]], 0, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 176, align 1, addrspace 4)
+ ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_8]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 176, align 1, addrspace 4)
; GCN: [[COPY17:%[0-9]+]]:vreg_64 = COPY [[DEF6]]
; GCN: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY17]].sub0
; GCN: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_8]], 0, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_8]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY19:%[0-9]+]]:vreg_64 = COPY [[DEF7]]
; GCN: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY19]].sub0
; GCN: [[COPY21:%[0-9]+]]:sreg_32 = COPY [[COPY]]
; GCN: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[COPY21]], 176, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[COPY21]], 176, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY22:%[0-9]+]]:vreg_64 = COPY [[DEF8]]
; GCN: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY22]].sub0
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
- ; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 192, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 192, align 1, addrspace 4)
+ ; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 192, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 192, align 1, addrspace 4)
; GCN: [[S_MOV_B32_9:%[0-9]+]]:sreg_32 = S_MOV_B32 96
- ; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET2]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_9]], 96, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 192, align 1, addrspace 4)
+ ; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET2]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_9]], 96, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 192, align 1, addrspace 4)
; GCN: [[S_MOV_B32_10:%[0-9]+]]:sreg_32 = S_MOV_B32 192
- ; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET3]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_10]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 192, align 1, addrspace 4)
- ; GCN: BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_10]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
+ ; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET3]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 192, align 1, addrspace 4)
+ ; GCN: BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY24:%[0-9]+]]:sreg_32 = COPY [[COPY]]
- ; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[COPY24]], 192, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
+ ; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[COPY24]], 192, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
- ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 208, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 208, align 1, addrspace 4)
+ ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 208, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 208, align 1, addrspace 4)
; GCN: [[S_MOV_B32_11:%[0-9]+]]:sreg_32 = S_MOV_B32 104
- ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET2]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_11]], 104, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 208, align 1, addrspace 4)
+ ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET2]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_11]], 104, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 208, align 1, addrspace 4)
; GCN: [[S_MOV_B32_12:%[0-9]+]]:sreg_32 = S_MOV_B32 208
- ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET3]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_12]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 208, align 1, addrspace 4)
- ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_12]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
+ ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET3]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_12]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 208, align 1, addrspace 4)
+ ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_12]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY25:%[0-9]+]]:sreg_32 = COPY [[COPY]]
- ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[COPY25]], 208, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
+ ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[COPY25]], 208, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
; GCN: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY26]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 224, align 1, addrspace 4)
+ ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY26]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 224, align 1, addrspace 4)
; GCN: [[S_MOV_B32_13:%[0-9]+]]:sreg_32 = S_MOV_B32 112
; GCN: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY27]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_13]], 112, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 224, align 1, addrspace 4)
+ ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY27]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_13]], 112, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 224, align 1, addrspace 4)
; GCN: [[S_MOV_B32_14:%[0-9]+]]:sreg_32 = S_MOV_B32 224
; GCN: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY28]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_14]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 224, align 1, addrspace 4)
+ ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY28]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_14]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 224, align 1, addrspace 4)
; GCN: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[COPY]], %subreg.sub1
- ; GCN: [[BUFFER_LOAD_DWORDX4_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_BOTHEN [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_14]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
+ ; GCN: [[BUFFER_LOAD_DWORDX4_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_BOTHEN [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_14]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: [[COPY30:%[0-9]+]]:sreg_32 = COPY [[COPY]]
- ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY29]], [[S_LOAD_DWORDX4_IMM]], [[COPY30]], 224, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
- ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
- ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
+ ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY29]], [[S_LOAD_DWORDX4_IMM]], [[COPY30]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
+ ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
+ ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
; GCN: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY31]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 240, align 1, addrspace 4)
+ ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY31]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 240, align 1, addrspace 4)
; GCN: [[S_MOV_B32_15:%[0-9]+]]:sreg_32 = S_MOV_B32 120
; GCN: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY32]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_15]], 120, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 240, align 1, addrspace 4)
+ ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY32]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_15]], 120, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 240, align 1, addrspace 4)
; GCN: [[S_MOV_B32_16:%[0-9]+]]:sreg_32 = S_MOV_B32 240
; GCN: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY33]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_16]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 240, align 1, addrspace 4)
- ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_16]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
+ ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY33]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_16]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 240, align 1, addrspace 4)
+ ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_16]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: [[COPY35:%[0-9]+]]:sreg_32 = COPY [[COPY]]
- ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY34]], [[S_LOAD_DWORDX4_IMM]], [[COPY35]], 240, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
- ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
- ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
+ ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY34]], [[S_LOAD_DWORDX4_IMM]], [[COPY35]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
+ ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
+ ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
; GCN: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY36]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 256, align 1, addrspace 4)
@@ -176,68 +176,68 @@ define amdgpu_cs void @mmo_offsets0(<4 x i32> addrspace(6)* inreg noalias derefe
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
; GCN: [[COPY41:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: [[DEF9:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY41]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 272, align 1, addrspace 4)
+ ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY41]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 272, align 1, addrspace 4)
; GCN: [[COPY42:%[0-9]+]]:vreg_64 = COPY [[DEF9]]
; GCN: [[COPY43:%[0-9]+]]:vgpr_32 = COPY [[COPY42]].sub0
; GCN: [[S_MOV_B32_18:%[0-9]+]]:sreg_32 = S_MOV_B32 136
; GCN: [[COPY44:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: [[DEF10:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY44]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_18]], 136, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 272, align 1, addrspace 4)
+ ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY44]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_18]], 136, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 272, align 1, addrspace 4)
; GCN: [[COPY45:%[0-9]+]]:vreg_64 = COPY [[DEF10]]
; GCN: [[COPY46:%[0-9]+]]:vgpr_32 = COPY [[COPY45]].sub0
; GCN: [[S_MOV_B32_19:%[0-9]+]]:sreg_32 = S_MOV_B32 272
; GCN: [[COPY47:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: [[DEF11:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY47]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_19]], 0, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 272, align 1, addrspace 4)
+ ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY47]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_19]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 272, align 1, addrspace 4)
; GCN: [[COPY48:%[0-9]+]]:vreg_64 = COPY [[DEF11]]
; GCN: [[COPY49:%[0-9]+]]:vgpr_32 = COPY [[COPY48]].sub0
; GCN: [[DEF12:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; GCN: BUFFER_ATOMIC_CMPSWAP_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_19]], 0, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; GCN: BUFFER_ATOMIC_CMPSWAP_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_19]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY50:%[0-9]+]]:vreg_64 = COPY [[DEF12]]
; GCN: [[COPY51:%[0-9]+]]:vgpr_32 = COPY [[COPY50]].sub0
; GCN: [[COPY52:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: [[COPY53:%[0-9]+]]:sreg_32 = COPY [[COPY]]
; GCN: [[DEF13:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY52]], [[S_LOAD_DWORDX4_IMM]], [[COPY53]], 272, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY52]], [[S_LOAD_DWORDX4_IMM]], [[COPY53]], 272, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY54:%[0-9]+]]:vreg_64 = COPY [[DEF13]]
; GCN: [[COPY55:%[0-9]+]]:vgpr_32 = COPY [[COPY54]].sub0
; GCN: [[DEF14:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY56:%[0-9]+]]:vreg_64 = COPY [[DEF14]]
; GCN: [[COPY57:%[0-9]+]]:vgpr_32 = COPY [[COPY56]].sub0
; GCN: [[DEF15:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
+ ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY58:%[0-9]+]]:vreg_64 = COPY [[DEF15]]
; GCN: [[COPY59:%[0-9]+]]:vgpr_32 = COPY [[COPY58]].sub0
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
; GCN: [[COPY60:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN2]], [[COPY60]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 288, align 1, addrspace 4)
+ ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN2]], [[COPY60]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 288, align 1, addrspace 4)
; GCN: [[COPY61:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN3]], [[COPY61]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 144, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 288, align 1, addrspace 4)
+ ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN3]], [[COPY61]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 144, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 288, align 1, addrspace 4)
; GCN: [[S_MOV_B32_20:%[0-9]+]]:sreg_32 = S_MOV_B32 288
; GCN: [[COPY62:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN4]], [[COPY62]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_20]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 288, align 1, addrspace 4)
- ; GCN: BUFFER_STORE_DWORDX4_BOTHEN_exact killed [[BUFFER_LOAD_DWORDX4_BOTHEN]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_20]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
+ ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN4]], [[COPY62]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_20]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 288, align 1, addrspace 4)
+ ; GCN: BUFFER_STORE_DWORDX4_BOTHEN_exact killed [[BUFFER_LOAD_DWORDX4_BOTHEN]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_20]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY63:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: [[COPY64:%[0-9]+]]:sreg_32 = COPY [[COPY]]
- ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN5]], [[COPY63]], [[S_LOAD_DWORDX4_IMM]], [[COPY64]], 288, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
- ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN6]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
- ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN7]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
+ ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN5]], [[COPY63]], [[S_LOAD_DWORDX4_IMM]], [[COPY64]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
+ ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN6]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
+ ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN7]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
; GCN: [[COPY65:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN2]], [[COPY65]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 304, align 1, addrspace 4)
+ ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN2]], [[COPY65]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 304, align 1, addrspace 4)
; GCN: [[S_MOV_B32_21:%[0-9]+]]:sreg_32 = S_MOV_B32 152
; GCN: [[COPY66:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN3]], [[COPY66]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_21]], 152, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 304, align 1, addrspace 4)
+ ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN3]], [[COPY66]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_21]], 152, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 304, align 1, addrspace 4)
; GCN: [[S_MOV_B32_22:%[0-9]+]]:sreg_32 = S_MOV_B32 304
; GCN: [[COPY67:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN4]], [[COPY67]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_22]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 304, align 1, addrspace 4)
- ; GCN: BUFFER_STORE_FORMAT_XYZW_BOTHEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_22]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
+ ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN4]], [[COPY67]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_22]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 304, align 1, addrspace 4)
+ ; GCN: BUFFER_STORE_FORMAT_XYZW_BOTHEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_22]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY68:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: [[COPY69:%[0-9]+]]:sreg_32 = COPY [[COPY]]
- ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5]], [[COPY68]], [[S_LOAD_DWORDX4_IMM]], [[COPY69]], 304, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
- ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN6]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
- ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
+ ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5]], [[COPY68]], [[S_LOAD_DWORDX4_IMM]], [[COPY69]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
+ ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN6]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
+ ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: S_ENDPGM 0
bb.0:
%tmp0 = load <4 x i32>, <4 x i32> addrspace(6)* %arg0, align 16, !invariant.load !0
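All of the rewrites above follow one pattern: the separate GLC, SLC and DLC immediates collapse into a single cache-policy bitmask, so runs of zero flag operands shrink to a single 0, and the glc-set atomics (previously "-1, 0") become a plain "1". A minimal, self-contained sketch of that folding follows; the GLC bit position is evident from the cmpswap hunks above, while the SLC and DLC positions are assumptions for illustration only, not the authoritative definitions from SIDefines.h:

#include <cassert>
#include <cstdint>

namespace CPol {
enum CPol : uint32_t {
  GLC = 1u << 0, // bit 0: matches "-1, 0" -> "1" in the atomic hunks above
  SLC = 1u << 1, // assumed position, for illustration
  DLC = 1u << 2, // assumed position, for illustration
  ALL = GLC | SLC | DLC
};
} // namespace CPol

// Fold the old per-flag immediates into one cache-policy operand.
static uint32_t foldCachePolicy(int64_t Glc, int64_t Slc, int64_t Dlc) {
  uint32_t Policy = 0;
  if (Glc) Policy |= CPol::GLC;
  if (Slc) Policy |= CPol::SLC;
  if (Dlc) Policy |= CPol::DLC;
  return Policy;
}

int main() {
  // glc set (-1), slc clear: the single operand becomes 1, matching
  // the BUFFER_ATOMIC_CMPSWAP_* rewrites above.
  assert(foldCachePolicy(-1, 0, 0) == 1);
  // All flags clear: the three zeros collapse to a single 0.
  assert(foldCachePolicy(0, 0, 0) == 0);
  return 0;
}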
diff --git a/llvm/test/CodeGen/AMDGPU/bundle-latency.mir b/llvm/test/CodeGen/AMDGPU/bundle-latency.mir
index 7937b45fb639..adbe27735c83 100644
--- a/llvm/test/CodeGen/AMDGPU/bundle-latency.mir
+++ b/llvm/test/CodeGen/AMDGPU/bundle-latency.mir
@@ -10,14 +10,14 @@ body: |
bb.0:
; GCN-LABEL: name: src_bundle_latency
; GCN: $vgpr0, $vgpr1 = BUNDLE undef $vgpr3_vgpr4, implicit $exec {
- ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec
- ; GCN: $vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, implicit $exec
+ ; GCN: $vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, implicit $exec
; GCN: }
; GCN: $vgpr6 = V_ADD_F32_e32 killed $vgpr0, $vgpr0, implicit $mode, implicit $exec
; GCN: $vgpr5 = V_ADD_F32_e32 killed $vgpr1, $vgpr1, implicit $mode, implicit $exec
$vgpr0, $vgpr1 = BUNDLE undef $vgpr3_vgpr4, implicit $exec {
- $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec
- $vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, implicit $exec
+ $vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, implicit $exec
}
$vgpr5 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
$vgpr6 = V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec
@@ -32,13 +32,13 @@ body: |
; GCN: $vgpr1 = V_ADD_F32_e32 undef $vgpr6, undef $vgpr6, implicit $mode, implicit $exec
; GCN: $vgpr0 = V_ADD_F32_e32 undef $vgpr5, undef $vgpr5, implicit $mode, implicit $exec
; GCN: BUNDLE killed $vgpr0, killed $vgpr1, undef $vgpr3_vgpr4, implicit $exec {
- ; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, killed $vgpr1, 0, 0, 0, 0, 0, implicit $exec
- ; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, killed $vgpr0, 4, 0, 0, 0, 0, implicit $exec
+ ; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, killed $vgpr1, 0, 0, implicit $exec
+ ; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, killed $vgpr0, 4, 0, implicit $exec
; GCN: }
$vgpr0 = V_ADD_F32_e32 undef $vgpr5, undef $vgpr5, implicit $mode, implicit $exec
$vgpr1 = V_ADD_F32_e32 undef $vgpr6, undef $vgpr6, implicit $mode, implicit $exec
BUNDLE $vgpr0, $vgpr1, undef $vgpr3_vgpr4, implicit $exec {
- GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr1, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr0, 4, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr1, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr0, 4, 0, implicit $exec
}
...
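On the output side, only bit tests against that one immediate are needed to reconstruct the assembler suffixes. A short illustrative decoder under the same assumed bit layout as the sketch above (not the actual AMDGPUInstPrinter code):

#include <cstdint>
#include <iostream>
#include <string>

// Decode the single cache-policy immediate back into asm suffixes.
static std::string printCachePolicy(uint32_t Policy) {
  std::string Suffix;
  if (Policy & 1) Suffix += " glc"; // bit layout assumed, as above
  if (Policy & 2) Suffix += " slc";
  if (Policy & 4) Suffix += " dlc";
  return Suffix;
}

int main() {
  // Prints "global_load_dword v0, v[3:4], off glc slc".
  std::cout << "global_load_dword v0, v[3:4], off"
            << printCachePolicy(3) << "\n";
  return 0;
}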
diff --git a/llvm/test/CodeGen/AMDGPU/call-waw-waitcnt.mir b/llvm/test/CodeGen/AMDGPU/call-waw-waitcnt.mir
index e3021cfe0965..2b62a18e6a88 100644
--- a/llvm/test/CodeGen/AMDGPU/call-waw-waitcnt.mir
+++ b/llvm/test/CodeGen/AMDGPU/call-waw-waitcnt.mir
@@ -25,7 +25,7 @@ body: |
; GCN-LABEL: name: call_waw_waitcnt
; GCN: liveins: $sgpr4_sgpr5, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3
; GCN: S_WAITCNT 0
- ; GCN: $sgpr30_sgpr31 = S_LOAD_DWORDX2_IMM $sgpr4_sgpr5, 0, 0, 0
+ ; GCN: $sgpr30_sgpr31 = S_LOAD_DWORDX2_IMM $sgpr4_sgpr5, 0, 0
; GCN: $sgpr33 = S_MOV_B32 killed $sgpr7
; GCN: $flat_scr_lo = S_ADD_U32 killed $sgpr4, $sgpr33, implicit-def $scc
; GCN: $flat_scr_hi = S_ADDC_U32 killed $sgpr5, 0, implicit-def $scc, implicit killed $scc
@@ -38,7 +38,7 @@ body: |
; GCN: S_WAITCNT 49279
; GCN: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, @func, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def dead $vgpr0
; GCN: S_ENDPGM 0
- $sgpr30_sgpr31 = S_LOAD_DWORDX2_IMM $sgpr4_sgpr5, 0, 0, 0
+ $sgpr30_sgpr31 = S_LOAD_DWORDX2_IMM $sgpr4_sgpr5, 0, 0
$sgpr33 = S_MOV_B32 killed $sgpr7
$flat_scr_lo = S_ADD_U32 killed $sgpr4, $sgpr33, implicit-def $scc
$flat_scr_hi = S_ADDC_U32 killed $sgpr5, 0, implicit-def $scc, implicit killed $scc
diff --git a/llvm/test/CodeGen/AMDGPU/clamp-omod-special-case.mir b/llvm/test/CodeGen/AMDGPU/clamp-omod-special-case.mir
index 64327f7e6d0c..8876af8f0833 100644
--- a/llvm/test/CodeGen/AMDGPU/clamp-omod-special-case.mir
+++ b/llvm/test/CodeGen/AMDGPU/clamp-omod-special-case.mir
@@ -43,8 +43,8 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
- %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
- %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%25 = REG_SEQUENCE %3, 1, %24, 2
%10 = S_MOV_B32 61440
@@ -55,10 +55,10 @@ body: |
%26 = V_LSHL_B64_e64 killed %25, 2, implicit $exec
%16 = REG_SEQUENCE killed %4, 17, %12, 18
%18 = COPY %26
- %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec
%20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
%21 = V_MAX_F32_e64 0, killed %20, 0, killed %20, 0, 0, implicit $mode, implicit $exec
- BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -105,8 +105,8 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
- %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
- %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%25 = REG_SEQUENCE %3, 1, %24, 2
%10 = S_MOV_B32 61440
@@ -117,10 +117,10 @@ body: |
%26 = V_LSHL_B64_e64 killed %25, 2, implicit $exec
%16 = REG_SEQUENCE killed %4, 17, %12, 18
%18 = COPY %26
- %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec
%20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
%21 = V_MAX_F32_e64 0, killed %20, 0, killed %20, 1, 3, implicit $mode, implicit $exec
- BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
---
@@ -168,8 +168,8 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
- %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
- %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%25 = REG_SEQUENCE %3, 1, %24, 2
%10 = S_MOV_B32 61440
@@ -180,10 +180,10 @@ body: |
%26 = V_LSHL_B64_e64 killed %25, 2, implicit $exec
%16 = REG_SEQUENCE killed %4, 17, %12, 18
%18 = COPY %26
- %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec
%20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
%21 = V_MUL_F32_e64 0, killed %20, 0, 1056964608, 0, 3, implicit $mode, implicit $exec
- BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -233,8 +233,8 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
- %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
- %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%25 = REG_SEQUENCE %3, 1, %24, 2
%10 = S_MOV_B32 61440
@@ -245,10 +245,10 @@ body: |
%26 = V_LSHL_B64_e64 killed %25, 2, implicit $exec
%16 = REG_SEQUENCE killed %4, 17, %12, 18
%18 = COPY %26
- %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec
%20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
%21 = V_MUL_F32_e64 0, killed %20, 0, 1056964608, 1, 0, implicit $mode, implicit $exec
- BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -310,8 +310,8 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
- %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
- %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%25 = REG_SEQUENCE %3, 1, %24, 2
%10 = S_MOV_B32 61440
@@ -322,10 +322,10 @@ body: |
%26 = V_LSHL_B64_e64 killed %25, 2, implicit $exec
%16 = REG_SEQUENCE killed %4, 17, %12, 18
%18 = COPY %26
- %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec
%20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
%21 = V_ADD_F32_e64 0, killed %20, 0, killed %20, 0, 3, implicit $mode, implicit $exec
- BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -375,8 +375,8 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
- %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
- %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%25 = REG_SEQUENCE %3, 1, %24, 2
%10 = S_MOV_B32 61440
@@ -387,10 +387,10 @@ body: |
%26 = V_LSHL_B64_e64 killed %25, 2, implicit $exec
%16 = REG_SEQUENCE killed %4, 17, %12, 18
%18 = COPY %26
- %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec
%20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
%21 = V_ADD_F32_e64 0, killed %20, 0, killed %20, 1, 0, implicit $mode, implicit $exec
- BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/cluster-flat-loads-postra.mir b/llvm/test/CodeGen/AMDGPU/cluster-flat-loads-postra.mir
index d9fc00c522ab..a52faa79d52c 100644
--- a/llvm/test/CodeGen/AMDGPU/cluster-flat-loads-postra.mir
+++ b/llvm/test/CodeGen/AMDGPU/cluster-flat-loads-postra.mir
@@ -17,15 +17,15 @@ body: |
$vgpr0_vgpr1 = IMPLICIT_DEF
$vgpr4_vgpr5 = IMPLICIT_DEF
- $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
- $vgpr4 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ $vgpr4 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
$vgpr2 = IMPLICIT_DEF
$vgpr3 = IMPLICIT_DEF
$vgpr6 = IMPLICIT_DEF
$vgpr0 = V_ADD_CO_U32_e32 16, $vgpr2, implicit-def $vcc, implicit $exec
$vgpr1 = V_ADDC_U32_e32 $vgpr3, killed $vgpr6, implicit-def dead $vcc, implicit $vcc, implicit $exec
- FLAT_STORE_DWORD $vgpr2_vgpr3, killed $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
- FLAT_STORE_DWORD $vgpr0_vgpr1, killed $vgpr4, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+ FLAT_STORE_DWORD $vgpr2_vgpr3, killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+ FLAT_STORE_DWORD $vgpr0_vgpr1, killed $vgpr4, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/cluster-flat-loads.mir b/llvm/test/CodeGen/AMDGPU/cluster-flat-loads.mir
index 1e307518c956..9693de8a1d55 100644
--- a/llvm/test/CodeGen/AMDGPU/cluster-flat-loads.mir
+++ b/llvm/test/CodeGen/AMDGPU/cluster-flat-loads.mir
@@ -14,7 +14,7 @@ registers:
body: |
bb.0:
%0 = IMPLICIT_DEF
- %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ %1 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
%2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit $mode, implicit $exec
- %3 = FLAT_LOAD_DWORD %0, 4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ %3 = FLAT_LOAD_DWORD %0, 4, 0, implicit $exec, implicit $flat_scr :: (load 4)
...
diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir b/llvm/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir
index 6741d4d46a8f..181cfba5b0aa 100644
--- a/llvm/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir
+++ b/llvm/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir
@@ -30,7 +30,7 @@ body: |
%14:vgpr_32 = V_AND_B32_e32 1, %13, implicit $exec
%15:sreg_64_xexec = V_CMP_EQ_U32_e64 0, %14, implicit $exec
%16:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %15, implicit $exec
- BUFFER_STORE_DWORD_OFFEN_exact %16, undef %17:vgpr_32, undef %18:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into constant-pool, align 1, addrspace 4)
+ BUFFER_STORE_DWORD_OFFEN_exact %16, undef %17:vgpr_32, undef %18:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into constant-pool, align 1, addrspace 4)
S_ENDPGM 0
bb.2:
@@ -78,7 +78,7 @@ body: |
bb.8:
successors: %bb.10
- %31:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %32:vgpr_32, undef %33:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from constant-pool, align 1, addrspace 4)
+ %31:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %32:vgpr_32, undef %33:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from constant-pool, align 1, addrspace 4)
%34:sreg_64_xexec = V_CMP_NE_U32_e64 0, %31, implicit $exec
%35:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, -1, %34, implicit $exec
%28:vgpr_32 = COPY %35
diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-copymi-not-live.mir b/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-copymi-not-live.mir
index c8685717038a..38714d205b8e 100644
--- a/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-copymi-not-live.mir
+++ b/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-copymi-not-live.mir
@@ -83,7 +83,7 @@ body: |
bb.9:
successors: %bb.10(0x80000000)
- %19:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %18, undef %20:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
+ %19:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %18, undef %20:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
%21:sreg_64 = V_CMP_NE_U32_e64 target-flags(amdgpu-gotprel) 0, killed %19.sub0, implicit $exec
%22:sreg_64 = COPY $exec, implicit-def $exec
%23:sreg_64 = S_AND_B64 %22, %21, implicit-def dead $scc
@@ -125,7 +125,7 @@ body: |
%27.sub5:sgpr_256 = COPY %26
%27.sub6:sgpr_256 = COPY %26
%27.sub7:sgpr_256 = COPY killed %26
- %28:vgpr_32 = IMAGE_LOAD_V1_V4 killed %25, killed %27, 2, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, addrspace 4)
+ %28:vgpr_32 = IMAGE_LOAD_V1_V4 killed %25, killed %27, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, addrspace 4)
%29:vgpr_32 = nofpexcept V_ADD_F32_e32 0, killed %28, implicit $mode, implicit $exec
$m0 = S_MOV_B32 -1
DS_WRITE_B32 undef %30:vgpr_32, killed %29, 0, 0, implicit $m0, implicit $exec :: (store 4 into `i32 addrspace(3)* undef`, addrspace 3)
diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir b/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir
index de14c0304ba0..6145657a72ff 100644
--- a/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir
+++ b/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir
@@ -68,7 +68,7 @@ body: |
%23:vreg_128 = COPY killed %17
%24:sreg_64 = COPY killed %16
%25:vgpr_32 = V_OR_B32_e32 %22, %11, implicit $exec
- %26:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %25, undef %27:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
+ %26:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %25, undef %27:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
%28:vgpr_32 = V_LSHRREV_B32_e32 30, killed %26.sub0, implicit $exec
%29:vreg_128 = COPY killed %21
%29.sub0:vreg_128 = COPY %1
@@ -257,7 +257,7 @@ body: |
%109.sub5:sgpr_256 = COPY %108
%109.sub6:sgpr_256 = COPY %108
%109.sub7:sgpr_256 = COPY killed %108
- %110:vgpr_32 = IMAGE_SAMPLE_V1_V2 killed %107, killed %109, undef %111:sgpr_128, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, addrspace 4)
+ %110:vgpr_32 = IMAGE_SAMPLE_V1_V2 killed %107, killed %109, undef %111:sgpr_128, 8, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, addrspace 4)
%112:vgpr_32 = nofpexcept V_MUL_F32_e32 0, killed %110, implicit $mode, implicit $exec
%113:vgpr_32 = nofpexcept V_MUL_F32_e32 0, killed %112, implicit $mode, implicit $exec
%114:vgpr_32 = nofpexcept V_MAD_F32_e64 0, killed %113, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-subreg-join.mir b/llvm/test/CodeGen/AMDGPU/coalescer-subreg-join.mir
index 5d432ab157a2..d5644f39e2b5 100644
--- a/llvm/test/CodeGen/AMDGPU/coalescer-subreg-join.mir
+++ b/llvm/test/CodeGen/AMDGPU/coalescer-subreg-join.mir
@@ -46,10 +46,10 @@ body: |
%0 = COPY $sgpr2_sgpr3
%1 = COPY $vgpr2
%2 = COPY $vgpr3
- %3 = S_LOAD_DWORDX8_IMM %0, 0, 0, 0
- %4 = S_LOAD_DWORDX4_IMM %0, 12, 0, 0
- %5 = S_LOAD_DWORDX8_IMM %0, 16, 0, 0
- %6 = S_LOAD_DWORDX4_IMM %0, 28, 0, 0
+ %3 = S_LOAD_DWORDX8_IMM %0, 0, 0
+ %4 = S_LOAD_DWORDX4_IMM %0, 12, 0
+ %5 = S_LOAD_DWORDX8_IMM %0, 16, 0
+ %6 = S_LOAD_DWORDX4_IMM %0, 28, 0
undef %7.sub0 = S_MOV_B32 212739
%20 = COPY %7
%11 = COPY %20
@@ -61,7 +61,7 @@ body: |
%11.sub6 = COPY %1
%11.sub7 = COPY %1
%11.sub8 = COPY %1
- dead %18 = IMAGE_SAMPLE_C_D_O_V1_V16 %11, %3, %4, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (load 4)
+ dead %18 = IMAGE_SAMPLE_C_D_O_V1_V16 %11, %3, %4, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (load 4)
%20.sub1 = COPY %2
%20.sub2 = COPY %2
%20.sub3 = COPY %2
@@ -70,6 +70,6 @@ body: |
%20.sub6 = COPY %2
%20.sub7 = COPY %2
%20.sub8 = COPY %2
- dead %27 = IMAGE_SAMPLE_C_D_O_V1_V16 %20, %5, %6, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (load 4)
+ dead %27 = IMAGE_SAMPLE_C_D_O_V1_V16 %20, %5, %6, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (load 4)
...
diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir b/llvm/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir
index 4bd73ea682f9..7b8664fdc42f 100644
--- a/llvm/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir
+++ b/llvm/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir
@@ -11,7 +11,7 @@
#
# GCN-LABEL: bb.6:
# GCN: successors: %bb.7(0x{{[0-9]+}}), %bb.18(0x{{[0-9]+}})
-# GCN: %{{[0-9]+}}:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %{{[0-9]+}}, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+# GCN: %{{[0-9]+}}:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %{{[0-9]+}}, 0, 0, 0, 0, 0, implicit $exec
#
--- |
@@ -69,7 +69,7 @@ body: |
%10:sreg_64 = COPY killed %5
undef %11.sub2:sgpr_128 = COPY %4
%11.sub3:sgpr_128 = COPY %3
- %12:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET killed %11, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ %12:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET killed %11, 0, 0, 0, 0, 0, implicit $exec
undef %13.sub1:vreg_128 = COPY %9.sub1
%13.sub2:vreg_128 = COPY %9.sub2
%14:sreg_64 = nofpexcept V_CMP_GT_F32_e64 0, target-flags(amdgpu-rel32-lo) 0, 0, killed %12.sub3, 0, implicit $mode, implicit $exec
@@ -161,7 +161,7 @@ body: |
bb.18:
successors: %bb.7(0x80000000)
dead %59:vgpr_32 = nofpexcept V_FMA_F32_e64 0, killed %9.sub2, 0, undef %60:vgpr_32, 0, undef %61:vgpr_32, 0, 0, implicit $mode, implicit $exec
- dead %62:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %63:vgpr_32, undef %64:sgpr_128, undef %65:sreg_32, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ dead %62:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %63:vgpr_32, undef %64:sgpr_128, undef %65:sreg_32, 0, 0, 0, 0, implicit $exec
undef %66.sub1:vreg_128 = COPY %13.sub1
%66.sub2:vreg_128 = COPY %13.sub2
%67:sreg_64 = nofpexcept V_CMP_NGT_F32_e64 0, 0, 0, undef %68:vgpr_32, 0, implicit $mode, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir b/llvm/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir
index 01a60108598c..e09816438f05 100644
--- a/llvm/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir
+++ b/llvm/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir
@@ -47,7 +47,7 @@ body: |
%4.sub5:sgpr_256 = COPY %1
%4.sub6:sgpr_256 = COPY %1
%4.sub7:sgpr_256 = COPY killed %1
- %5:vgpr_32 = IMAGE_LOAD_V1_V4 killed %3, killed %4, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, addrspace 4)
+ %5:vgpr_32 = IMAGE_LOAD_V1_V4 killed %3, killed %4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, addrspace 4)
%6:vgpr_32 = nofpexcept V_MAD_F32_e64 0, killed %5, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
%7:vgpr_32 = nofpexcept V_RCP_F32_e32 killed %6, implicit $mode, implicit $exec
%8:vgpr_32 = nofpexcept V_MUL_F32_e32 0, killed %7, implicit $mode, implicit $exec
@@ -145,10 +145,10 @@ body: |
%40:vgpr_32 = nofpexcept V_MAD_F32_e64 0, killed %39, 0, -1090519040, 0, 1056964608, 0, 0, implicit $mode, implicit $exec
%41:vgpr_32 = nofpexcept V_MAD_F32_e64 0, killed %40, 0, 0, 0, -1090519040, 0, 0, implicit $mode, implicit $exec
%42:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 killed %41, implicit $mode, implicit $exec
- %43:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %44:sgpr_128, 12, 0, 0 :: (dereferenceable invariant load 4)
+ %43:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %44:sgpr_128, 12, 0 :: (dereferenceable invariant load 4)
%45:vgpr_32 = V_MUL_LO_I32_e64 killed %42, killed %43, implicit $exec
%46:vgpr_32 = V_LSHLREV_B32_e32 2, killed %45, implicit $exec
- %47:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN killed %46, undef %48:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from constant-pool, align 1, addrspace 4)
+ %47:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN killed %46, undef %48:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from constant-pool, align 1, addrspace 4)
%49:sreg_64 = V_CMP_NE_U32_e64 0, killed %47, implicit $exec
%50:sreg_64 = COPY $exec, implicit-def $exec
%51:sreg_64 = S_AND_B64 %50, %49, implicit-def dead $scc
diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf2.mir b/llvm/test/CodeGen/AMDGPU/collapse-endcf2.mir
index f522c3f30e4b..7f8ceafda2f9 100644
--- a/llvm/test/CodeGen/AMDGPU/collapse-endcf2.mir
+++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf2.mir
@@ -34,7 +34,7 @@ body: |
; GCN: S_BRANCH %bb.1
; GCN: bb.1:
; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000)
- ; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
+ ; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
; GCN: undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec
; GCN: %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %5.sub1
@@ -42,7 +42,7 @@ body: |
; GCN: %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec
; GCN: %5.sub3:sgpr_128 = S_MOV_B32 61440
; GCN: %5.sub2:sgpr_128 = S_MOV_B32 0
- ; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ ; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec
; GCN: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
@@ -54,7 +54,7 @@ body: |
; GCN: %5.sub0:sgpr_128 = COPY %5.sub2
; GCN: %5.sub1:sgpr_128 = COPY %5.sub2
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
- ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GCN: bb.3:
; GCN: successors: %bb.4(0x80000000)
; GCN: $exec = S_OR_B64 $exec, [[COPY4]], implicit-def $scc
@@ -83,7 +83,7 @@ body: |
bb.1:
successors: %bb.2, %bb.3
- undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
+ undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, %0, implicit $exec
%6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
%7:vgpr_32 = COPY %5.sub1
@@ -91,7 +91,7 @@ body: |
%8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, %7, %9, 0, implicit $exec
%5.sub3:sgpr_128 = S_MOV_B32 61440
%5.sub2:sgpr_128 = S_MOV_B32 0
- BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
%11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec
%12:sreg_64 = COPY $exec, implicit-def $exec
%13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc
@@ -103,7 +103,7 @@ body: |
%5.sub0:sgpr_128 = COPY %5.sub2
%5.sub1:sgpr_128 = COPY %5.sub2
%14:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
- BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
bb.3:
$exec = S_OR_B64 $exec, %12, implicit-def $scc
diff --git a/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir b/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
index 154ea71b7113..3440ed923260 100644
--- a/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
+++ b/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
@@ -27,7 +27,7 @@ body: |
liveins: $sgpr0_sgpr1
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sreg_32_xm0 = COPY %1.sub1
%3:sreg_32_xm0 = COPY %1.sub0
%4:sreg_32_xm0 = S_MOV_B32 61440
@@ -37,7 +37,7 @@ body: |
%8:sreg_32_xm0 = S_MOV_B32 9999
%9:sreg_32_xm0 = S_AND_B32 killed %7, killed %8, implicit-def dead $scc
%10:vgpr_32 = COPY %9
- BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -71,7 +71,7 @@ body: |
%3:vgpr_32 = COPY $vgpr0
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%13:vgpr_32 = V_ASHRREV_I32_e64 31, %3, implicit $exec
%14:vreg_64 = REG_SEQUENCE %3, %subreg.hi16, %13, %subreg.lo16
%15:vreg_64 = V_LSHLREV_B64_e64 2, killed %14, implicit $exec
@@ -85,19 +85,19 @@ body: |
%16:vgpr_32 = V_MOV_B32_e32 63, implicit $exec
%9:vgpr_32 = V_AND_B32_e64 %8, %6, implicit $exec
- FLAT_STORE_DWORD %19, %9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %19, %9, 0, 0, implicit $exec, implicit $flat_scr
%10:vgpr_32 = V_AND_B32_e64 %6, %8, implicit $exec
- FLAT_STORE_DWORD %19, %10, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %19, %10, 0, 0, implicit $exec, implicit $flat_scr
%11:vgpr_32 = V_AND_B32_e32 %8, %6, implicit $exec
- FLAT_STORE_DWORD %19, %11, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %19, %11, 0, 0, implicit $exec, implicit $flat_scr
%12:vgpr_32 = V_AND_B32_e64 %8, %8, implicit $exec
- FLAT_STORE_DWORD %19, %12, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %19, %12, 0, 0, implicit $exec, implicit $flat_scr
%13:vgpr_32 = V_AND_B32_e64 %16, %16, implicit $exec
- FLAT_STORE_DWORD %19, %13, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %19, %13, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
@@ -117,7 +117,7 @@ body: |
liveins: $sgpr0_sgpr1
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%5:sreg_32_xm0_xexec = S_MOV_B32 1
%6:sreg_32_xm0 = COPY %4.sub1
%7:sreg_32_xm0 = COPY %4.sub0
@@ -126,7 +126,7 @@ body: |
%10:sgpr_128 = REG_SEQUENCE killed %7, %subreg.hi16, killed %6, %subreg.lo16, killed %9, %subreg.sub0, killed %8, %subreg.sub0_sub1
%12:sreg_32_xm0 = S_LSHL_B32 killed %5, 12, implicit-def dead $scc
%13:vgpr_32 = COPY %12
- BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -144,7 +144,7 @@ body: |
liveins: $sgpr0_sgpr1
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %2:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %2:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%3:sreg_32_xm0_xexec = S_MOV_B32 999123
%4:sreg_32_xm0 = COPY %2.sub1
%5:sreg_32_xm0 = COPY %2.sub0
@@ -153,7 +153,7 @@ body: |
%8:sgpr_128 = REG_SEQUENCE killed %5, %subreg.hi16, killed %4, %subreg.lo16, killed %7, %subreg.sub0, killed %6, %subreg.sub0_sub1
%10:sreg_32_xm0 = S_ASHR_I32 killed %3, 12, implicit-def dead $scc
%11:vgpr_32 = COPY %10
- BUFFER_STORE_DWORD_OFFSET killed %11, killed %8, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed %11, killed %8, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -200,7 +200,7 @@ body: |
%2:vgpr_32 = COPY $vgpr0
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %3:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %3:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%15:vgpr_32 = V_ASHRREV_I32_e64 31, %2, implicit $exec
%16:vreg_64 = REG_SEQUENCE %2, %subreg.hi16, %15, %subreg.lo16
%17:vreg_64 = V_LSHLREV_B64_e64 2, killed %16, implicit $exec
@@ -221,34 +221,34 @@ body: |
%32:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
%11:vgpr_32 = V_ASHRREV_I32_e64 8, %10, implicit $exec
- FLAT_STORE_DWORD %20, %11, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %11, 0, 0, implicit $exec, implicit $flat_scr
%12:vgpr_32 = V_ASHRREV_I32_e64 %8, %10, implicit $exec
- FLAT_STORE_DWORD %20, %12, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %12, 0, 0, implicit $exec, implicit $flat_scr
%13:vgpr_32 = V_ASHR_I32_e64 %7, 3, implicit $exec
- FLAT_STORE_DWORD %20, %13, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %13, 0, 0, implicit $exec, implicit $flat_scr
%14:vgpr_32 = V_ASHR_I32_e64 7, %29, implicit $exec
- FLAT_STORE_DWORD %20, %14, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %14, 0, 0, implicit $exec, implicit $flat_scr
%15:vgpr_32 = V_ASHR_I32_e64 %27, %24, implicit $exec
- FLAT_STORE_DWORD %20, %15, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %15, 0, 0, implicit $exec, implicit $flat_scr
%22:vgpr_32 = V_ASHR_I32_e64 %6, 4, implicit $exec
- FLAT_STORE_DWORD %20, %22, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %22, 0, 0, implicit $exec, implicit $flat_scr
%23:vgpr_32 = V_ASHR_I32_e64 %6, %30, implicit $exec
- FLAT_STORE_DWORD %20, %23, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %23, 0, 0, implicit $exec, implicit $flat_scr
%25:vgpr_32 = V_ASHR_I32_e32 %31, %31, implicit $exec
- FLAT_STORE_DWORD %20, %25, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %25, 0, 0, implicit $exec, implicit $flat_scr
%26:vgpr_32 = V_ASHRREV_I32_e32 11, %10, implicit $exec
- FLAT_STORE_DWORD %20, %26, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %26, 0, 0, implicit $exec, implicit $flat_scr
%28:vgpr_32 = V_ASHR_I32_e32 %27, %32, implicit $exec
- FLAT_STORE_DWORD %20, %28, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %28, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
@@ -267,7 +267,7 @@ body: |
liveins: $sgpr0_sgpr1
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %2:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %2:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%3:sreg_32_xm0_xexec = S_MOV_B32 -999123
%4:sreg_32_xm0 = COPY %2.sub1
%5:sreg_32_xm0 = COPY %2.sub0
@@ -276,7 +276,7 @@ body: |
%8:sgpr_128 = REG_SEQUENCE killed %5, %subreg.hi16, killed %4, %subreg.lo16, killed %7, %subreg.sub0, killed %6, %subreg.sub0_sub1
%10:sreg_32_xm0 = S_LSHR_B32 killed %3, 12, implicit-def dead $scc
%11:vgpr_32 = COPY %10
- BUFFER_STORE_DWORD_OFFSET killed %11, killed %8, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed %11, killed %8, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -324,7 +324,7 @@ body: |
%2:vgpr_32 = COPY $vgpr0
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %3:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %3:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%15:vgpr_32 = V_ASHRREV_I32_e64 31, %2, implicit $exec
%16:vreg_64 = REG_SEQUENCE %2, %subreg.hi16, %15, %subreg.lo16
%17:vreg_64 = V_LSHLREV_B64_e64 2, killed %16, implicit $exec
@@ -345,34 +345,34 @@ body: |
%32:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
%11:vgpr_32 = V_LSHRREV_B32_e64 8, %10, implicit $exec
- FLAT_STORE_DWORD %20, %11, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %11, 0, 0, implicit $exec, implicit $flat_scr
%12:vgpr_32 = V_LSHRREV_B32_e64 %8, %10, implicit $exec
- FLAT_STORE_DWORD %20, %12, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %12, 0, 0, implicit $exec, implicit $flat_scr
%13:vgpr_32 = V_LSHR_B32_e64 %7, 3, implicit $exec
- FLAT_STORE_DWORD %20, %13, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %13, 0, 0, implicit $exec, implicit $flat_scr
%14:vgpr_32 = V_LSHR_B32_e64 7, %29, implicit $exec
- FLAT_STORE_DWORD %20, %14, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %14, 0, 0, implicit $exec, implicit $flat_scr
%15:vgpr_32 = V_LSHR_B32_e64 %27, %24, implicit $exec
- FLAT_STORE_DWORD %20, %15, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %15, 0, 0, implicit $exec, implicit $flat_scr
%22:vgpr_32 = V_LSHR_B32_e64 %6, 4, implicit $exec
- FLAT_STORE_DWORD %20, %22, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %22, 0, 0, implicit $exec, implicit $flat_scr
%23:vgpr_32 = V_LSHR_B32_e64 %6, %30, implicit $exec
- FLAT_STORE_DWORD %20, %23, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %23, 0, 0, implicit $exec, implicit $flat_scr
%25:vgpr_32 = V_LSHR_B32_e32 %31, %31, implicit $exec
- FLAT_STORE_DWORD %20, %25, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %25, 0, 0, implicit $exec, implicit $flat_scr
%26:vgpr_32 = V_LSHRREV_B32_e32 11, %10, implicit $exec
- FLAT_STORE_DWORD %20, %26, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %26, 0, 0, implicit $exec, implicit $flat_scr
%28:vgpr_32 = V_LSHR_B32_e32 %27, %32, implicit $exec
- FLAT_STORE_DWORD %20, %28, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %28, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
@@ -390,7 +390,7 @@ body: |
bb.0:
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%2:vgpr_32 = V_XOR_B32_e64 killed %0, undef %1:vgpr_32, implicit $exec
- FLAT_STORE_DWORD undef %3:vreg_64, %2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD undef %3:vreg_64, %2, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
@@ -434,7 +434,7 @@ body: |
liveins: $sgpr0_sgpr1
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sreg_32_xm0 = COPY %1.sub1
%3:sreg_32_xm0 = COPY %1.sub0
%4:sreg_32_xm0 = S_MOV_B32 61440
@@ -444,7 +444,7 @@ body: |
%8:sreg_32_xm0 = S_MOV_B32 9999
%9:sreg_32_xm0 = S_ANDN2_B32 killed %7, killed %8, implicit-def dead $scc
%10:vgpr_32 = COPY %9
- BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -467,7 +467,7 @@ body: |
liveins: $sgpr0_sgpr1
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sreg_32_xm0 = COPY %1.sub1
%3:sreg_32_xm0 = COPY %1.sub0
%4:sreg_32_xm0 = S_MOV_B32 61440
@@ -477,7 +477,7 @@ body: |
%8:sreg_32_xm0 = S_MOV_B32 9999
%9:sreg_32_xm0 = S_OR_B32 killed %7, killed %8, implicit-def dead $scc
%10:vgpr_32 = COPY %9
- BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -516,7 +516,7 @@ body: |
%3:vgpr_32 = COPY $vgpr0
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%13:vgpr_32 = V_ASHRREV_I32_e64 31, %3, implicit $exec
%14:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %13, %subreg.sub1
%15:vreg_64 = V_LSHLREV_B64_e64 2, killed %14, implicit $exec
@@ -529,15 +529,15 @@ body: |
%8:sreg_32_xm0 = S_MOV_B32 1234567
%16:vgpr_32 = V_MOV_B32_e32 63, implicit $exec
%9:vgpr_32 = V_OR_B32_e64 %8, %6, implicit $exec
- FLAT_STORE_DWORD %19, %9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %19, %9, 0, 0, implicit $exec, implicit $flat_scr
%10:vgpr_32 = V_OR_B32_e64 %6, %8, implicit $exec
- FLAT_STORE_DWORD %19, %10, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %19, %10, 0, 0, implicit $exec, implicit $flat_scr
%11:vgpr_32 = V_OR_B32_e32 %8, %6, implicit $exec
- FLAT_STORE_DWORD %19, %11, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %19, %11, 0, 0, implicit $exec, implicit $flat_scr
%12:vgpr_32 = V_OR_B32_e64 %8, %8, implicit $exec
- FLAT_STORE_DWORD %19, %12, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %19, %12, 0, 0, implicit $exec, implicit $flat_scr
%13:vgpr_32 = V_OR_B32_e64 %16, %16, implicit $exec
- FLAT_STORE_DWORD %19, %13, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %19, %13, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
@@ -560,7 +560,7 @@ body: |
liveins: $sgpr0_sgpr1
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sreg_32_xm0 = COPY %1.sub1
%3:sreg_32_xm0 = COPY %1.sub0
%4:sreg_32_xm0 = S_MOV_B32 61440
@@ -570,7 +570,7 @@ body: |
%8:sreg_32_xm0 = S_MOV_B32 9999
%9:sreg_32_xm0 = S_ORN2_B32 killed %7, killed %8, implicit-def dead $scc
%10:vgpr_32 = COPY %9
- BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -593,7 +593,7 @@ body: |
liveins: $sgpr0_sgpr1
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sreg_32_xm0 = COPY %1.sub1
%3:sreg_32_xm0 = COPY %1.sub0
%4:sreg_32_xm0 = S_MOV_B32 61440
@@ -603,7 +603,7 @@ body: |
%8:sreg_32_xm0 = S_MOV_B32 9999
%9:sreg_32_xm0 = S_NAND_B32 killed %7, killed %8, implicit-def dead $scc
%10:vgpr_32 = COPY %9
- BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -626,7 +626,7 @@ body: |
liveins: $sgpr0_sgpr1
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sreg_32_xm0 = COPY %1.sub1
%3:sreg_32_xm0 = COPY %1.sub0
%4:sreg_32_xm0 = S_MOV_B32 61440
@@ -636,7 +636,7 @@ body: |
%8:sreg_32_xm0 = S_MOV_B32 9999
%9:sreg_32_xm0 = S_NOR_B32 killed %7, killed %8, implicit-def dead $scc
%10:vgpr_32 = COPY %9
- BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -659,7 +659,7 @@ body: |
liveins: $sgpr0_sgpr1
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sreg_32_xm0 = COPY %1.sub1
%3:sreg_32_xm0 = COPY %1.sub0
%4:sreg_32_xm0 = S_MOV_B32 61440
@@ -669,7 +669,7 @@ body: |
%8:sreg_32_xm0 = S_MOV_B32 9999
%9:sreg_32_xm0 = S_XNOR_B32 killed %7, killed %8, implicit-def dead $scc
%10:vgpr_32 = COPY %9
- BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -723,7 +723,7 @@ body: |
%2:vgpr_32 = COPY $vgpr0
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %3:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, addrspace 4)
+ %3:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, addrspace 4)
%15:vgpr_32 = V_ASHRREV_I32_e64 31, %2, implicit $exec
%16:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %15, %subreg.sub1
%17:vreg_64 = V_LSHLREV_B64_e64 2, killed %16, implicit $exec
@@ -738,25 +738,25 @@ body: |
%7:sreg_32_xm0 = S_MOV_B32 1
%27:sreg_32_xm0 = S_MOV_B32 -4
%11:vgpr_32 = V_LSHLREV_B32_e64 12, %10, implicit $exec
- FLAT_STORE_DWORD %20, %11, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %11, 0, 0, implicit $exec, implicit $flat_scr
%12:vgpr_32 = V_LSHLREV_B32_e64 %7, 12, implicit $exec
- FLAT_STORE_DWORD %20, %12, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %12, 0, 0, implicit $exec, implicit $flat_scr
%13:vgpr_32 = V_LSHL_B32_e64 %7, 12, implicit $exec
- FLAT_STORE_DWORD %20, %13, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %13, 0, 0, implicit $exec, implicit $flat_scr
%14:vgpr_32 = V_LSHL_B32_e64 12, %7, implicit $exec
- FLAT_STORE_DWORD %20, %14, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %14, 0, 0, implicit $exec, implicit $flat_scr
%15:vgpr_32 = V_LSHL_B32_e64 12, %24, implicit $exec
- FLAT_STORE_DWORD %20, %15, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %15, 0, 0, implicit $exec, implicit $flat_scr
%22:vgpr_32 = V_LSHL_B32_e64 %6, 12, implicit $exec
- FLAT_STORE_DWORD %20, %22, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %22, 0, 0, implicit $exec, implicit $flat_scr
%23:vgpr_32 = V_LSHL_B32_e64 %6, 32, implicit $exec
- FLAT_STORE_DWORD %20, %23, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %23, 0, 0, implicit $exec, implicit $flat_scr
%25:vgpr_32 = V_LSHL_B32_e32 %6, %6, implicit $exec
- FLAT_STORE_DWORD %20, %25, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %25, 0, 0, implicit $exec, implicit $flat_scr
%26:vgpr_32 = V_LSHLREV_B32_e32 11, %24, implicit $exec
- FLAT_STORE_DWORD %20, %26, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %26, 0, 0, implicit $exec, implicit $flat_scr
%28:vgpr_32 = V_LSHL_B32_e32 %27, %6, implicit $exec
- FLAT_STORE_DWORD %20, %28, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD %20, %28, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/couldnt-join-subrange-3.mir b/llvm/test/CodeGen/AMDGPU/couldnt-join-subrange-3.mir
index 0a8b3d89c817..b224922f2234 100644
--- a/llvm/test/CodeGen/AMDGPU/couldnt-join-subrange-3.mir
+++ b/llvm/test/CodeGen/AMDGPU/couldnt-join-subrange-3.mir
@@ -37,7 +37,7 @@ body: |
; GCN: S_BRANCH %bb.3
; GCN: bb.3:
; GCN: successors: %bb.4(0x80000000)
- ; GCN: dead %16:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]].sub3, undef %17:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
+ ; GCN: dead %16:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]].sub3, undef %17:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
; GCN: dead %18:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 $exec, -1, implicit-def dead $scc
; GCN: dead %20:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
@@ -89,7 +89,7 @@ body: |
S_BRANCH %bb.3
bb.3:
- dead %22:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %53.sub3, undef %24:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
+ dead %22:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %53.sub3, undef %24:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
dead %60:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
%36:sreg_64 = S_AND_B64 $exec, -1, implicit-def dead $scc
dead %67:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/dbg-value-ends-sched-region.mir b/llvm/test/CodeGen/AMDGPU/dbg-value-ends-sched-region.mir
index a4c8fdfa77df..e427cd51580c 100644
--- a/llvm/test/CodeGen/AMDGPU/dbg-value-ends-sched-region.mir
+++ b/llvm/test/CodeGen/AMDGPU/dbg-value-ends-sched-region.mir
@@ -66,9 +66,9 @@ body: |
; CHECK: dead %16:vgpr_32 = COPY %11.sub0
; CHECK: undef %17.sub0:vreg_64, %18:sreg_64_xexec = V_ADD_CO_U32_e64 [[DEF4]].sub0, [[DEF6]].sub0, 0, implicit $exec
; CHECK: dead undef %17.sub1:vreg_64, dead %19:sreg_64_xexec = V_ADDC_U32_e64 [[DEF4]].sub1, [[DEF6]].sub1, %18, 0, implicit $exec
- ; CHECK: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[DEF1]], 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
+ ; CHECK: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[DEF1]], 0, 0, implicit $exec :: (load 8, addrspace 1)
; CHECK: dead %20:sreg_64 = V_CMP_GT_I32_e64 4, [[DEF7]], implicit $exec
- ; CHECK: GLOBAL_STORE_DWORDX2 [[COPY]], [[DEF8]], 288, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
+ ; CHECK: GLOBAL_STORE_DWORDX2 [[COPY]], [[DEF8]], 288, 0, implicit $exec :: (store 8, addrspace 1)
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x80000000)
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]]
@@ -79,7 +79,7 @@ body: |
; CHECK: bb.4:
; CHECK: successors: %bb.5(0x80000000)
; CHECK: dead %21:sreg_64 = COPY $exec
- ; CHECK: dead %22:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (load 16, addrspace 1)
+ ; CHECK: dead %22:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY1]], 0, 0, implicit $exec :: (load 16, addrspace 1)
; CHECK: DBG_VALUE %22, $noreg, <0x{{[0-9a-f]+}}>, !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), debug-location !DILocation(line: 0, scope: <0x{{[0-9a-f]+}}>)
; CHECK: bb.5:
; CHECK: successors: %bb.3(0x40000000), %bb.1(0x40000000)
@@ -109,9 +109,9 @@ body: |
dead %16:vgpr_32 = COPY %11.sub0
undef %17.sub0:vreg_64, %18:sreg_64_xexec = V_ADD_CO_U32_e64 %6.sub0, %8.sub0, 0, implicit $exec
dead %17.sub1:vreg_64, dead %19:sreg_64_xexec = V_ADDC_U32_e64 %6.sub1, %8.sub1, %18, 0, implicit $exec
- %6:vreg_64 = GLOBAL_LOAD_DWORDX2 %3, 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
+ %6:vreg_64 = GLOBAL_LOAD_DWORDX2 %3, 0, 0, implicit $exec :: (load 8, addrspace 1)
dead %20:sreg_64 = V_CMP_GT_I32_e64 4, %9, implicit $exec
- GLOBAL_STORE_DWORDX2 %0, %10, 288, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
+ GLOBAL_STORE_DWORDX2 %0, %10, 288, 0, implicit $exec :: (store 8, addrspace 1)
bb.2:
%5:vgpr_32 = COPY %13
@@ -122,7 +122,7 @@ body: |
bb.4:
dead %21:sreg_64 = COPY $exec
- %22:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 0, 0, 0, 0, 0, implicit $exec :: (load 16, addrspace 1)
+ %22:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 0, 0, implicit $exec :: (load 16, addrspace 1)
DBG_VALUE %22, $noreg, !16, !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), debug-location !21
bb.5:
diff --git a/llvm/test/CodeGen/AMDGPU/dce-disjoint-intervals.mir b/llvm/test/CodeGen/AMDGPU/dce-disjoint-intervals.mir
index b333e5dbb409..bef24bcfa294 100644
--- a/llvm/test/CodeGen/AMDGPU/dce-disjoint-intervals.mir
+++ b/llvm/test/CodeGen/AMDGPU/dce-disjoint-intervals.mir
@@ -11,7 +11,7 @@ body: |
bb.0:
liveins: $sgpr0_sgpr1
- %10:sgpr_128 = S_LOAD_DWORDX4_IMM killed $noreg, 9, 0, 0
+ %10:sgpr_128 = S_LOAD_DWORDX4_IMM killed $noreg, 9, 0
S_NOP 0, implicit-def %4:sgpr_128, implicit %10.sub1:sgpr_128
S_CBRANCH_SCC0 %bb.3, implicit undef $scc
S_BRANCH %bb.1
@@ -26,7 +26,7 @@ body: |
S_BRANCH %bb.4
bb.3:
- %10:sgpr_128 = S_LOAD_DWORDX4_IMM killed $noreg, 10, 0, 0
+ %10:sgpr_128 = S_LOAD_DWORDX4_IMM killed $noreg, 10, 0
%7:sreg_32_xm0 = COPY %10.sub1:sgpr_128
%8:sreg_32_xm0 = COPY %10.sub2:sgpr_128
diff --git a/llvm/test/CodeGen/AMDGPU/dead-lane.mir b/llvm/test/CodeGen/AMDGPU/dead-lane.mir
index b91076112a0e..8e12265eca3f 100644
--- a/llvm/test/CodeGen/AMDGPU/dead-lane.mir
+++ b/llvm/test/CodeGen/AMDGPU/dead-lane.mir
@@ -12,7 +12,7 @@ body: |
%1:vgpr_32 = nofpexcept V_MAC_F32_e32 undef %0:vgpr_32, undef %0:vgpr_32, undef %0:vgpr_32, implicit $mode, implicit $exec
%2:vgpr_32 = nofpexcept V_MAC_F32_e32 undef %0:vgpr_32, undef %0:vgpr_32, undef %0:vgpr_32, implicit $mode, implicit $exec
%3:vreg_64 = REG_SEQUENCE %1:vgpr_32, %subreg.sub0, %2:vgpr_32, %subreg.sub1
- FLAT_STORE_DWORD undef %4:vreg_64, %3.sub0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD undef %4:vreg_64, %3.sub0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/dead_copy.mir b/llvm/test/CodeGen/AMDGPU/dead_copy.mir
index ebfd720a9dcf..4365c49455f4 100644
--- a/llvm/test/CodeGen/AMDGPU/dead_copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/dead_copy.mir
@@ -23,5 +23,5 @@ body: |
$vgpr10 = COPY killed $sgpr14, implicit $exec
$vgpr11 = COPY killed $sgpr15, implicit $exec
- FLAT_STORE_DWORDX4 $vgpr10_vgpr11, $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORDX4 $vgpr10_vgpr11, $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr
...
diff --git a/llvm/test/CodeGen/AMDGPU/debug-value-scheduler-crash.mir b/llvm/test/CodeGen/AMDGPU/debug-value-scheduler-crash.mir
index e1d0b38465c5..1da9cc14327a 100644
--- a/llvm/test/CodeGen/AMDGPU/debug-value-scheduler-crash.mir
+++ b/llvm/test/CodeGen/AMDGPU/debug-value-scheduler-crash.mir
@@ -71,7 +71,7 @@ body: |
; CHECK: dead %26:vgpr_32 = nofpexcept V_MAD_F32_e64 0, %25, 0, [[DEF4]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
; CHECK: dead %27:vgpr_32 = nofpexcept V_MAD_F32_e64 0, %25, 0, [[DEF5]], 0, [[DEF2]], 0, 0, implicit $mode, implicit $exec
; CHECK: dead %28:vgpr_32 = nofpexcept V_MAD_F32_e64 0, %25, 0, [[DEF6]], 0, [[DEF3]], 0, 0, implicit $mode, implicit $exec
- ; CHECK: GLOBAL_STORE_DWORD [[DEF]], [[DEF10]], 0, 0, 0, 0, 0, implicit $exec
+ ; CHECK: GLOBAL_STORE_DWORD [[DEF]], [[DEF10]], 0, 0, implicit $exec
; CHECK: S_ENDPGM 0
bb.0:
successors: %bb.1
@@ -129,7 +129,7 @@ body: |
%26:vgpr_32 = nofpexcept V_MAD_F32_e64 0, %25, 0, %4, 0, %1, 0, 0, implicit $mode, implicit $exec
%27:vgpr_32 = nofpexcept V_MAD_F32_e64 0, %25, 0, %5, 0, %2, 0, 0, implicit $mode, implicit $exec
%28:vgpr_32 = nofpexcept V_MAD_F32_e64 0, %25, 0, %6, 0, %3, 0, 0, implicit $mode, implicit $exec
- GLOBAL_STORE_DWORD %0, %11, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %0, %11, 0, 0, implicit $exec
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/endpgm-dce.mir b/llvm/test/CodeGen/AMDGPU/endpgm-dce.mir
index 4a1b03df8952..7522af1724eb 100644
--- a/llvm/test/CodeGen/AMDGPU/endpgm-dce.mir
+++ b/llvm/test/CodeGen/AMDGPU/endpgm-dce.mir
@@ -17,7 +17,7 @@ body: |
%0 = IMPLICIT_DEF
%3 = IMPLICIT_DEF
$sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc
- %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ %1 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
%2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit $mode, implicit $exec
%4 = S_ADD_U32 %3, 1, implicit-def $scc
S_ENDPGM 0
@@ -25,7 +25,7 @@ body: |
---
# GCN-LABEL: name: load_without_memoperand
# GCN: $sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc
-# GCN-NEXT: dead %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+# GCN-NEXT: dead %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr
# GCN-NEXT: S_ENDPGM 0
name: load_without_memoperand
tracksRegLiveness: true
@@ -41,7 +41,7 @@ body: |
%0 = IMPLICIT_DEF
%3 = IMPLICIT_DEF
$sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc
- %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ %1 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr
%2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit $mode, implicit $exec
%4 = S_ADD_U32 %3, 1, implicit-def $scc
S_ENDPGM 0
@@ -49,7 +49,7 @@ body: |
---
# GCN-LABEL: name: load_volatile
# GCN: $sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc
-# GCN-NEXT: dead %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4)
+# GCN-NEXT: dead %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4)
# GCN-NEXT: S_ENDPGM 0
name: load_volatile
tracksRegLiveness: true
@@ -65,7 +65,7 @@ body: |
%0 = IMPLICIT_DEF
%3 = IMPLICIT_DEF
$sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc
- %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4)
+ %1 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4)
%2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit $mode, implicit $exec
%4 = S_ADD_U32 %3, 1, implicit-def $scc
S_ENDPGM 0
@@ -73,7 +73,7 @@ body: |
---
# GCN-LABEL: name: store
# GCN: $sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc
-# GCN-NEXT: FLAT_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+# GCN-NEXT: FLAT_STORE_DWORD %0, %1, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
# GCN-NEXT: S_ENDPGM 0
name: store
tracksRegLiveness: true
@@ -86,7 +86,7 @@ body: |
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
$sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc
- FLAT_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+ FLAT_STORE_DWORD %0, %1, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
S_ENDPGM 0
...
---
diff --git a/llvm/test/CodeGen/AMDGPU/expand-si-indirect.mir b/llvm/test/CodeGen/AMDGPU/expand-si-indirect.mir
index 2b8435405d86..53db02927754 100644
--- a/llvm/test/CodeGen/AMDGPU/expand-si-indirect.mir
+++ b/llvm/test/CodeGen/AMDGPU/expand-si-indirect.mir
@@ -24,8 +24,8 @@ body: |
%0:sgpr_64 = COPY killed $sgpr0_sgpr1
%1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %2:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0(p4), 36, 0, 0
- %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM killed %0(p4), 44, 0, 0
+ %2:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0(p4), 36, 0
+ %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM killed %0(p4), 44, 0
%4:sreg_32 = S_ADD_I32 %3, 1, implicit-def dead $scc
%5:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
%6:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec
@@ -53,49 +53,49 @@ body: |
%28:vgpr_32 = COPY %23.sub13
%29:vgpr_32 = COPY %23.sub12
%30:vreg_128 = REG_SEQUENCE killed %29, %subreg.sub0, killed %28, %subreg.sub1, killed %27, %subreg.sub2, killed %26, %subreg.sub3
- GLOBAL_STORE_DWORDX4_SADDR %1, killed %30, %2, 48, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4_SADDR %1, killed %30, %2, 48, 0, implicit $exec
%31:vgpr_32 = COPY %23.sub11
%32:vgpr_32 = COPY %23.sub10
%33:vgpr_32 = COPY %23.sub9
%34:vgpr_32 = COPY %23.sub8
%35:vreg_128 = REG_SEQUENCE killed %34, %subreg.sub0, killed %33, %subreg.sub1, killed %32, %subreg.sub2, killed %31, %subreg.sub3
- GLOBAL_STORE_DWORDX4_SADDR %1, killed %35, %2, 32, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4_SADDR %1, killed %35, %2, 32, 0, implicit $exec
%36:vgpr_32 = COPY %23.sub7
%37:vgpr_32 = COPY %23.sub6
%38:vgpr_32 = COPY %23.sub5
%39:vgpr_32 = COPY %23.sub4
%40:vreg_128 = REG_SEQUENCE killed %39, %subreg.sub0, killed %38, %subreg.sub1, killed %37, %subreg.sub2, killed %36, %subreg.sub3
- GLOBAL_STORE_DWORDX4_SADDR %1, killed %40, %2, 16, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4_SADDR %1, killed %40, %2, 16, 0, implicit $exec
%41:vgpr_32 = COPY %23.sub3
%42:vgpr_32 = COPY %23.sub2
%43:vgpr_32 = COPY %23.sub1
%44:vgpr_32 = COPY killed %23.sub0
%45:vreg_128 = REG_SEQUENCE killed %44, %subreg.sub0, killed %43, %subreg.sub1, killed %42, %subreg.sub2, killed %41, %subreg.sub3
- GLOBAL_STORE_DWORDX4_SADDR %1, killed %45, %2, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4_SADDR %1, killed %45, %2, 0, 0, implicit $exec
%46:vgpr_32 = COPY %25.sub15
%47:vgpr_32 = COPY %25.sub14
%48:vgpr_32 = COPY %25.sub13
%49:vgpr_32 = COPY %25.sub12
%50:vreg_128 = REG_SEQUENCE killed %49, %subreg.sub0, killed %48, %subreg.sub1, killed %47, %subreg.sub2, killed %46, %subreg.sub3
- GLOBAL_STORE_DWORDX4_SADDR %1, killed %50, %2, 112, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4_SADDR %1, killed %50, %2, 112, 0, implicit $exec
%51:vgpr_32 = COPY %25.sub11
%52:vgpr_32 = COPY %25.sub10
%53:vgpr_32 = COPY %25.sub9
%54:vgpr_32 = COPY %25.sub8
%55:vreg_128 = REG_SEQUENCE killed %54, %subreg.sub0, killed %53, %subreg.sub1, killed %52, %subreg.sub2, killed %51, %subreg.sub3
- GLOBAL_STORE_DWORDX4_SADDR %1, killed %55, %2, 96, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4_SADDR %1, killed %55, %2, 96, 0, implicit $exec
%56:vgpr_32 = COPY %25.sub7
%57:vgpr_32 = COPY %25.sub6
%58:vgpr_32 = COPY %25.sub5
%59:vgpr_32 = COPY %25.sub4
%60:vreg_128 = REG_SEQUENCE killed %59, %subreg.sub0, killed %58, %subreg.sub1, killed %57, %subreg.sub2, killed %56, %subreg.sub3
- GLOBAL_STORE_DWORDX4_SADDR %1, killed %60, %2, 80, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4_SADDR %1, killed %60, %2, 80, 0, implicit $exec
%61:vgpr_32 = COPY %25.sub3
%62:vgpr_32 = COPY %25.sub2
%63:vgpr_32 = COPY %25.sub1
%64:vgpr_32 = COPY killed %25.sub0
%65:vreg_128 = REG_SEQUENCE killed %64, %subreg.sub0, killed %63, %subreg.sub1, killed %62, %subreg.sub2, killed %61, %subreg.sub3
- GLOBAL_STORE_DWORDX4_SADDR killed %1, killed %65, killed %2, 64, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4_SADDR killed %1, killed %65, killed %2, 64, 0, implicit $exec
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll b/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
index efc67993b55f..25a7f75a21e9 100644
--- a/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
@@ -12,7 +12,7 @@ define amdgpu_hs void @main([0 x i8] addrspace(6)* inreg %arg) {
; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
; GCN: [[DEF1:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
- ; GCN: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[DEF1]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
+ ; GCN: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[DEF1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
@@ -21,7 +21,7 @@ define amdgpu_hs void @main([0 x i8] addrspace(6)* inreg %arg) {
; GCN: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; GCN: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[DEF2]]
; GCN: [[DEF3:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
- ; GCN: BUFFER_STORE_DWORDX3_OFFEN_exact killed [[COPY4]], [[COPY5]], [[DEF3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "BufferResource", align 1, addrspace 4)
+ ; GCN: BUFFER_STORE_DWORDX3_OFFEN_exact killed [[COPY4]], [[COPY5]], [[DEF3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "BufferResource", align 1, addrspace 4)
; GCN: S_ENDPGM 0
main_body:
%tmp25 = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> undef, i32 undef, i32 0, i32 0)
diff --git a/llvm/test/CodeGen/AMDGPU/fastregalloc-self-loop-heuristic.mir b/llvm/test/CodeGen/AMDGPU/fastregalloc-self-loop-heuristic.mir
index 83cfb1278acd..2b6b36b24a05 100644
--- a/llvm/test/CodeGen/AMDGPU/fastregalloc-self-loop-heuristic.mir
+++ b/llvm/test/CodeGen/AMDGPU/fastregalloc-self-loop-heuristic.mir
@@ -17,8 +17,8 @@ body: |
; GCN: bb.1:
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5)
- ; GCN: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec
- ; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, implicit $exec
+ ; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec
; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
; GCN: bb.2:
; GCN: S_ENDPGM 0
@@ -27,8 +27,8 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
bb.1:
- %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
+ %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
S_CBRANCH_EXECZ %bb.1, implicit $exec
bb.2:
@@ -52,11 +52,11 @@ body: |
; GCN: bb.1:
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5)
- ; GCN: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec
- ; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec
- ; GCN: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, implicit $exec
+ ; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, renamable $vgpr2, 0, 0, implicit $exec
+ ; GCN: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, implicit $exec
; GCN: SI_SPILL_V32_SAVE $vgpr2, %stack.1, $sgpr32, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5)
- ; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, renamable $vgpr2, 0, 0, implicit $exec
; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
; GCN: bb.2:
; GCN: S_ENDPGM 0
@@ -65,10 +65,10 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
bb.1:
- %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
- %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
+ %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
+ %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
S_CBRANCH_EXECZ %bb.1, implicit $exec
bb.2:
@@ -96,7 +96,7 @@ body: |
; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5)
; GCN: renamable $vgpr2 = V_ADD_U32_e32 1, undef $vgpr0, implicit $exec
; GCN: SI_SPILL_V32_SAVE $vgpr2, %stack.1, $sgpr32, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5)
- ; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, renamable $vgpr2, 0, 0, implicit $exec
; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
; GCN: bb.2:
; GCN: S_ENDPGM 0
@@ -106,7 +106,7 @@ body: |
bb.1:
%1:vgpr_32 = V_ADD_U32_e32 1, undef %1, implicit $exec
- GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
S_CBRANCH_EXECZ %bb.1, implicit $exec
bb.2:
@@ -130,7 +130,7 @@ body: |
; GCN: bb.1:
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5)
- ; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, undef renamable $vgpr0, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, undef renamable $vgpr0, 0, 0, implicit $exec
; GCN: renamable $vgpr0 = V_ADD_U32_e64 1, 1, 0, implicit $exec
; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5)
; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
@@ -141,7 +141,7 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
bb.1:
- GLOBAL_STORE_DWORD %0, undef %1, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %0, undef %1, 0, 0, implicit $exec
%1:vgpr_32 = V_ADD_U32_e64 1, 1, 0, implicit $exec
S_CBRANCH_EXECZ %bb.1, implicit $exec
@@ -166,8 +166,8 @@ body: |
; GCN: bb.1:
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5)
- ; GCN: undef renamable $vgpr3 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit-def dead $vgpr2_vgpr3
- ; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, undef renamable $vgpr1, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: undef renamable $vgpr3 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit-def dead $vgpr2_vgpr3
+ ; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, undef renamable $vgpr1, 0, 0, implicit $exec
; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
; GCN: bb.2:
; GCN: S_ENDPGM 0
@@ -176,8 +176,8 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
bb.1:
- undef %1.sub1:vreg_64 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORD %0, undef %1.sub1, 0, 0, 0, 0, 0, implicit $exec
+ undef %1.sub1:vreg_64 = GLOBAL_LOAD_DWORD %0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %0, undef %1.sub1, 0, 0, implicit $exec
S_CBRANCH_EXECZ %bb.1, implicit $exec
bb.2:
diff --git a/llvm/test/CodeGen/AMDGPU/flat-load-clustering.mir b/llvm/test/CodeGen/AMDGPU/flat-load-clustering.mir
index c9bf75ef681f..2eb9a3f30177 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-load-clustering.mir
+++ b/llvm/test/CodeGen/AMDGPU/flat-load-clustering.mir
@@ -54,24 +54,24 @@ body: |
%1 = COPY $sgpr4_sgpr5
%0 = COPY $vgpr0
- %3 = S_LOAD_DWORDX2_IMM %1, 0, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
- %4 = S_LOAD_DWORDX2_IMM %1, 8, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %3 = S_LOAD_DWORDX2_IMM %1, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %4 = S_LOAD_DWORDX2_IMM %1, 8, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%7 = V_LSHLREV_B32_e32 2, %0, implicit $exec
%2 = V_MOV_B32_e32 0, implicit $exec
undef %12.sub0 = V_ADD_CO_U32_e32 %4.sub0, %7, implicit-def $vcc, implicit $exec
%11 = COPY %4.sub1
%12.sub1 = V_ADDC_U32_e32 %11, %2, implicit-def dead $vcc, implicit killed $vcc, implicit $exec
- %5 = FLAT_LOAD_DWORD %12, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.gep1)
+ %5 = FLAT_LOAD_DWORD %12, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.gep1)
undef %9.sub0 = V_ADD_CO_U32_e32 %3.sub0, %7, implicit-def $vcc, implicit $exec
%8 = COPY %3.sub1
%9.sub1 = V_ADDC_U32_e32 %8, %2, implicit-def dead $vcc, implicit killed $vcc, implicit $exec
undef %13.sub0 = V_ADD_CO_U32_e32 16, %12.sub0, implicit-def $vcc, implicit $exec
%13.sub1 = V_ADDC_U32_e32 %12.sub1, %2, implicit-def dead $vcc, implicit killed $vcc, implicit $exec
- %6 = FLAT_LOAD_DWORD %13, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.gep34)
+ %6 = FLAT_LOAD_DWORD %13, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.gep34)
undef %10.sub0 = V_ADD_CO_U32_e32 16, %9.sub0, implicit-def $vcc, implicit $exec
%10.sub1 = V_ADDC_U32_e32 %9.sub1, %2, implicit-def dead $vcc, implicit killed $vcc, implicit $exec
- FLAT_STORE_DWORD %9, %5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.gep2)
- FLAT_STORE_DWORD %10, %6, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.gep4)
+ FLAT_STORE_DWORD %9, %5, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.gep2)
+ FLAT_STORE_DWORD %10, %6, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.gep4)
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-fold-fi.mir b/llvm/test/CodeGen/AMDGPU/flat-scratch-fold-fi.mir
index 5a76164a6706..34a12bc54972 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch-fold-fi.mir
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-fold-fi.mir
@@ -9,10 +9,10 @@ body: |
bb.0.entry:
; GCN-LABEL: name: test_fold_fi_scratch_load_vgpr
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
- ; GCN: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR %stack.0, 4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
+ ; GCN: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR %stack.0, 4, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
; GCN: S_ENDPGM 0
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
- %1:vgpr_32 = SCRATCH_LOAD_DWORD %0:vgpr_32, 4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
+ %1:vgpr_32 = SCRATCH_LOAD_DWORD %0:vgpr_32, 4, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
S_ENDPGM 0
...
@@ -25,10 +25,10 @@ body: |
bb.0.entry:
; GCN-LABEL: name: test_fold_fi_scratch_load_sgpr
; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 %stack.0
- ; GCN: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR %stack.0, 4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
+ ; GCN: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR %stack.0, 4, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
; GCN: S_ENDPGM 0
%0:sgpr_32 = S_MOV_B32 %stack.0
- %1:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR %0:sgpr_32, 4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
+ %1:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR %0:sgpr_32, 4, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
S_ENDPGM 0
...
@@ -42,11 +42,11 @@ body: |
; GCN-LABEL: name: test_fold_fi_scratch_store_vgpr
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GCN: SCRATCH_STORE_DWORD_SADDR [[DEF]], %stack.0, 4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5)
+ ; GCN: SCRATCH_STORE_DWORD_SADDR [[DEF]], %stack.0, 4, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5)
; GCN: S_ENDPGM 0
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
%1:vgpr_32 = IMPLICIT_DEF
- SCRATCH_STORE_DWORD %1:vgpr_32, %0:vgpr_32, 4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5)
+ SCRATCH_STORE_DWORD %1:vgpr_32, %0:vgpr_32, 4, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5)
S_ENDPGM 0
...
@@ -60,11 +60,11 @@ body: |
; GCN-LABEL: name: test_no_fold_fi_scratch_store_vgpr
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GCN: SCRATCH_STORE_DWORD [[V_MOV_B32_e32_]], [[DEF]], 4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5)
+ ; GCN: SCRATCH_STORE_DWORD [[V_MOV_B32_e32_]], [[DEF]], 4, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5)
; GCN: S_ENDPGM 0
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
%1:vgpr_32 = IMPLICIT_DEF
- SCRATCH_STORE_DWORD %0:vgpr_32, %1:vgpr_32, 4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5)
+ SCRATCH_STORE_DWORD %0:vgpr_32, %1:vgpr_32, 4, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5)
S_ENDPGM 0
...
@@ -78,11 +78,11 @@ body: |
; GCN-LABEL: name: test_fold_fi_scratch_store_sgpr
; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 %stack.0
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GCN: SCRATCH_STORE_DWORD_SADDR [[DEF]], %stack.0, 4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5)
+ ; GCN: SCRATCH_STORE_DWORD_SADDR [[DEF]], %stack.0, 4, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5)
; GCN: S_ENDPGM 0
%0:sgpr_32 = S_MOV_B32 %stack.0
%1:vgpr_32 = IMPLICIT_DEF
- SCRATCH_STORE_DWORD_SADDR %1:vgpr_32, %0:sgpr_32, 4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5)
+ SCRATCH_STORE_DWORD_SADDR %1:vgpr_32, %0:sgpr_32, 4, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5)
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/fold-fi-mubuf.mir b/llvm/test/CodeGen/AMDGPU/fold-fi-mubuf.mir
index 038f9532ac43..255acb76bbfa 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-fi-mubuf.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-fi-mubuf.mir
@@ -22,13 +22,13 @@ body: |
; GCN: liveins: $sgpr12_sgpr13_sgpr14_sgpr15
; GCN: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
- ; GCN: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[V_MOV_B32_e32_]], [[COPY]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[V_MOV_B32_e32_]], [[COPY]], 0, 0, 0, 0, 0, implicit $exec
; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_IDXEN]]
; GCN: SI_RETURN_TO_EPILOG $vgpr0
%0:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
%1:sreg_32_xm0 = S_MOV_B32 0
%2:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
- %3:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN %2, %0, %1, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ %3:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN %2, %0, %1, 0, 0, 0, 0, implicit $exec
$vgpr0 = COPY %3
SI_RETURN_TO_EPILOG $vgpr0
@@ -54,12 +54,12 @@ body: |
; GCN: liveins: $sgpr12_sgpr13_sgpr14_sgpr15
; GCN: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
- ; GCN: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[V_MOV_B32_e32_]], [[COPY]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[V_MOV_B32_e32_]], [[COPY]], 0, 0, 0, 0, 0, implicit $exec
; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_IDXEN]]
; GCN: SI_RETURN_TO_EPILOG $vgpr0
%0:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
%2:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
- %3:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN %2, %0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ %3:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN %2, %0, 0, 0, 0, 0, 0, implicit $exec
$vgpr0 = COPY %3
SI_RETURN_TO_EPILOG $vgpr0
@@ -83,16 +83,16 @@ body: |
; GCN-LABEL: name: kernel_no_fold_fi_non_stack_soffset
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
; GCN: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
- ; GCN: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GCN: S_ENDPGM 0, implicit $vgpr0
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
%1:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
%2:sreg_32_xm0 = S_MOV_B32 0
- BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, %2, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- %3:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, %2, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, %2, 0, 0, 0, 0, implicit $exec
+ %3:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, %2, 0, 0, 0, 0, implicit $exec
$vgpr0 = COPY %3
S_ENDPGM 0, implicit $vgpr0
@@ -115,15 +115,15 @@ body: |
; GCN-LABEL: name: kernel_fold_fi_mubuf
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
- ; GCN: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GCN: S_ENDPGM 0, implicit $vgpr0
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
%1:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
- BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- %2:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
+ %2:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
$vgpr0 = COPY %2
S_ENDPGM 0, implicit $vgpr0
@@ -152,13 +152,13 @@ body: |
; GCN: liveins: $sgpr12_sgpr13_sgpr14_sgpr15
; GCN: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
- ; GCN: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[V_MOV_B32_e32_]], [[COPY]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[V_MOV_B32_e32_]], [[COPY]], 0, 0, 0, 0, 0, implicit $exec
; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_IDXEN]]
; GCN: SI_RETURN_TO_EPILOG $vgpr0
%0:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
%1:sreg_32_xm0 = S_MOV_B32 0
%2:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
- %3:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN %2, %0, %1, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ %3:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN %2, %0, %1, 0, 0, 0, 0, implicit $exec
$vgpr0 = COPY %3
SI_RETURN_TO_EPILOG $vgpr0
@@ -185,12 +185,12 @@ body: |
; GCN: liveins: $sgpr12_sgpr13_sgpr14_sgpr15
; GCN: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
- ; GCN: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[V_MOV_B32_e32_]], [[COPY]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[V_MOV_B32_e32_]], [[COPY]], 0, 0, 0, 0, 0, implicit $exec
; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_IDXEN]]
; GCN: SI_RETURN_TO_EPILOG $vgpr0
%0:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
%2:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
- %3:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN %2, %0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ %3:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN %2, %0, 0, 0, 0, 0, 0, implicit $exec
$vgpr0 = COPY %3
SI_RETURN_TO_EPILOG $vgpr0
@@ -214,15 +214,15 @@ body: |
; GCN-LABEL: name: function_no_fold_fi_non_stack_soffset
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
- ; GCN: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GCN: S_ENDPGM 0, implicit $vgpr0
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
%1:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
- BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- %2:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
+ %2:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
$vgpr0 = COPY %2
S_ENDPGM 0, implicit $vgpr0
@@ -246,15 +246,15 @@ body: |
; GCN-LABEL: name: function_fold_fi_mubuf_wave_relative
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
- ; GCN: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GCN: S_ENDPGM 0, implicit $vgpr0
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
%1:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
- BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- %2:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
+ %2:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
$vgpr0 = COPY %2
S_ENDPGM 0, implicit $vgpr0
@@ -278,15 +278,15 @@ body: |
; GCN-LABEL: name: function_fold_fi_mubuf_stack_relative
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
- ; GCN: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GCN: S_ENDPGM 0, implicit $vgpr0
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
%1:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
- BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- %2:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
+ %2:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
$vgpr0 = COPY %2
S_ENDPGM 0, implicit $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir b/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
index 399b55eea3b4..42ffb89e450b 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
@@ -12,12 +12,12 @@ body: |
liveins: $vgpr0, $sgpr0_sgpr1
%0:vgpr_32 = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr0_sgpr1
- %2:sgpr_128 = S_LOAD_DWORDX4_IMM %1, 9, 0, 0
+ %2:sgpr_128 = S_LOAD_DWORDX4_IMM %1, 9, 0
%3:sreg_32_xm0 = S_MOV_B32 2
%4:vgpr_32 = V_LSHLREV_B32_e64 killed %3, %0, implicit $exec
%5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%6:vreg_64 = REG_SEQUENCE killed %4, %subreg.sub0, killed %5, %subreg.sub1
- %7:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 %6, %2, 0, 4, 0, 0, 0, 0, 0, 0, implicit $exec
+ %7:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 %6, %2, 0, 4, 0, 0, 0, implicit $exec
%8:sreg_32_xm0 = S_MOV_B32 65535
%9:vgpr_32 = COPY %8
%10:vgpr_32 = V_AND_B32_e32 %7, %9, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/fold-imm-f16-f32.mir b/llvm/test/CodeGen/AMDGPU/fold-imm-f16-f32.mir
index c2f50953dcbf..c98ebee89c2f 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-imm-f16-f32.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-imm-f16-f32.mir
@@ -158,10 +158,10 @@ body: |
%8 = S_MOV_B32 61440
%9 = S_MOV_B32 -1
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
- %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
+ %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
%12 = V_MOV_B32_e32 1065353216, implicit $exec
%13 = V_ADD_F16_e64 0, killed %11, 0, %12, 0, 0, implicit $mode, implicit $exec
- BUFFER_STORE_SHORT_OFFSET killed %13, %10, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
+ BUFFER_STORE_SHORT_OFFSET killed %13, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
S_ENDPGM 0
...
@@ -222,13 +222,13 @@ body: |
%8 = S_MOV_B32 61440
%9 = S_MOV_B32 -1
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
- %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
- %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
+ %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
+ %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
%13 = V_MOV_B32_e32 1065353216, implicit $exec
%14 = V_ADD_F16_e64 0, killed %11, 0, %13, 0, 0, implicit $mode, implicit $exec
%15 = V_ADD_F16_e64 0, killed %12, 0, killed %13, 0, 0, implicit $mode, implicit $exec
- BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
- BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
+ BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
+ BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
S_ENDPGM 0
...
@@ -289,14 +289,14 @@ body: |
%8 = S_MOV_B32 61440
%9 = S_MOV_B32 -1
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
- %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
- %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
- %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
+ %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
+ %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
+ %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
%14 = V_MOV_B32_e32 1065353216, implicit $exec
%15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $mode, implicit $exec
%16 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $mode, implicit $exec
- BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
- BUFFER_STORE_DWORD_OFFSET killed %16, %10, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
+ BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
+ BUFFER_STORE_DWORD_OFFSET killed %16, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
S_ENDPGM 0
...
@@ -360,16 +360,16 @@ body: |
%8 = S_MOV_B32 61440
%9 = S_MOV_B32 -1
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
- %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
- %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
- %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
+ %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
+ %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
+ %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
%14 = V_MOV_B32_e32 1065353216, implicit $exec
%15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $mode, implicit $exec
%16 = V_ADD_F16_e64 0, %12, 0, %14, 0, 0, implicit $mode, implicit $exec
%17 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $mode, implicit $exec
- BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
- BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
- BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
+ BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
+ BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
+ BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
S_ENDPGM 0
...
@@ -427,13 +427,13 @@ body: |
%8 = S_MOV_B32 61440
%9 = S_MOV_B32 -1
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
- %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
- %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
+ %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
+ %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
%13 = V_MOV_B32_e32 1, implicit $exec
%14 = V_ADD_F16_e64 0, killed %11, 0, %13, 0, 0, implicit $mode, implicit $exec
%15 = V_ADD_F16_e64 0, killed %12, 0, killed %13, 0, 0, implicit $mode, implicit $exec
- BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
- BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
+ BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
+ BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
S_ENDPGM 0
...
@@ -494,16 +494,16 @@ body: |
%8 = S_MOV_B32 61440
%9 = S_MOV_B32 -1
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
- %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
- %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
- %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
+ %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
+ %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
+ %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
%14 = V_MOV_B32_e32 -2, implicit $exec
%15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $mode, implicit $exec
%16 = V_ADD_F16_e64 0, %12, 0, %14, 0, 0, implicit $mode, implicit $exec
%17 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $mode, implicit $exec
- BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
- BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
- BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
+ BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
+ BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
+ BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
S_ENDPGM 0
...
@@ -564,13 +564,13 @@ body: |
%8 = S_MOV_B32 61440
%9 = S_MOV_B32 -1
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
- %11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
- %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
+ %11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
+ %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
%13 = V_MOV_B32_e32 15360, implicit $exec
%14 = V_ADD_F32_e64 0, %11, 0, %13, 0, 0, implicit $mode, implicit $exec
%15 = V_ADD_F32_e64 0, %12, 0, %13, 0, 0, implicit $mode, implicit $exec
- BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
- BUFFER_STORE_DWORD_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
+ BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
+ BUFFER_STORE_DWORD_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
S_ENDPGM 0
...
@@ -631,13 +631,13 @@ body: |
%8 = S_MOV_B32 61440
%9 = S_MOV_B32 -1
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
- %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
- %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
+ %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
+ %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
%13 = V_MOV_B32_e32 80886784, implicit $exec
%14 = V_ADD_F16_e64 0, %11, 0, %13, 0, 0, implicit $mode, implicit $exec
%15 = V_ADD_F16_e64 0, %12, 0, %13, 0, 0, implicit $mode, implicit $exec
- BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
- BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
+ BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
+ BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
S_ENDPGM 0
...
@@ -697,13 +697,13 @@ body: |
%8 = S_MOV_B32 61440
%9 = S_MOV_B32 -1
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
- %11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
- %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
+ %11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
+ %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
%13 = V_MOV_B32_e32 305413120, implicit $exec
%14 = V_ADD_F32_e64 0, %11, 0, %13, 0, 0, implicit $mode, implicit $exec
%15 = V_ADD_F16_e64 0, %12, 0, %13, 0, 0, implicit $mode, implicit $exec
- BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
- BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
+ BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
+ BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/fold-immediate-output-mods.mir b/llvm/test/CodeGen/AMDGPU/fold-immediate-output-mods.mir
index 3745509a45e3..794857ed6059 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-immediate-output-mods.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-immediate-output-mods.mir
@@ -46,9 +46,9 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
- %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0
- %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0
- %6 = S_LOAD_DWORDX2_IMM %0, 13, 0, 0
+ %4 = S_LOAD_DWORDX2_IMM %0, 9, 0
+ %5 = S_LOAD_DWORDX2_IMM %0, 11, 0
+ %6 = S_LOAD_DWORDX2_IMM %0, 13, 0
%27 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%28 = REG_SEQUENCE %3, 1, %27, 2
%11 = S_MOV_B32 61440
@@ -60,13 +60,13 @@ body: |
%17 = REG_SEQUENCE killed %6, 17, %13, 18
%18 = REG_SEQUENCE killed %4, 17, %13, 18
%20 = COPY %29
- %19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ %19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, implicit $exec
%22 = COPY %29
- %21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ %21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, implicit $exec
%23 = V_MOV_B32_e32 1090519040, implicit $exec
%24 = nofpexcept V_MAC_F32_e64 0, killed %19, 0, killed %21, 0, %23, 1, 0, implicit $mode, implicit $exec
%26 = COPY %29
- BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -117,9 +117,9 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
- %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0
- %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0
- %6 = S_LOAD_DWORDX2_IMM %0, 13, 0, 0
+ %4 = S_LOAD_DWORDX2_IMM %0, 9, 0
+ %5 = S_LOAD_DWORDX2_IMM %0, 11, 0
+ %6 = S_LOAD_DWORDX2_IMM %0, 13, 0
%27 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%28 = REG_SEQUENCE %3, 1, %27, 2
%11 = S_MOV_B32 61440
@@ -131,13 +131,13 @@ body: |
%17 = REG_SEQUENCE killed %6, 17, %13, 18
%18 = REG_SEQUENCE killed %4, 17, %13, 18
%20 = COPY %29
- %19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ %19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, implicit $exec
%22 = COPY %29
- %21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ %21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, implicit $exec
%23 = V_MOV_B32_e32 1090519040, implicit $exec
%24 = nofpexcept V_MAC_F32_e64 0, killed %19, 0, killed %21, 0, %23, 0, 2, implicit $mode, implicit $exec
%26 = COPY %29
- BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -188,9 +188,9 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
- %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0
- %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0
- %6 = S_LOAD_DWORDX2_IMM %0, 13, 0, 0
+ %4 = S_LOAD_DWORDX2_IMM %0, 9, 0
+ %5 = S_LOAD_DWORDX2_IMM %0, 11, 0
+ %6 = S_LOAD_DWORDX2_IMM %0, 13, 0
%27 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%28 = REG_SEQUENCE %3, 1, %27, 2
%11 = S_MOV_B32 61440
@@ -202,13 +202,13 @@ body: |
%17 = REG_SEQUENCE killed %6, 17, %13, 18
%18 = REG_SEQUENCE killed %4, 17, %13, 18
%20 = COPY %29
- %19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ %19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, implicit $exec
%22 = COPY %29
- %21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ %21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, implicit $exec
%23 = V_MOV_B32_e32 1090519040, implicit $exec
%24 = nofpexcept V_MAD_F32_e64 0, killed %19, 0, killed %21, 0, %23, 1, 0, implicit $mode, implicit $exec
%26 = COPY %29
- BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -259,9 +259,9 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
- %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0
- %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0
- %6 = S_LOAD_DWORDX2_IMM %0, 13, 0, 0
+ %4 = S_LOAD_DWORDX2_IMM %0, 9, 0
+ %5 = S_LOAD_DWORDX2_IMM %0, 11, 0
+ %6 = S_LOAD_DWORDX2_IMM %0, 13, 0
%27 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%28 = REG_SEQUENCE %3, 1, %27, 2
%11 = S_MOV_B32 61440
@@ -273,13 +273,13 @@ body: |
%17 = REG_SEQUENCE killed %6, 17, %13, 18
%18 = REG_SEQUENCE killed %4, 17, %13, 18
%20 = COPY %29
- %19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ %19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, implicit $exec
%22 = COPY %29
- %21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ %21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, implicit $exec
%23 = V_MOV_B32_e32 1090519040, implicit $exec
%24 = nofpexcept V_MAD_F32_e64 0, killed %19, 0, killed %21, 0, %23, 0, 1, implicit $mode, implicit $exec
%26 = COPY %29
- BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/fold-multiple.mir b/llvm/test/CodeGen/AMDGPU/fold-multiple.mir
index fd3b88118288..625c23d48817 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-multiple.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-multiple.mir
@@ -34,7 +34,7 @@ body: |
%3 = S_LSHL_B32 %1, killed %1, implicit-def dead $scc
%4 = V_AND_B32_e64 killed %2, killed %3, implicit $exec
%5 = IMPLICIT_DEF
- BUFFER_STORE_DWORD_OFFSET killed %4, killed %5, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed %4, killed %5, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/fold-sgpr-copy.mir b/llvm/test/CodeGen/AMDGPU/fold-sgpr-copy.mir
index a33c97c4f5bb..2025db5bd839 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-sgpr-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-sgpr-copy.mir
@@ -4,7 +4,7 @@
# GCN-LABEL: name: fold_sgpr_to_sgpr_copy_full
# GCN: %0:sgpr_32 = IMPLICIT_DEF
-# GCN-NEXT: S_STORE_DWORD_IMM %0, undef $sgpr10_sgpr11, 0, 0, 0
+# GCN-NEXT: S_STORE_DWORD_IMM %0, undef $sgpr10_sgpr11, 0, 0
name: fold_sgpr_to_sgpr_copy_full
body: |
@@ -13,13 +13,13 @@ body: |
%0:sgpr_32 = IMPLICIT_DEF
%1:sgpr_32 = COPY %0
%2:sgpr_32 = COPY %1
- S_STORE_DWORD_IMM %2, undef $sgpr10_sgpr11, 0, 0, 0
+ S_STORE_DWORD_IMM %2, undef $sgpr10_sgpr11, 0, 0
...
# GCN-LABEL: name: fold_sgpr_to_sgpr_copy_subreg
# GCN: %0:sreg_64 = IMPLICIT_DEF
# GCN-NEXT: %2:sgpr_32 = COPY %0.sub0
-# GCN-NEXT: S_STORE_DWORD_IMM %2, undef $sgpr10_sgpr11, 0, 0, 0
+# GCN-NEXT: S_STORE_DWORD_IMM %2, undef $sgpr10_sgpr11, 0, 0
name: fold_sgpr_to_sgpr_copy_subreg
body: |
@@ -28,13 +28,13 @@ body: |
%0:sreg_64 = IMPLICIT_DEF
%1:sgpr_32 = COPY %0.sub0
%2:sgpr_32 = COPY %1
- S_STORE_DWORD_IMM %2, undef $sgpr10_sgpr11, 0, 0, 0
+ S_STORE_DWORD_IMM %2, undef $sgpr10_sgpr11, 0, 0
...
# GCN-LABEL: name: fold_sgpr_to_sgpr_copy_subreg2
# GCN: %0:sreg_64 = IMPLICIT_DEF
# GCN-NEXT: %3:sreg_32_xm0_xexec = COPY %0.sub0
-# GCN-NEXT: S_STORE_DWORD_IMM %3, undef $sgpr10_sgpr11, 0, 0, 0
+# GCN-NEXT: S_STORE_DWORD_IMM %3, undef $sgpr10_sgpr11, 0, 0
name: fold_sgpr_to_sgpr_copy_subreg2
body: |
@@ -44,5 +44,5 @@ body: |
%1:sgpr_32 = COPY %0.sub0
%2:sgpr_32 = COPY %1
%3:sreg_32_xm0_xexec = COPY %2
- S_STORE_DWORD_IMM %3, undef $sgpr10_sgpr11, 0, 0, 0
+ S_STORE_DWORD_IMM %3, undef $sgpr10_sgpr11, 0, 0
...
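In the atomic RTN cases below, the old glc immediate was written as -1 (any nonzero value meant "set"); under the single operand it normalizes to the GLC bit, so "0, -1, 0, 0" becomes "0, 1". A sketch of how a printer might decode the mask back into the familiar per-flag syntax, under the same assumed bit values as the earlier sketch — the function and names are illustrative only:

  #include <cstdint>
  #include <string>

  // Assumed bit layout of the merged operand, as in the earlier sketch.
  enum : uint32_t { GLC = 1, SLC = 2, DLC = 4 };

  // Render a cache_policy mask the way the old per-flag operands printed,
  // e.g. 1 -> " glc", 3 -> " glc slc". Purely illustrative of the encoding.
  static std::string printCachePolicy(uint32_t CPol) {
    std::string Out;
    if (CPol & GLC) Out += " glc";
    if (CPol & SLC) Out += " slc";
    if (CPol & DLC) Out += " dlc";
    return Out;
  }

  int main() {
    // GLC|SLC decodes to both flags, in a fixed order.
    return printCachePolicy(3) == " glc slc" ? 0 : 1;
  }

Because decoding is a pure bit test, the printed flags no longer depend on operand position, which is what lets the -1/0 pairs in the hunks below shrink to a single normalized value.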
diff --git a/llvm/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir b/llvm/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir
index e582acae4203..51ad23780803 100644
--- a/llvm/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir
+++ b/llvm/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir
@@ -8,7 +8,7 @@
name: flat_atomic_fcmpswap_to_s_denorm_mode
body: |
bb.0:
- FLAT_ATOMIC_FCMPSWAP undef %0:vreg_64, undef %1:vreg_64, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ FLAT_ATOMIC_FCMPSWAP undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@@ -20,7 +20,7 @@ body: |
name: flat_atomic_fcmpswap_x2_to_s_denorm_mode
body: |
bb.0:
- FLAT_ATOMIC_FCMPSWAP_X2 undef %0:vreg_64, undef %1:vreg_128, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ FLAT_ATOMIC_FCMPSWAP_X2 undef %0:vreg_64, undef %1:vreg_128, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@@ -32,7 +32,7 @@ body: |
name: flat_atomic_fmax_to_s_denorm_mode
body: |
bb.0:
- FLAT_ATOMIC_FMAX undef %0:vreg_64, undef %1:vgpr_32, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ FLAT_ATOMIC_FMAX undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@@ -44,7 +44,7 @@ body: |
name: flat_atomic_fmax_x2_to_s_denorm_mode
body: |
bb.0:
- FLAT_ATOMIC_FMAX_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ FLAT_ATOMIC_FMAX_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@@ -56,7 +56,7 @@ body: |
name: flat_atomic_fmin_to_s_denorm_mode
body: |
bb.0:
- FLAT_ATOMIC_FMIN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ FLAT_ATOMIC_FMIN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@@ -68,7 +68,7 @@ body: |
name: flat_atomic_fmin_x2_to_s_denorm_mode
body: |
bb.0:
- FLAT_ATOMIC_FMIN_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ FLAT_ATOMIC_FMIN_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@@ -80,7 +80,7 @@ body: |
name: flat_atomic_fcmpswap_x2_rtn_to_s_denorm_mode
body: |
bb.0:
- %2:vreg_64 = FLAT_ATOMIC_FCMPSWAP_X2_RTN undef %0:vreg_64, undef %1:vreg_128, 0, -1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ %2:vreg_64 = FLAT_ATOMIC_FCMPSWAP_X2_RTN undef %0:vreg_64, undef %1:vreg_128, 0, 1, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@@ -92,7 +92,7 @@ body: |
name: flat_atomic_fmax_rtn_to_s_denorm_mode
body: |
bb.0:
- %2:vgpr_32 = FLAT_ATOMIC_FMAX_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, -1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ %2:vgpr_32 = FLAT_ATOMIC_FMAX_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 1, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@@ -104,7 +104,7 @@ body: |
name: flat_atomic_fmax_x2_rtn_to_s_denorm_mode
body: |
bb.0:
- %2:vreg_64 = FLAT_ATOMIC_FMAX_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, -1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ %2:vreg_64 = FLAT_ATOMIC_FMAX_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 1, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@@ -116,7 +116,7 @@ body: |
name: flat_atomic_fmin_rtn_to_s_denorm_mode
body: |
bb.0:
- %2:vgpr_32 = FLAT_ATOMIC_FMIN_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, -1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ %2:vgpr_32 = FLAT_ATOMIC_FMIN_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 1, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@@ -128,7 +128,7 @@ body: |
name: flat_atomic_fmin_x2_rtn_to_s_denorm_mode
body: |
bb.0:
- %2:vreg_64 = FLAT_ATOMIC_FMIN_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, -1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ %2:vreg_64 = FLAT_ATOMIC_FMIN_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 1, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@@ -140,7 +140,7 @@ body: |
name: flat_atomic_fcmpswap_rtn_to_s_denorm_mode
body: |
bb.0:
- %2:vgpr_32 = FLAT_ATOMIC_FCMPSWAP_RTN undef %0:vreg_64, undef %1:vreg_64, 0, -1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ %2:vgpr_32 = FLAT_ATOMIC_FCMPSWAP_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 1, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@@ -152,7 +152,7 @@ body: |
name: global_atomic_fcmpswap_to_s_denorm_mode
body: |
bb.0:
- GLOBAL_ATOMIC_FCMPSWAP undef %0:vreg_64, undef %1:vgpr_32, 0, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ GLOBAL_ATOMIC_FCMPSWAP undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@@ -164,7 +164,7 @@ body: |
name: global_atomic_fcmpswap_x2_to_s_denorm_mode
body: |
bb.0:
- GLOBAL_ATOMIC_FCMPSWAP_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ GLOBAL_ATOMIC_FCMPSWAP_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@@ -176,7 +176,7 @@ body: |
name: global_atomic_fmax_to_s_denorm_mode
body: |
bb.0:
- GLOBAL_ATOMIC_FMAX undef %0:vreg_64, undef %1:vgpr_32, 0, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ GLOBAL_ATOMIC_FMAX undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@@ -188,7 +188,7 @@ body: |
name: global_atomic_fmax_x2_to_s_denorm_mode
body: |
bb.0:
- GLOBAL_ATOMIC_FMAX_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ GLOBAL_ATOMIC_FMAX_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@@ -200,7 +200,7 @@ body: |
name: global_atomic_fmin_to_s_denorm_mode
body: |
bb.0:
- GLOBAL_ATOMIC_FMIN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ GLOBAL_ATOMIC_FMIN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@@ -212,7 +212,7 @@ body: |
name: global_atomic_fmin_x2_to_s_denorm_mode
body: |
bb.0:
- GLOBAL_ATOMIC_FMIN_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ GLOBAL_ATOMIC_FMIN_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@@ -224,7 +224,7 @@ body: |
name: global_atomic_fcmpswap_rtn_to_s_denorm_mode
body: |
bb.0:
- %2:vgpr_32 = GLOBAL_ATOMIC_FCMPSWAP_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, -1, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ %2:vgpr_32 = GLOBAL_ATOMIC_FCMPSWAP_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@@ -236,7 +236,7 @@ body: |
name: global_atomic_fcmpswap_x2_rtn_to_s_denorm_mode
body: |
bb.0:
- %2:vreg_64 = GLOBAL_ATOMIC_FCMPSWAP_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, -1, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ %2:vreg_64 = GLOBAL_ATOMIC_FCMPSWAP_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@@ -248,7 +248,7 @@ body: |
name: global_atomic_fmax_rtn_to_s_denorm_mode
body: |
bb.0:
- %2:vgpr_32 = GLOBAL_ATOMIC_FMAX_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, -1, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ %2:vgpr_32 = GLOBAL_ATOMIC_FMAX_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@@ -260,7 +260,7 @@ body: |
name: global_atomic_fmax_x2_rtn_to_s_denorm_mode
body: |
bb.0:
- %2:vreg_64 = GLOBAL_ATOMIC_FMAX_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, -1, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ %2:vreg_64 = GLOBAL_ATOMIC_FMAX_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@@ -272,7 +272,7 @@ body: |
name: global_atomic_fmin_rtn_to_s_denorm_mode
body: |
bb.0:
- %2:vgpr_32 = GLOBAL_ATOMIC_FMIN_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, -1, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ %2:vgpr_32 = GLOBAL_ATOMIC_FMIN_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@@ -284,7 +284,7 @@ body: |
name: global_atomic_fmin_x2_rtn_to_s_denorm_mode
body: |
bb.0:
- %2:vreg_64 = GLOBAL_ATOMIC_FMIN_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, -1, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ %2:vreg_64 = GLOBAL_ATOMIC_FMIN_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@@ -296,7 +296,7 @@ body: |
name: global_atomic_fcmpswap_saddr_to_s_denorm_mode
body: |
bb.0:
- GLOBAL_ATOMIC_FCMPSWAP_SADDR undef %0:vgpr_32, undef %1:vgpr_32, undef %3:sgpr_64, 0, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ GLOBAL_ATOMIC_FCMPSWAP_SADDR undef %0:vgpr_32, undef %1:vgpr_32, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@@ -308,7 +308,7 @@ body: |
name: global_atomic_fcmpswap_x2_saddr_rtn_to_s_denorm_mode
body: |
bb.0:
- %2:vreg_64 = GLOBAL_ATOMIC_FCMPSWAP_X2_SADDR_RTN undef %0:vgpr_32, undef %1:vreg_64, undef %3:sgpr_64, 0, -1, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ %2:vreg_64 = GLOBAL_ATOMIC_FCMPSWAP_X2_SADDR_RTN undef %0:vgpr_32, undef %1:vreg_64, undef %3:sgpr_64, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@@ -320,7 +320,7 @@ body: |
name: global_atomic_fmax_saddr_rtn_to_s_denorm_mode
body: |
bb.0:
- %2:vgpr_32 = GLOBAL_ATOMIC_FMAX_SADDR_RTN undef %0:vgpr_32, undef %1:vgpr_32, undef %3:sgpr_64, 0, -1, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ %2:vgpr_32 = GLOBAL_ATOMIC_FMAX_SADDR_RTN undef %0:vgpr_32, undef %1:vgpr_32, undef %3:sgpr_64, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@@ -332,7 +332,7 @@ body: |
name: global_atomic_fmax_x2_saddr_rtn_to_s_denorm_mode
body: |
bb.0:
- %2:vreg_64 = GLOBAL_ATOMIC_FMAX_X2_SADDR_RTN undef %0:vgpr_32, undef %1:vreg_64, undef %3:sgpr_64, 0, -1, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ %2:vreg_64 = GLOBAL_ATOMIC_FMAX_X2_SADDR_RTN undef %0:vgpr_32, undef %1:vreg_64, undef %3:sgpr_64, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@@ -344,7 +344,7 @@ body: |
name: global_atomic_fmin_saddr_rtn_to_s_denorm_mode
body: |
bb.0:
- %2:vgpr_32 = GLOBAL_ATOMIC_FMIN_SADDR_RTN undef %0:vgpr_32, undef %1:vgpr_32, undef %3:sgpr_64, 0, -1, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ %2:vgpr_32 = GLOBAL_ATOMIC_FMIN_SADDR_RTN undef %0:vgpr_32, undef %1:vgpr_32, undef %3:sgpr_64, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@@ -356,7 +356,7 @@ body: |
name: global_atomic_fmin_x2_saddr_rtn_to_s_denorm_mode
body: |
bb.0:
- %2:vreg_64 = GLOBAL_ATOMIC_FMIN_X2_SADDR_RTN undef %0:vgpr_32, undef %1:vreg_64, undef %3:sgpr_64, 0, -1, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ %2:vreg_64 = GLOBAL_ATOMIC_FMIN_X2_SADDR_RTN undef %0:vgpr_32, undef %1:vreg_64, undef %3:sgpr_64, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@@ -368,7 +368,7 @@ body: |
name: flat_fp_atomic_to_s_denorm_mode_waitcnt
body: |
bb.0:
- FLAT_ATOMIC_FMIN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ FLAT_ATOMIC_FMIN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_WAITCNT 0
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@@ -381,7 +381,7 @@ body: |
name: flat_fp_atomic_to_s_denorm_mode_valu
body: |
bb.0:
- FLAT_ATOMIC_FMIN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ FLAT_ATOMIC_FMIN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
%2:vgpr_32 = V_ADD_F32_e32 undef %1:vgpr_32, undef %1:vgpr_32, implicit $mode, implicit $exec
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
diff --git a/llvm/test/CodeGen/AMDGPU/hard-clauses.mir b/llvm/test/CodeGen/AMDGPU/hard-clauses.mir
index 5d283f6bcc5d..506f9a77c177 100644
--- a/llvm/test/CodeGen/AMDGPU/hard-clauses.mir
+++ b/llvm/test/CodeGen/AMDGPU/hard-clauses.mir
@@ -9,9 +9,9 @@ body: |
liveins: $sgpr0_sgpr1
; CHECK-LABEL: name: nop1
; CHECK: liveins: $sgpr0_sgpr1
- ; CHECK: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ ; CHECK: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
; CHECK: S_NOP 2
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
S_NOP 2
...
@@ -25,13 +25,13 @@ body: |
; CHECK: liveins: $sgpr0_sgpr1
; CHECK: BUNDLE implicit-def $sgpr2, implicit-def $sgpr2_lo16, implicit-def $sgpr2_hi16, implicit-def $sgpr3, implicit-def $sgpr3_lo16, implicit-def $sgpr3_hi16, implicit $sgpr0_sgpr1 {
; CHECK: S_CLAUSE 2
- ; CHECK: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ ; CHECK: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
; CHECK: S_NOP 2
- ; CHECK: $sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0, 0
+ ; CHECK: $sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0
; CHECK: }
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
S_NOP 2
- $sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0, 0
+ $sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0
...
---
@@ -44,168 +44,168 @@ body: |
; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0
   ; CHECK: BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit-def $vgpr4, implicit-def $vgpr4_lo16, implicit-def $vgpr4_hi16, implicit-def $vgpr5, implicit-def $vgpr5_lo16, implicit-def $vgpr5_hi16, implicit-def $vgpr6, implicit-def $vgpr6_lo16, implicit-def $vgpr6_hi16, implicit-def $vgpr7, implicit-def $vgpr7_lo16, implicit-def $vgpr7_hi16, implicit-def $vgpr8, implicit-def $vgpr8_lo16, implicit-def $vgpr8_hi16, implicit-def $vgpr9, implicit-def $vgpr9_lo16, implicit-def $vgpr9_hi16, implicit-def $vgpr10, implicit-def $vgpr10_lo16, implicit-def $vgpr10_hi16, implicit-def $vgpr11, implicit-def $vgpr11_lo16, implicit-def $vgpr11_hi16, implicit-def $vgpr12, implicit-def $vgpr12_lo16, implicit-def $vgpr12_hi16, implicit-def $vgpr13, implicit-def $vgpr13_lo16, implicit-def $vgpr13_hi16, implicit-def $vgpr14, implicit-def $vgpr14_lo16, implicit-def $vgpr14_hi16, implicit-def $vgpr15, implicit-def $vgpr15_lo16, implicit-def $vgpr15_hi16, implicit-def $vgpr16, implicit-def $vgpr16_lo16, implicit-def $vgpr16_hi16, implicit-def $vgpr17, implicit-def $vgpr17_lo16, implicit-def $vgpr17_hi16, implicit-def $vgpr18, implicit-def $vgpr18_lo16, implicit-def $vgpr18_hi16, implicit-def $vgpr19, implicit-def $vgpr19_lo16, implicit-def $vgpr19_hi16, implicit-def $vgpr20, implicit-def $vgpr20_lo16, implicit-def $vgpr20_hi16, implicit-def $vgpr21, implicit-def $vgpr21_lo16, implicit-def $vgpr21_hi16, implicit-def $vgpr22, implicit-def $vgpr22_lo16, implicit-def $vgpr22_hi16, implicit-def $vgpr23, implicit-def $vgpr23_lo16, implicit-def $vgpr23_hi16, implicit-def $vgpr24, implicit-def $vgpr24_lo16, implicit-def $vgpr24_hi16, implicit-def $vgpr25, implicit-def $vgpr25_lo16, implicit-def $vgpr25_hi16, implicit-def $vgpr26, implicit-def $vgpr26_lo16, implicit-def $vgpr26_hi16, implicit-def $vgpr27, implicit-def $vgpr27_lo16, implicit-def $vgpr27_hi16, implicit-def $vgpr28, implicit-def $vgpr28_lo16, implicit-def $vgpr28_hi16, implicit-def $vgpr29, implicit-def $vgpr29_lo16, implicit-def $vgpr29_hi16, implicit-def $vgpr30, implicit-def $vgpr30_lo16, implicit-def $vgpr30_hi16, implicit-def $vgpr31, implicit-def $vgpr31_lo16, implicit-def $vgpr31_hi16, implicit-def $vgpr32, implicit-def $vgpr32_lo16, implicit-def $vgpr32_hi16, implicit-def $vgpr33, implicit-def $vgpr33_lo16, implicit-def $vgpr33_hi16, implicit-def $vgpr34, implicit-def $vgpr34_lo16, implicit-def $vgpr34_hi16, implicit-def $vgpr35, implicit-def $vgpr35_lo16, implicit-def $vgpr35_hi16, implicit-def $vgpr36, implicit-def $vgpr36_lo16, implicit-def $vgpr36_hi16, implicit-def $vgpr37, implicit-def $vgpr37_lo16, implicit-def $vgpr37_hi16, implicit-def $vgpr38, implicit-def $vgpr38_lo16, implicit-def $vgpr38_hi16, implicit-def $vgpr39, implicit-def $vgpr39_lo16, implicit-def $vgpr39_hi16, implicit-def $vgpr40, implicit-def $vgpr40_lo16, implicit-def $vgpr40_hi16, implicit-def $vgpr41, implicit-def $vgpr41_lo16, implicit-def $vgpr41_hi16, implicit-def $vgpr42, implicit-def $vgpr42_lo16, implicit-def $vgpr42_hi16, implicit-def $vgpr43, implicit-def $vgpr43_lo16, implicit-def $vgpr43_hi16, implicit-def $vgpr44, implicit-def $vgpr44_lo16, implicit-def $vgpr44_hi16, implicit-def $vgpr45, implicit-def $vgpr45_lo16, implicit-def $vgpr45_hi16, implicit-def $vgpr46, implicit-def $vgpr46_lo16, implicit-def $vgpr46_hi16, implicit-def $vgpr47, implicit-def $vgpr47_lo16, implicit-def $vgpr47_hi16, implicit-def $vgpr48, implicit-def $vgpr48_lo16, implicit-def $vgpr48_hi16, implicit-def $vgpr49, implicit-def $vgpr49_lo16, implicit-def $vgpr49_hi16, implicit-def $vgpr50, implicit-def $vgpr50_lo16, implicit-def $vgpr50_hi16, implicit-def $vgpr51, implicit-def $vgpr51_lo16, implicit-def $vgpr51_hi16, implicit-def $vgpr52, implicit-def $vgpr52_lo16, implicit-def $vgpr52_hi16, implicit-def $vgpr53, implicit-def $vgpr53_lo16, implicit-def $vgpr53_hi16, implicit-def $vgpr54, implicit-def $vgpr54_lo16, implicit-def $vgpr54_hi16, implicit-def $vgpr55, implicit-def $vgpr55_lo16, implicit-def $vgpr55_hi16, implicit-def $vgpr56, implicit-def $vgpr56_lo16, implicit-def $vgpr56_hi16, implicit-def $vgpr57, implicit-def $vgpr57_lo16, implicit-def $vgpr57_hi16, implicit-def $vgpr58, implicit-def $vgpr58_lo16, implicit-def $vgpr58_hi16, implicit-def $vgpr59, implicit-def $vgpr59_lo16, implicit-def $vgpr59_hi16, implicit-def $vgpr60, implicit-def $vgpr60_lo16, implicit-def $vgpr60_hi16, implicit-def $vgpr61, implicit-def $vgpr61_lo16, implicit-def $vgpr61_hi16, implicit-def $vgpr62, implicit-def $vgpr62_lo16, implicit-def $vgpr62_hi16, implicit-def $vgpr63, implicit-def $vgpr63_lo16, implicit-def $vgpr63_hi16, implicit-def $vgpr64, implicit-def $vgpr64_lo16, implicit-def $vgpr64_hi16, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
; CHECK: S_CLAUSE 63
- ; CHECK: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 8, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 12, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr4 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr5 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 20, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr6 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 24, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr7 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 28, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr8 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 32, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr9 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 36, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr10 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 40, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr11 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 44, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr12 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 48, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr13 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 52, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr14 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 56, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr15 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 60, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr16 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 64, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr17 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 68, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr18 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 72, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr19 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 76, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr20 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 80, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr21 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 84, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr22 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 88, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr23 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 92, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr24 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 96, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr25 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 100, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr26 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 104, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr27 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 108, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr28 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 112, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr29 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 116, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr30 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 120, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr31 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 124, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr32 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 128, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr33 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 132, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr34 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 136, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr35 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 140, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr36 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 144, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr37 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 148, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr38 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 152, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr39 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 156, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr40 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 160, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr41 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 164, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr42 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 168, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr43 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 172, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr44 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 176, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr45 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 180, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr46 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 184, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr47 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 188, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr48 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 192, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr49 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 196, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr50 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 200, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr51 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 204, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr52 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 208, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr53 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 212, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr54 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 216, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr55 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 220, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr56 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 224, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr57 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 228, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr58 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 232, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr59 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 236, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr60 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 240, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr61 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 244, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr62 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 248, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr63 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 252, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr64 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 256, 0, 0, 0, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 8, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 12, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr4 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr5 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 20, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr6 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 24, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr7 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 28, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr8 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 32, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr9 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 36, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr10 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 40, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr11 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 44, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr12 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 48, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr13 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 52, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr14 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 56, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr15 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 60, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr16 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 64, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr17 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 68, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr18 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 72, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr19 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 76, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr20 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 80, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr21 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 84, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr22 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 88, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr23 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 92, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr24 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 96, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr25 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 100, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr26 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 104, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr27 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 108, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr28 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 112, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr29 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 116, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr30 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 120, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr31 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 124, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr32 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 128, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr33 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 132, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr34 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 136, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr35 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 140, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr36 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 144, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr37 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 148, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr38 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 152, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr39 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 156, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr40 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 160, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr41 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 164, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr42 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 168, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr43 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 172, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr44 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 176, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr45 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 180, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr46 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 184, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr47 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 188, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr48 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 192, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr49 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 196, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr50 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 200, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr51 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 204, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr52 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 208, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr53 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 212, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr54 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 216, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr55 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 220, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr56 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 224, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr57 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 228, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr58 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 232, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr59 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 236, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr60 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 240, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr61 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 244, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr62 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 248, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr63 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 252, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr64 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 256, 0, 0, 0, implicit $exec
; CHECK: }
; CHECK: BUNDLE implicit-def $vgpr65, implicit-def $vgpr65_lo16, implicit-def $vgpr65_hi16, implicit-def $vgpr66, implicit-def $vgpr66_lo16, implicit-def $vgpr66_hi16, implicit-def $vgpr67, implicit-def $vgpr67_lo16, implicit-def $vgpr67_hi16, implicit-def $vgpr68, implicit-def $vgpr68_lo16, implicit-def $vgpr68_hi16, implicit-def $vgpr69, implicit-def $vgpr69_lo16, implicit-def $vgpr69_hi16, implicit-def $vgpr70, implicit-def $vgpr70_lo16, implicit-def $vgpr70_hi16, implicit-def $vgpr71, implicit-def $vgpr71_lo16, implicit-def $vgpr71_hi16, implicit-def $vgpr72, implicit-def $vgpr72_lo16, implicit-def $vgpr72_hi16, implicit-def $vgpr73, implicit-def $vgpr73_lo16, implicit-def $vgpr73_hi16, implicit-def $vgpr74, implicit-def $vgpr74_lo16, implicit-def $vgpr74_hi16, implicit-def $vgpr75, implicit-def $vgpr75_lo16, implicit-def $vgpr75_hi16, implicit-def $vgpr76, implicit-def $vgpr76_lo16, implicit-def $vgpr76_hi16, implicit-def $vgpr77, implicit-def $vgpr77_lo16, implicit-def $vgpr77_hi16, implicit-def $vgpr78, implicit-def $vgpr78_lo16, implicit-def $vgpr78_hi16, implicit-def $vgpr79, implicit-def $vgpr79_lo16, implicit-def $vgpr79_hi16, implicit-def $vgpr80, implicit-def $vgpr80_lo16, implicit-def $vgpr80_hi16, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
; CHECK: S_CLAUSE 15
- ; CHECK: $vgpr65 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 260, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr66 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 264, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr67 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 268, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr68 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 272, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr69 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 276, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr70 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 280, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr71 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 284, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr72 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 288, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr73 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 292, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr74 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 296, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr75 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 300, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr76 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 304, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr77 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 308, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr78 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 312, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr79 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 316, 0, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: $vgpr80 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 320, 0, 0, 0, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr65 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 260, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr66 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 264, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr67 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 268, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr68 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 272, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr69 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 276, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr70 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 280, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr71 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 284, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr72 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 288, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr73 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 292, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr74 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 296, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr75 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 300, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr76 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 304, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr77 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 308, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr78 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 312, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr79 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 316, 0, 0, 0, implicit $exec
+ ; CHECK: $vgpr80 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 320, 0, 0, 0, implicit $exec
; CHECK: }
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 8, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 12, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr4 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr5 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 20, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr6 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 24, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr7 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 28, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr8 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 32, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr9 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 36, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr10 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 40, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr11 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 44, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr12 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 48, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr13 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 52, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr14 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 56, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr15 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 60, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr16 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 64, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr17 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 68, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr18 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 72, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr19 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 76, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr20 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 80, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr21 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 84, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr22 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 88, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr23 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 92, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr24 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 96, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr25 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 100, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr26 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 104, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr27 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 108, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr28 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 112, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr29 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 116, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr30 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 120, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr31 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 124, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr32 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 128, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr33 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 132, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr34 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 136, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr35 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 140, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr36 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 144, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr37 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 148, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr38 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 152, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr39 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 156, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr40 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 160, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr41 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 164, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr42 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 168, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr43 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 172, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr44 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 176, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr45 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 180, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr46 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 184, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr47 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 188, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr48 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 192, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr49 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 196, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr50 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 200, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr51 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 204, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr52 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 208, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr53 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 212, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr54 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 216, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr55 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 220, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr56 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 224, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr57 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 228, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr58 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 232, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr59 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 236, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr60 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 240, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr61 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 244, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr62 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 248, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr63 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 252, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr64 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 256, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr65 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 260, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr66 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 264, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr67 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 268, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr68 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 272, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr69 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 276, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr70 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 280, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr71 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 284, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr72 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 288, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr73 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 292, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr74 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 296, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr75 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 300, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr76 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 304, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr77 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 308, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr78 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 312, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr79 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 316, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr80 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 320, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, 0, implicit $exec
+ $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 8, 0, 0, 0, implicit $exec
+ $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 12, 0, 0, 0, implicit $exec
+ $vgpr4 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, 0, implicit $exec
+ $vgpr5 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 20, 0, 0, 0, implicit $exec
+ $vgpr6 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 24, 0, 0, 0, implicit $exec
+ $vgpr7 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 28, 0, 0, 0, implicit $exec
+ $vgpr8 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 32, 0, 0, 0, implicit $exec
+ $vgpr9 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 36, 0, 0, 0, implicit $exec
+ $vgpr10 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 40, 0, 0, 0, implicit $exec
+ $vgpr11 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 44, 0, 0, 0, implicit $exec
+ $vgpr12 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 48, 0, 0, 0, implicit $exec
+ $vgpr13 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 52, 0, 0, 0, implicit $exec
+ $vgpr14 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 56, 0, 0, 0, implicit $exec
+ $vgpr15 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 60, 0, 0, 0, implicit $exec
+ $vgpr16 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 64, 0, 0, 0, implicit $exec
+ $vgpr17 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 68, 0, 0, 0, implicit $exec
+ $vgpr18 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 72, 0, 0, 0, implicit $exec
+ $vgpr19 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 76, 0, 0, 0, implicit $exec
+ $vgpr20 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 80, 0, 0, 0, implicit $exec
+ $vgpr21 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 84, 0, 0, 0, implicit $exec
+ $vgpr22 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 88, 0, 0, 0, implicit $exec
+ $vgpr23 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 92, 0, 0, 0, implicit $exec
+ $vgpr24 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 96, 0, 0, 0, implicit $exec
+ $vgpr25 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 100, 0, 0, 0, implicit $exec
+ $vgpr26 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 104, 0, 0, 0, implicit $exec
+ $vgpr27 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 108, 0, 0, 0, implicit $exec
+ $vgpr28 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 112, 0, 0, 0, implicit $exec
+ $vgpr29 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 116, 0, 0, 0, implicit $exec
+ $vgpr30 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 120, 0, 0, 0, implicit $exec
+ $vgpr31 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 124, 0, 0, 0, implicit $exec
+ $vgpr32 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 128, 0, 0, 0, implicit $exec
+ $vgpr33 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 132, 0, 0, 0, implicit $exec
+ $vgpr34 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 136, 0, 0, 0, implicit $exec
+ $vgpr35 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 140, 0, 0, 0, implicit $exec
+ $vgpr36 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 144, 0, 0, 0, implicit $exec
+ $vgpr37 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 148, 0, 0, 0, implicit $exec
+ $vgpr38 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 152, 0, 0, 0, implicit $exec
+ $vgpr39 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 156, 0, 0, 0, implicit $exec
+ $vgpr40 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 160, 0, 0, 0, implicit $exec
+ $vgpr41 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 164, 0, 0, 0, implicit $exec
+ $vgpr42 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 168, 0, 0, 0, implicit $exec
+ $vgpr43 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 172, 0, 0, 0, implicit $exec
+ $vgpr44 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 176, 0, 0, 0, implicit $exec
+ $vgpr45 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 180, 0, 0, 0, implicit $exec
+ $vgpr46 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 184, 0, 0, 0, implicit $exec
+ $vgpr47 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 188, 0, 0, 0, implicit $exec
+ $vgpr48 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 192, 0, 0, 0, implicit $exec
+ $vgpr49 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 196, 0, 0, 0, implicit $exec
+ $vgpr50 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 200, 0, 0, 0, implicit $exec
+ $vgpr51 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 204, 0, 0, 0, implicit $exec
+ $vgpr52 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 208, 0, 0, 0, implicit $exec
+ $vgpr53 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 212, 0, 0, 0, implicit $exec
+ $vgpr54 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 216, 0, 0, 0, implicit $exec
+ $vgpr55 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 220, 0, 0, 0, implicit $exec
+ $vgpr56 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 224, 0, 0, 0, implicit $exec
+ $vgpr57 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 228, 0, 0, 0, implicit $exec
+ $vgpr58 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 232, 0, 0, 0, implicit $exec
+ $vgpr59 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 236, 0, 0, 0, implicit $exec
+ $vgpr60 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 240, 0, 0, 0, implicit $exec
+ $vgpr61 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 244, 0, 0, 0, implicit $exec
+ $vgpr62 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 248, 0, 0, 0, implicit $exec
+ $vgpr63 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 252, 0, 0, 0, implicit $exec
+ $vgpr64 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 256, 0, 0, 0, implicit $exec
+ $vgpr65 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 260, 0, 0, 0, implicit $exec
+ $vgpr66 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 264, 0, 0, 0, implicit $exec
+ $vgpr67 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 268, 0, 0, 0, implicit $exec
+ $vgpr68 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 272, 0, 0, 0, implicit $exec
+ $vgpr69 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 276, 0, 0, 0, implicit $exec
+ $vgpr70 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 280, 0, 0, 0, implicit $exec
+ $vgpr71 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 284, 0, 0, 0, implicit $exec
+ $vgpr72 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 288, 0, 0, 0, implicit $exec
+ $vgpr73 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 292, 0, 0, 0, implicit $exec
+ $vgpr74 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 296, 0, 0, 0, implicit $exec
+ $vgpr75 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 300, 0, 0, 0, implicit $exec
+ $vgpr76 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 304, 0, 0, 0, implicit $exec
+ $vgpr77 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 308, 0, 0, 0, implicit $exec
+ $vgpr78 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 312, 0, 0, 0, implicit $exec
+ $vgpr79 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 316, 0, 0, 0, implicit $exec
+ $vgpr80 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 320, 0, 0, 0, implicit $exec
...
diff --git a/llvm/test/CodeGen/AMDGPU/hazard-buffer-store-v-interp.mir b/llvm/test/CodeGen/AMDGPU/hazard-buffer-store-v-interp.mir
index 1ac363471723..b6aa7a54ebce 100644
--- a/llvm/test/CodeGen/AMDGPU/hazard-buffer-store-v-interp.mir
+++ b/llvm/test/CodeGen/AMDGPU/hazard-buffer-store-v-interp.mir
@@ -12,7 +12,7 @@ body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr7, $vgpr8, $vgpr9, $vgpr10
- BUFFER_STORE_DWORDX4_OFFSET_exact killed $vgpr7_vgpr8_vgpr9_vgpr10, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 96, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORDX4_OFFSET_exact killed $vgpr7_vgpr8_vgpr9_vgpr10, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 96, 0, 0, 0, implicit $exec
$vgpr7 = V_INTERP_P1_F32 $vgpr0, 0, 0, implicit $mode, implicit $m0, implicit $exec
S_ENDPGM 0
diff --git a/llvm/test/CodeGen/AMDGPU/hazard-hidden-bundle.mir b/llvm/test/CodeGen/AMDGPU/hazard-hidden-bundle.mir
index 496416fd9864..fa42bfd5a25e 100644
--- a/llvm/test/CodeGen/AMDGPU/hazard-hidden-bundle.mir
+++ b/llvm/test/CodeGen/AMDGPU/hazard-hidden-bundle.mir
@@ -13,9 +13,9 @@ name: break_smem_clause_simple_load_smrd8_ptr_hidden_bundle
body: |
bb.0:
BUNDLE implicit-def $sgpr6_sgpr7 {
- $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0, 0
+ $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
}
- $sgpr14_sgpr15 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
+ $sgpr14_sgpr15 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
S_ENDPGM 0
...
@@ -52,9 +52,9 @@ body: |
$vgpr1 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
}
BUNDLE implicit-def $sgpr0_sgpr1, implicit $sgpr10_sgpr11 {
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
}
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -84,5 +84,5 @@ body: |
}
bb.2:
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec
...
diff --git a/llvm/test/CodeGen/AMDGPU/hazard-in-bundle.mir b/llvm/test/CodeGen/AMDGPU/hazard-in-bundle.mir
index 7f99b4af154a..167dc83d5485 100644
--- a/llvm/test/CodeGen/AMDGPU/hazard-in-bundle.mir
+++ b/llvm/test/CodeGen/AMDGPU/hazard-in-bundle.mir
@@ -15,14 +15,14 @@ name: break_smem_clause_max_look_ahead_in_bundle
body: |
bb.0:
BUNDLE implicit-def $sgpr6_sgpr7 {
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
- S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0, 0
- S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 4, 0, 0
- S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 8, 0, 0
- S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 12, 0, 0
- S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 16, 0, 0
- $sgpr14_sgpr15 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0, 0
- $sgpr16_sgpr17 = S_LOAD_DWORDX2_IMM $sgpr14_sgpr15, 0, 0, 0
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
+ S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0
+ S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 4, 0
+ S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 8, 0
+ S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 12, 0
+ S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 16, 0
+ $sgpr14_sgpr15 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
+ $sgpr16_sgpr17 = S_LOAD_DWORDX2_IMM $sgpr14_sgpr15, 0, 0
}
S_ENDPGM 0
...
@@ -37,7 +37,7 @@ body: |
bb.0:
liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
BUNDLE implicit-def $sgpr0_sgpr1 {
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
$sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $mode, implicit $exec
}
S_ENDPGM 0
@@ -77,8 +77,8 @@ body: |
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
$vgpr0 = IMPLICIT_DEF
$vgpr1 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec
}
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/hazard-inlineasm.mir b/llvm/test/CodeGen/AMDGPU/hazard-inlineasm.mir
index 1a0bc39458cf..b5850356cb56 100644
--- a/llvm/test/CodeGen/AMDGPU/hazard-inlineasm.mir
+++ b/llvm/test/CodeGen/AMDGPU/hazard-inlineasm.mir
@@ -16,7 +16,7 @@ name: hazard-inlineasm
body: |
bb.0:
- FLAT_STORE_DWORDX4 $vgpr49_vgpr50, $vgpr26_vgpr27_vgpr28_vgpr29, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORDX4 $vgpr49_vgpr50, $vgpr26_vgpr27_vgpr28_vgpr29, 0, 0, implicit $exec, implicit $flat_scr
INLINEASM &"v_mad_u64_u32 $0, $1, $2, $3, $4", 0, 2621450, def $vgpr26_vgpr27, 2818058, def dead $sgpr14_sgpr15, 589833, $sgpr12, 327689, killed $vgpr51, 2621449, $vgpr46_vgpr47
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/hazard-kill.mir b/llvm/test/CodeGen/AMDGPU/hazard-kill.mir
index 6602c079986e..ecc0894d707c 100644
--- a/llvm/test/CodeGen/AMDGPU/hazard-kill.mir
+++ b/llvm/test/CodeGen/AMDGPU/hazard-kill.mir
@@ -21,7 +21,7 @@ body: |
liveins: $sgpr2, $sgpr3, $sgpr4
$sgpr6 = S_MOV_B32 killed $sgpr3
- renamable $sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX4_IMM renamable $sgpr6_sgpr7, 16, 0, 0
+ renamable $sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX4_IMM renamable $sgpr6_sgpr7, 16, 0
$m0 = S_MOV_B32 killed renamable $sgpr4
dead renamable $sgpr0 = KILL undef renamable $sgpr2
renamable $vgpr0 = V_INTERP_MOV_F32 2, 0, 0, implicit $mode, implicit $m0, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/hazard-pass-ordering.mir b/llvm/test/CodeGen/AMDGPU/hazard-pass-ordering.mir
index c6d8994a090b..debd7edbd14e 100644
--- a/llvm/test/CodeGen/AMDGPU/hazard-pass-ordering.mir
+++ b/llvm/test/CodeGen/AMDGPU/hazard-pass-ordering.mir
@@ -15,7 +15,7 @@ body: |
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
$vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
$sgpr8_sgpr9 = S_MOV_B64 -1
- $vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
$vcc = S_ANDN2_B64 $exec, killed renamable $sgpr8_sgpr9, implicit-def dead $scc
S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
diff --git a/llvm/test/CodeGen/AMDGPU/hazard-recognizer-meta-insts.mir b/llvm/test/CodeGen/AMDGPU/hazard-recognizer-meta-insts.mir
index 97b6eba13754..49b34369521a 100644
--- a/llvm/test/CodeGen/AMDGPU/hazard-recognizer-meta-insts.mir
+++ b/llvm/test/CodeGen/AMDGPU/hazard-recognizer-meta-insts.mir
@@ -12,11 +12,11 @@ body: |
bb.0:
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9-LABEL: name: global_store_dwordx4_data_hazard_kill
- ; GFX9: GLOBAL_STORE_DWORDX4 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec
+ ; GFX9: GLOBAL_STORE_DWORDX4 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit $exec
; GFX9: $vgpr2 = KILL
; GFX9: S_NOP 0
; GFX9: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
- GLOBAL_STORE_DWORDX4 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit $exec
$vgpr2 = KILL
$vgpr2 = V_MOV_B32_e32 0, implicit $exec
@@ -30,11 +30,11 @@ body: |
bb.0:
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; GFX9-LABEL: name: global_store_dwordx3_data_hazard_kill
- ; GFX9: GLOBAL_STORE_DWORDX3 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec
+ ; GFX9: GLOBAL_STORE_DWORDX3 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, 0, 0, implicit $exec
; GFX9: $vgpr2 = KILL
; GFX9: S_NOP 0
; GFX9: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
- GLOBAL_STORE_DWORDX3 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX3 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, 0, 0, implicit $exec
$vgpr2 = KILL
$vgpr2 = V_MOV_B32_e32 0, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/i1_copy_phi_with_phi_incoming_value.mir b/llvm/test/CodeGen/AMDGPU/i1_copy_phi_with_phi_incoming_value.mir
index 66f07d8db4e8..856893ae0205 100644
--- a/llvm/test/CodeGen/AMDGPU/i1_copy_phi_with_phi_incoming_value.mir
+++ b/llvm/test/CodeGen/AMDGPU/i1_copy_phi_with_phi_incoming_value.mir
@@ -11,7 +11,7 @@ body: |
; GCN: liveins: $vgpr0, $sgpr4_sgpr5
; GCN: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5
; GCN: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 0, 0, 0 :: (dereferenceable invariant load 4, align 16, addrspace 4)
+ ; GCN: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 0, 0 :: (dereferenceable invariant load 4, align 16, addrspace 4)
; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORD_IMM]]
; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]](s32)
; GCN: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I32_e64 [[COPY1]](s32), [[S_LOAD_DWORD_IMM]], implicit $exec
@@ -65,7 +65,7 @@ body: |
%1:sgpr_64(p4) = COPY $sgpr4_sgpr5
%2:vgpr_32(s32) = COPY $vgpr0
- %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %1:sgpr_64(p4), 0, 0, 0 :: (dereferenceable invariant load 4, align 16, addrspace 4)
+ %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %1:sgpr_64(p4), 0, 0 :: (dereferenceable invariant load 4, align 16, addrspace 4)
%8:sreg_32 = COPY %3:sreg_32_xm0_xexec
%14:vgpr_32 = COPY %2:vgpr_32(s32)
%9:sreg_64 = V_CMP_LT_I32_e64 %2:vgpr_32(s32), %3:sreg_32_xm0_xexec, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll
index c74471b1b8fe..a582f64aaa22 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll
@@ -13,7 +13,7 @@ define amdgpu_kernel void @extract_w_offset_vgpr(i32 addrspace(1)* %out) {
; GCN: successors: %bb.1(0x80000000)
; GCN: liveins: $vgpr0, $sgpr0_sgpr1
; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5)
- ; GCN: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 8 from %ir.out.kernarg.offset.cast, align 4, addrspace 4)
+ ; GCN: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 8 from %ir.out.kernarg.offset.cast, align 4, addrspace 4)
; GCN: renamable $sgpr6 = COPY renamable $sgpr1
; GCN: renamable $sgpr0 = COPY renamable $sgpr0, implicit killed $sgpr0_sgpr1
; GCN: renamable $sgpr4 = S_MOV_B32 61440
@@ -99,7 +99,7 @@ define amdgpu_kernel void @extract_w_offset_vgpr(i32 addrspace(1)* %out) {
; GCN: bb.2:
; GCN: $vgpr0 = SI_SPILL_V32_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load 4 from %stack.6, addrspace 5)
; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = SI_SPILL_S128_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load 16 from %stack.2, align 4, addrspace 5)
- ; GCN: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.load, addrspace 1)
+ ; GCN: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.load, addrspace 1)
; GCN: S_ENDPGM 0
entry:
%id = call i32 @llvm.amdgcn.workitem.id.x() #1
diff --git a/llvm/test/CodeGen/AMDGPU/insert-skips-flat-vmem-ds.mir b/llvm/test/CodeGen/AMDGPU/insert-skips-flat-vmem-ds.mir
index 77e094ea081e..6ce629a0dc05 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-skips-flat-vmem-ds.mir
+++ b/llvm/test/CodeGen/AMDGPU/insert-skips-flat-vmem-ds.mir
@@ -12,7 +12,7 @@ body: |
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; CHECK: FLAT_STORE_DWORD undef $vgpr1_vgpr2, $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; CHECK: FLAT_STORE_DWORD undef $vgpr1_vgpr2, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
; CHECK: bb.2:
; CHECK: S_ENDPGM 0
bb.0:
@@ -22,7 +22,7 @@ body: |
bb.1:
successors: %bb.2
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
- FLAT_STORE_DWORD undef $vgpr1_vgpr2, $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD undef $vgpr1_vgpr2, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
bb.2:
S_ENDPGM 0
@@ -39,7 +39,7 @@ body: |
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; CHECK: BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ ; CHECK: BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, implicit $exec
; CHECK: bb.2:
; CHECK: S_ENDPGM 0
bb.0:
@@ -49,7 +49,7 @@ body: |
bb.1:
successors: %bb.2
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
- BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, implicit $exec
bb.2:
S_ENDPGM 0
diff --git a/llvm/test/CodeGen/AMDGPU/insert-waitcnts-exp.mir b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-exp.mir
index 8d000e6da80a..c240e770c592 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-waitcnts-exp.mir
+++ b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-exp.mir
@@ -49,10 +49,10 @@ body: |
bb.0 (%ir-block.2):
$sgpr3 = S_MOV_B32 61440
$sgpr2 = S_MOV_B32 -1
- $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
- $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
- $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
- $vgpr3 = BUFFER_LOAD_DWORD_OFFSET killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
+ $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
+ $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
+ $vgpr3 = BUFFER_LOAD_DWORD_OFFSET killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
EXP_DONE 0, killed $vgpr0, killed $vgpr1, killed $vgpr2, killed $vgpr3, -1, -1, 15, implicit $exec
$vgpr0 = V_MOV_B32_e32 1056964608, implicit $exec
$vgpr1 = V_MOV_B32_e32 1065353216, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir b/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir
index 03e1d12bd451..561b8a1bd47d 100644
--- a/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir
+++ b/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir
@@ -190,32 +190,32 @@ name: vmem_gt_8dw_store
body: |
bb.0:
- BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
- BUFFER_STORE_DWORDX3_OFFSET $vgpr2_vgpr3_vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORDX3_OFFSET $vgpr2_vgpr3_vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
- BUFFER_STORE_DWORDX4_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORDX4_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
- BUFFER_STORE_DWORDX4_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORDX4_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
- BUFFER_STORE_FORMAT_XYZ_OFFSET $vgpr2_vgpr3_vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_FORMAT_XYZ_OFFSET $vgpr2_vgpr3_vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
- BUFFER_STORE_FORMAT_XYZW_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_FORMAT_XYZW_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
BUFFER_ATOMIC_CMPSWAP_X2_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, implicit $exec
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
S_BRANCH %bb.1
bb.1:
- FLAT_STORE_DWORDX2 $vgpr0_vgpr1, $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORDX2 $vgpr0_vgpr1, $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
- FLAT_STORE_DWORDX3 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORDX3 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, 0, 0, implicit $exec, implicit $flat_scr
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
- FLAT_STORE_DWORDX4 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORDX4 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
- FLAT_ATOMIC_CMPSWAP_X2 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_ATOMIC_CMPSWAP_X2 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
- FLAT_ATOMIC_FCMPSWAP_X2 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_ATOMIC_FCMPSWAP_X2 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
S_ENDPGM 0
diff --git a/llvm/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir b/llvm/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir
index 693851023f7c..c41da0f25b35 100644
--- a/llvm/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir
+++ b/llvm/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir
@@ -55,7 +55,7 @@ body: |
bb.0.entry:
liveins: $sgpr0_sgpr1
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
$sgpr7 = S_MOV_B32 61440
$sgpr6 = S_MOV_B32 -1
S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc
@@ -64,7 +64,7 @@ body: |
liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
$vgpr0 = V_MOV_B32_e32 100, implicit $exec
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
S_BRANCH %bb.3
@@ -72,7 +72,7 @@ body: |
liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
$vgpr0 = V_MOV_B32_e32 9, implicit $exec
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
bb.3.done:
@@ -80,7 +80,7 @@ body: |
$sgpr3 = S_MOV_B32 61440
$sgpr2 = S_MOV_B32 -1
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out)
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out)
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/lds-branch-vmem-hazard.mir b/llvm/test/CodeGen/AMDGPU/lds-branch-vmem-hazard.mir
index 2df74fed26ee..00142fb67b02 100644
--- a/llvm/test/CodeGen/AMDGPU/lds-branch-vmem-hazard.mir
+++ b/llvm/test/CodeGen/AMDGPU/lds-branch-vmem-hazard.mir
@@ -12,7 +12,7 @@ body: |
S_BRANCH %bb.1
bb.1:
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -24,7 +24,7 @@ name: hazard_buf_branch_lds
body: |
bb.0:
successors: %bb.1
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, implicit $exec
S_BRANCH %bb.1
bb.1:
@@ -56,11 +56,11 @@ name: no_hazard_buf_branch_buf
body: |
bb.0:
successors: %bb.1
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, implicit $exec
S_BRANCH %bb.1
bb.1:
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -75,7 +75,7 @@ body: |
$vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
bb.1:
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -87,7 +87,7 @@ name: no_hazard_lds_branch_buf_samebb
body: |
bb.0:
$vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -101,7 +101,7 @@ body: |
bb.0:
successors: %bb.0
$vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, implicit $exec
S_BRANCH %bb.0
...
@@ -118,8 +118,8 @@ body: |
S_BRANCH %bb.1
bb.1:
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -137,7 +137,7 @@ body: |
bb.1:
$vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -150,11 +150,11 @@ body: |
bb.0:
successors: %bb.1
$vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, implicit $exec
S_BRANCH %bb.1
bb.1:
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -171,7 +171,7 @@ body: |
bb.1:
S_WAITCNT_VSCNT undef $sgpr_null, 1
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -189,7 +189,7 @@ body: |
bb.1:
S_WAITCNT_VSCNT undef $sgpr_null, 0
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -206,7 +206,7 @@ body: |
bb.1:
S_WAITCNT_VSCNT undef $sgpr0, 0
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -223,7 +223,7 @@ body: |
S_BRANCH %bb.1
bb.1:
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -239,7 +239,7 @@ body: |
S_BRANCH %bb.1
bb.1:
- $vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -255,7 +255,7 @@ body: |
S_BRANCH %bb.1
bb.1:
- $vgpr1 = SCRATCH_LOAD_DWORD undef $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr1 = SCRATCH_LOAD_DWORD undef $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
@@ -271,6 +271,6 @@ body: |
S_BRANCH %bb.1
bb.1:
- $vgpr1 = FLAT_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr1 = FLAT_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
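
All of the hunks above follow one shape: the run of flag immediates on each memory instruction collapses into a single cache-policy immediate, so the FLAT/GLOBAL/SCRATCH forms go from five trailing zeros to two (offset plus policy), the buffer forms lose three trailing operands, and the scalar loads lose one. As a rough illustration of what a combined policy operand looks like, here is a minimal C++ sketch of composing and printing such a bitmask; the bit values (glc=1, slc=2, dlc=4) and the helper names are assumptions made for this sketch, not definitions quoted from the patch:

    #include <cstdint>
    #include <string>

    // Illustrative flag bits for a combined cache-policy immediate.
    // The real encodings live in the target headers; these values are
    // assumed here for demonstration only.
    enum CachePolicyBits : uint64_t { CP_GLC = 1, CP_SLC = 2, CP_DLC = 4 };

    // Fold the old per-flag operands into one immediate.
    constexpr uint64_t encodeCachePolicy(bool Glc, bool Slc, bool Dlc) {
      return (Glc ? CP_GLC : 0) | (Slc ? CP_SLC : 0) | (Dlc ? CP_DLC : 0);
    }

    // Print the set bits back as assembler modifiers. A mask prints in
    // one fixed order no matter how it was built up.
    std::string printCachePolicy(uint64_t Imm) {
      std::string Mods;
      if (Imm & CP_GLC) Mods += " glc";
      if (Imm & CP_SLC) Mods += " slc";
      if (Imm & CP_DLC) Mods += " dlc";
      return Mods;
    }

For example, encodeCachePolicy(true, false, true) yields 5 and printCachePolicy(5) gives " glc dlc"; the zeros in the rewritten tests are simply this immediate with no bits set.
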
diff --git a/llvm/test/CodeGen/AMDGPU/limit-coalesce.mir b/llvm/test/CodeGen/AMDGPU/limit-coalesce.mir
index 55f6ca6bbc96..c1896352ad87 100644
--- a/llvm/test/CodeGen/AMDGPU/limit-coalesce.mir
+++ b/llvm/test/CodeGen/AMDGPU/limit-coalesce.mir
@@ -57,15 +57,15 @@ body: |
%4.sub1 = COPY %3.sub0
undef %5.sub0 = COPY %4.sub1
%5.sub1 = COPY %4.sub0
- FLAT_STORE_DWORDX2 $vgpr0_vgpr1, killed %5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORDX2 $vgpr0_vgpr1, killed %5, 0, 0, implicit $exec, implicit $flat_scr
%6 = IMPLICIT_DEF
undef %7.sub0_sub1 = COPY %6
%7.sub2 = COPY %3.sub0
- FLAT_STORE_DWORDX3 $vgpr0_vgpr1, killed %7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORDX3 $vgpr0_vgpr1, killed %7, 0, 0, implicit $exec, implicit $flat_scr
%8 = IMPLICIT_DEF
undef %9.sub0_sub1_sub2 = COPY %8
%9.sub3 = COPY %3.sub0
- FLAT_STORE_DWORDX4 $vgpr0_vgpr1, killed %9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORDX4 $vgpr0_vgpr1, killed %9, 0, 0, implicit $exec, implicit $flat_scr
...
diff --git a/llvm/test/CodeGen/AMDGPU/limit-soft-clause-reg-pressure.mir b/llvm/test/CodeGen/AMDGPU/limit-soft-clause-reg-pressure.mir
index 51273d56d381..b0a363ceb25b 100644
--- a/llvm/test/CodeGen/AMDGPU/limit-soft-clause-reg-pressure.mir
+++ b/llvm/test/CodeGen/AMDGPU/limit-soft-clause-reg-pressure.mir
@@ -17,31 +17,31 @@ body: |
; CHECK-LABEL: name: soft_clause_bundle_out_of_registers
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x80000000)
- ; CHECK: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 0, 0, 0 :: (load 64, align 4, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM1:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 4096, 0, 0 :: (load 64, align 4, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM2:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 8192, 0, 0 :: (load 64, align 4, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM3:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 12288, 0, 0 :: (load 64, align 4, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM4:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 64, 0, 0 :: (load 64, align 4, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM5:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 4160, 0, 0 :: (load 64, align 4, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM6:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 8256, 0, 0 :: (load 64, align 4, addrspace 4)
+ ; CHECK: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 0, 0 :: (load 64, align 4, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM1:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 4096, 0 :: (load 64, align 4, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM2:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 8192, 0 :: (load 64, align 4, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM3:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 12288, 0 :: (load 64, align 4, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM4:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 64, 0 :: (load 64, align 4, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM5:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 4160, 0 :: (load 64, align 4, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM6:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 8256, 0 :: (load 64, align 4, addrspace 4)
; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef $sgpr4_sgpr5, 0, csr_amdgpu_highregs, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit-def $sgpr70, implicit-def $sgpr80, implicit-def $sgpr90, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6
%0:sgpr_64 = COPY $sgpr4_sgpr5
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 0, 0, 0 :: (dereferenceable invariant load 8, addrspace 4)
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 0, 0 :: (dereferenceable invariant load 8, addrspace 4)
%2:vreg_64 = IMPLICIT_DEF
bb.1:
undef %3.sub0:sreg_64 = S_ADD_U32 %1.sub0, 0, implicit-def $scc
%3.sub1:sreg_64 = S_ADDC_U32 %1.sub1, 0, implicit-def dead $scc, implicit killed $scc
- %4:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 0, 0, 0 :: (load 64, align 4, addrspace 4)
- %5:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 4096, 0, 0 :: (load 64, align 4, addrspace 4)
- %6:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 8192, 0, 0 :: (load 64, align 4, addrspace 4)
- %7:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 12288, 0, 0 :: (load 64, align 4, addrspace 4)
- %8:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 64, 0, 0 :: (load 64, align 4, addrspace 4)
- %9:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 4160, 0, 0 :: (load 64, align 4, addrspace 4)
- %10:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 8256, 0, 0 :: (load 64, align 4, addrspace 4)
+ %4:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 0, 0 :: (load 64, align 4, addrspace 4)
+ %5:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 4096, 0 :: (load 64, align 4, addrspace 4)
+ %6:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 8192, 0 :: (load 64, align 4, addrspace 4)
+ %7:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 12288, 0 :: (load 64, align 4, addrspace 4)
+ %8:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 64, 0 :: (load 64, align 4, addrspace 4)
+ %9:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 4160, 0 :: (load 64, align 4, addrspace 4)
+ %10:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 8256, 0 :: (load 64, align 4, addrspace 4)
dead $sgpr30_sgpr31 = SI_CALL undef $sgpr4_sgpr5, 0, csr_amdgpu_highregs, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit-def $sgpr70, implicit-def $sgpr80, implicit-def $sgpr90, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
dead %11:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub1, 0, 0, implicit $mode, implicit $exec
dead %12:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub2, 0, 0, implicit $mode, implicit $exec
@@ -118,10 +118,10 @@ body: |
bb.0:
liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6
; CHECK-LABEL: name: simple_huge_reg_tuple_clause
- ; CHECK: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 0, 0, 0 :: (load 64, align 4, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM1:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 64, 0, 0 :: (load 64, align 4, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM2:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 4096, 0, 0 :: (load 64, align 4, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM3:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 4160, 0, 0 :: (load 64, align 4, addrspace 4)
+ ; CHECK: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 0, 0 :: (load 64, align 4, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM1:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 64, 0 :: (load 64, align 4, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM2:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 4096, 0 :: (load 64, align 4, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM3:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 4160, 0 :: (load 64, align 4, addrspace 4)
; CHECK-NEXT: S_NOP 0, implicit [[S_LOAD_DWORDX16_IMM]]
%0:sreg_64 = COPY $sgpr4_sgpr5
%1:sreg_64 = S_MOV_B64 0
@@ -131,10 +131,10 @@ body: |
%5:sreg_64 = S_MOV_B64 4
%6:sreg_64 = S_MOV_B64 5
%7:sreg_64 = S_MOV_B64 6
- %8:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 0, 0, 0 :: (load 64, align 4, addrspace 4)
- %9:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 64, 0, 0 :: (load 64, align 4, addrspace 4)
- %10:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 4096, 0, 0 :: (load 64, align 4, addrspace 4)
- %11:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 4160, 0, 0 :: (load 64, align 4, addrspace 4)
+ %8:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 0, 0 :: (load 64, align 4, addrspace 4)
+ %9:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 64, 0 :: (load 64, align 4, addrspace 4)
+ %10:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 4096, 0 :: (load 64, align 4, addrspace 4)
+ %11:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 4160, 0 :: (load 64, align 4, addrspace 4)
S_NOP 0, implicit %8
S_NOP 0, implicit %9
S_NOP 0, implicit %10
diff --git a/llvm/test/CodeGen/AMDGPU/lower-control-flow-other-terminators.mir b/llvm/test/CodeGen/AMDGPU/lower-control-flow-other-terminators.mir
index d6da7cb45fc7..8901505b5a31 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-control-flow-other-terminators.mir
+++ b/llvm/test/CodeGen/AMDGPU/lower-control-flow-other-terminators.mir
@@ -203,7 +203,7 @@ body: |
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term1]]
- ; CHECK: dead %7:vgpr_32 = GLOBAL_LOAD_DWORD undef %8:vreg_64, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4, addrspace 1)
+ ; CHECK: dead %7:vgpr_32 = GLOBAL_LOAD_DWORD undef %8:vreg_64, 0, 0, implicit $exec :: (volatile load 4, addrspace 1)
; CHECK: [[COPY4:%[0-9]+]]:sreg_64_xexec = COPY [[COPY3]]
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x80000000)
@@ -233,7 +233,7 @@ body: |
bb.1:
%11:sreg_64_xexec = COPY %13
- dead %6:vgpr_32 = GLOBAL_LOAD_DWORD undef %8:vreg_64, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4, addrspace 1)
+ dead %6:vgpr_32 = GLOBAL_LOAD_DWORD undef %8:vreg_64, 0, 0, implicit $exec :: (volatile load 4, addrspace 1)
%14:sreg_64_xexec = COPY %11
bb.2:
diff --git a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx90a.mir b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx90a.mir
index 435529d1d2cb..73749edefb2e 100644
--- a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx90a.mir
+++ b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx90a.mir
@@ -401,7 +401,7 @@ name: smfma4x4_write_vgpr_vm_read
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
- BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
...
# GCN-LABEL: name: smfma4x4_write_vgpr_flat_read
# GCN: V_MFMA
@@ -411,7 +411,7 @@ name: smfma4x4_write_vgpr_flat_read
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
- FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr4, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
...
# GCN-LABEL: name: smfma4x4_write_vgpr_lds_read
# GCN: V_MFMA
@@ -442,7 +442,7 @@ name: smfma16x16_write_vgpr_flat_read
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr16, $vgpr17, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
- FLAT_STORE_DWORD $vgpr16_vgpr17, $vgpr1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD $vgpr16_vgpr17, $vgpr1, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
...
# GCN-LABEL: name: smfma32x32_write_vgpr_flat_read
# GCN: V_MFMA
@@ -454,7 +454,7 @@ name: smfma32x32_write_vgpr_flat_read
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X2F32_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
- FLAT_STORE_DWORD $vgpr16_vgpr17, $vgpr1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD $vgpr16_vgpr17, $vgpr1, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
...
# GCN-LABEL: name: dmfma4x4_write_vgpr_flat_read_overlap
# GCN: V_MFMA
@@ -465,7 +465,7 @@ name: dmfma4x4_write_vgpr_flat_read_overlap
body: |
bb.0:
$vgpr4_vgpr5 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
- FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
...
# GCN-LABEL: name: dmfma4x4_write_vgpr_flat_read_full
# GCN: V_MFMA
@@ -476,7 +476,7 @@ name: dmfma4x4_write_vgpr_flat_read_full
body: |
bb.0:
$vgpr4_vgpr5 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
- FLAT_STORE_DWORDX2 $vgpr0_vgpr1, $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORDX2 $vgpr0_vgpr1, $vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
...
# GCN-LABEL: name: dmfma16x16_write_vgpr_flat_read
# GCN: V_MFMA
@@ -488,7 +488,7 @@ name: dmfma16x16_write_vgpr_flat_read
body: |
bb.0:
$vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec
- FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr4, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
...
# GCN-LABEL: name: smfma4x4_write_vgpr_valu_read
# GCN: V_MFMA
@@ -897,7 +897,7 @@ body: |
name: flat_store_data_agpr_overwritten
body: |
bb.0:
- FLAT_STORE_DWORDX4 $vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORDX4 $vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
$agpr0 = V_ACCVGPR_WRITE_B32_e64 0, implicit $mode, implicit $exec
...
# GCN-LABEL: name: dot_write_vgpr_accv_read
@@ -1203,7 +1203,7 @@ name: dmfma4x4_write_agpr_flat_read_overlap
body: |
bb.0:
$agpr4_agpr5 = V_MFMA_F64_4X4X4F64_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
- FLAT_STORE_DWORD $vgpr0_vgpr1, $agpr5, 0, 0, 0, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD $vgpr0_vgpr1, $agpr5, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
...
# GCN-LABEL: name: dmfma4x4_write_agpr_flat_read_full
# GCN: V_MFMA
@@ -1214,7 +1214,7 @@ name: dmfma4x4_write_agpr_flat_read_full
body: |
bb.0:
$agpr4_agpr5 = V_MFMA_F64_4X4X4F64_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
- FLAT_STORE_DWORDX2 $vgpr0_vgpr1, $agpr4_agpr5, 0, 0, 0, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORDX2 $vgpr0_vgpr1, $agpr4_agpr5, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
...
# GCN-LABEL: name: dmfma16x16_write_agpr_flat_read
# GCN: V_MFMA
@@ -1226,7 +1226,7 @@ name: dmfma16x16_write_agpr_flat_read
body: |
bb.0:
$agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = V_MFMA_F64_16X16X4F64_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, 0, 0, 0, implicit $mode, implicit $exec
- FLAT_STORE_DWORD $vgpr0_vgpr1, $agpr4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD $vgpr0_vgpr1, $agpr4, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
...
# GCN-LABEL: name: dmfma4x4_write_agpr_valu_read
# GCN: V_MFMA
diff --git a/llvm/test/CodeGen/AMDGPU/mai-hazards.mir b/llvm/test/CodeGen/AMDGPU/mai-hazards.mir
index 694fd7657529..370ef4f63577 100644
--- a/llvm/test/CodeGen/AMDGPU/mai-hazards.mir
+++ b/llvm/test/CodeGen/AMDGPU/mai-hazards.mir
@@ -312,7 +312,7 @@ name: accvgpr_read_write_vgpr_load
body: |
bb.0:
$vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
- $vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
...
---
@@ -336,7 +336,7 @@ name: accvgpr_read_write_vgpr_flat_load
body: |
bb.0:
$vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
- $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
...
---
@@ -348,7 +348,7 @@ name: accvgpr_read_write_vgpr_buffer_store
body: |
bb.0:
$vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
- BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
...
---
@@ -373,7 +373,7 @@ body: |
bb.0:
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
$vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
- $vgpr4 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr4 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
...
---
@@ -387,7 +387,7 @@ body: |
bb.0:
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
$vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
- $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
...
---
@@ -401,7 +401,7 @@ body: |
bb.0:
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
$agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec
- $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
...
---
@@ -415,6 +415,6 @@ body: |
bb.0:
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
$vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
- $vgpr4 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr4 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
...
---
diff --git a/llvm/test/CodeGen/AMDGPU/mcp-overlap-after-propagation.mir b/llvm/test/CodeGen/AMDGPU/mcp-overlap-after-propagation.mir
index 4ad185e037f0..b8b898be0964 100644
--- a/llvm/test/CodeGen/AMDGPU/mcp-overlap-after-propagation.mir
+++ b/llvm/test/CodeGen/AMDGPU/mcp-overlap-after-propagation.mir
@@ -17,11 +17,11 @@ body: |
renamable $sgpr8_sgpr9 = S_GETPC_B64
renamable $sgpr8 = COPY killed renamable $sgpr2
- renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 = S_LOAD_DWORDX8_IMM renamable $sgpr8_sgpr9, 144, 0, 0 :: (invariant load 32, align 16, addrspace 4)
+ renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 = S_LOAD_DWORDX8_IMM renamable $sgpr8_sgpr9, 144, 0 :: (invariant load 32, align 16, addrspace 4)
renamable $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = COPY killed renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
- renamable $vgpr4 = IMAGE_GET_LOD_V1_V2_gfx10 renamable $vgpr70_vgpr71, renamable $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, renamable $sgpr96_sgpr97_sgpr98_sgpr99, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ renamable $vgpr4 = IMAGE_GET_LOD_V1_V2_gfx10 renamable $vgpr70_vgpr71, renamable $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, renamable $sgpr96_sgpr97_sgpr98_sgpr99, 2, 1, 0, 0, 0, 0, 0, 0, implicit $exec
renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63 = COPY killed renamable $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
- renamable $vgpr12_vgpr13_vgpr14 = IMAGE_SAMPLE_V3_V2_gfx10 renamable $vgpr70_vgpr71, renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, renamable $sgpr96_sgpr97_sgpr98_sgpr99, 7, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16)
+ renamable $vgpr12_vgpr13_vgpr14 = IMAGE_SAMPLE_V3_V2_gfx10 renamable $vgpr70_vgpr71, renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, renamable $sgpr96_sgpr97_sgpr98_sgpr99, 7, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16)
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir b/llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir
index 188d76dd3142..93a26d5dd7f3 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir
@@ -75,13 +75,13 @@ body: |
successors: %bb.1.atomic(0x40000000), %bb.2.exit(0x40000000)
liveins: $vgpr0, $sgpr0_sgpr1
- $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
$vgpr1 = V_ASHRREV_I32_e32 31, $vgpr0, implicit $exec
$vgpr1_vgpr2 = V_LSHL_B64_e64 $vgpr0_vgpr1, 3, implicit $exec
$sgpr7 = S_MOV_B32 61440
$sgpr6 = S_MOV_B32 0
S_WAITCNT 127
- $vgpr1_vgpr2 = BUFFER_LOAD_DWORDX2_ADDR64 killed $vgpr1_vgpr2, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 8 from %ir.tid.gep)
+ $vgpr1_vgpr2 = BUFFER_LOAD_DWORDX2_ADDR64 killed $vgpr1_vgpr2, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 8 from %ir.tid.gep)
$vgpr0 = V_XOR_B32_e32 1, killed $vgpr0, implicit $exec
V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
$sgpr2_sgpr3 = S_AND_SAVEEXEC_B64 killed $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -92,7 +92,7 @@ body: |
successors: %bb.2.exit(0x80000000)
liveins: $sgpr4_sgpr5_sgpr6_sgpr7:0x0000000C, $sgpr0_sgpr1, $sgpr2_sgpr3, $vgpr1_vgpr2_vgpr3_vgpr4:0x00000003
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 15, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 15, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
dead $vgpr0 = V_MOV_B32_e32 -1, implicit $exec
dead $vgpr0 = V_MOV_B32_e32 61440, implicit $exec
$sgpr4_sgpr5 = S_MOV_B64 0
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-invalid-addrspace.mir b/llvm/test/CodeGen/AMDGPU/memory-legalizer-invalid-addrspace.mir
index 5804667ad8d3..afe8a2750bf5 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-invalid-addrspace.mir
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-invalid-addrspace.mir
@@ -11,10 +11,10 @@ body: |
$vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr2_sgpr3
$vgpr1 = V_MOV_B32_e32 killed $sgpr3, implicit $exec, implicit $sgpr2_sgpr3, implicit $exec
- renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load syncscope("one-as") seq_cst 4 from `i32 addrspace(42)* undef`)
+ renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load syncscope("one-as") seq_cst 4 from `i32 addrspace(42)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -30,7 +30,7 @@ body: |
$vgpr2 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store syncscope("agent-one-as") seq_cst 4 into `i32 addrspace(42)* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store syncscope("agent-one-as") seq_cst 4 into `i32 addrspace(42)* undef`)
S_ENDPGM 0
...
@@ -47,7 +47,7 @@ body: |
$vgpr0 = V_MOV_B32_e32 killed $sgpr4, implicit $exec, implicit $exec
$vgpr1 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $exec
$vgpr2 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_ATOMIC_CMPSWAP killed renamable $vgpr2_vgpr3, killed renamable $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store syncscope("workgroup-one-as") seq_cst seq_cst 4 on `i32 addrspace(42)* undef`)
+ FLAT_ATOMIC_CMPSWAP killed renamable $vgpr2_vgpr3, killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store syncscope("workgroup-one-as") seq_cst seq_cst 4 on `i32 addrspace(42)* undef`)
S_ENDPGM 0
...
@@ -63,7 +63,7 @@ body: |
$vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr2_sgpr3
$vgpr1 = V_MOV_B32_e32 killed $sgpr3, implicit $exec, implicit $sgpr2_sgpr3, implicit $exec
$vgpr2 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- FLAT_ATOMIC_SWAP killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store syncscope("wavefront-one-as") seq_cst 4 on `i32 addrspace(42)* undef`)
+ FLAT_ATOMIC_SWAP killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store syncscope("wavefront-one-as") seq_cst 4 on `i32 addrspace(42)* undef`)
S_ENDPGM 0
...
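
A practical property of carrying the policy as one mask shows up when such an operand is parsed: each modifier token just ORs its bit into an accumulator, so the textual order of the modifiers cannot matter. A short sketch of that accumulation loop follows; the token spellings and the duplicate-modifier check are again assumptions for illustration, not code from the patch:

    #include <cstdint>
    #include <optional>
    #include <sstream>
    #include <string>

    enum : uint64_t { CP_GLC = 1, CP_SLC = 2, CP_DLC = 4 };

    // Accumulate cache-policy modifier tokens into one immediate.
    // Returns std::nullopt on an unknown or repeated modifier.
    std::optional<uint64_t> parseCachePolicy(const std::string &Mods) {
      uint64_t Policy = 0;
      std::istringstream In(Mods);
      std::string Tok;
      while (In >> Tok) {
        uint64_t Bit = Tok == "glc" ? CP_GLC
                     : Tok == "slc" ? CP_SLC
                     : Tok == "dlc" ? CP_DLC
                     : 0;
        if (!Bit || (Policy & Bit)) // unknown token or duplicate
          return std::nullopt;
        Policy |= Bit; // OR-ing makes token order irrelevant
      }
      return Policy;
    }

Under these assumptions, parseCachePolicy("slc glc") and parseCachePolicy("glc slc") both return 3, which is the point of folding the flags into one bitmask operand.
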
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local.mir b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local.mir
index 34f5306c0838..26b248fef919 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local.mir
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local.mir
@@ -13,14 +13,14 @@
name: load_singlethread_unordered
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") unordered 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -37,14 +37,14 @@ body: |
name: load_singlethread_monotonic
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") monotonic 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -61,14 +61,14 @@ body: |
name: load_singlethread_acquire
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") acquire 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -85,14 +85,14 @@ body: |
name: load_singlethread_seq_cst
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") seq_cst 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -109,14 +109,14 @@ body: |
name: load_wavefront_unordered
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") unordered 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -133,14 +133,14 @@ body: |
name: load_wavefront_monotonic
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") monotonic 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -157,14 +157,14 @@ body: |
name: load_wavefront_acquire
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") acquire 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -181,14 +181,14 @@ body: |
name: load_wavefront_seq_cst
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") seq_cst 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -205,14 +205,14 @@ body: |
name: load_workgroup_unordered
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") unordered 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -229,14 +229,14 @@ body: |
name: load_workgroup_monotonic
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") monotonic 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -253,14 +253,14 @@ body: |
name: load_workgroup_acquire
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") acquire 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -277,14 +277,14 @@ body: |
name: load_workgroup_seq_cst
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") seq_cst 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -301,14 +301,14 @@ body: |
name: load_agent_unordered
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") unordered 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -325,14 +325,14 @@ body: |
name: load_agent_monotonic
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") monotonic 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -349,14 +349,14 @@ body: |
name: load_agent_acquire
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") acquire 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -373,14 +373,14 @@ body: |
name: load_agent_seq_cst
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") seq_cst 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -397,14 +397,14 @@ body: |
name: load_system_unordered
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") unordered 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -421,14 +421,14 @@ body: |
name: load_system_monotonic
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") monotonic 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -445,14 +445,14 @@ body: |
name: load_system_acquire
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") acquire 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -469,14 +469,14 @@ body: |
name: load_system_seq_cst
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") seq_cst 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -493,8 +493,8 @@ body: |
name: store_singlethread_unordered
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -515,8 +515,8 @@ body: |
name: store_singlethread_monotonic
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -537,8 +537,8 @@ body: |
name: store_singlethread_release
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -559,8 +559,8 @@ body: |
name: store_singlethread_seq_cst
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -581,8 +581,8 @@ body: |
name: store_wavefront_unordered
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -603,8 +603,8 @@ body: |
name: store_wavefront_monotonic
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -625,8 +625,8 @@ body: |
name: store_wavefront_release
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -647,8 +647,8 @@ body: |
name: store_wavefront_seq_cst
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -669,8 +669,8 @@ body: |
name: store_workgroup_unordered
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -691,8 +691,8 @@ body: |
name: store_workgroup_monotonic
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -713,8 +713,8 @@ body: |
name: store_workgroup_release
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -735,8 +735,8 @@ body: |
name: store_workgroup_seq_cst
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -757,8 +757,8 @@ body: |
name: store_agent_unordered
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -779,8 +779,8 @@ body: |
name: store_agent_monotonic
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -801,8 +801,8 @@ body: |
name: store_agent_release
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -823,8 +823,8 @@ body: |
name: store_agent_seq_cst
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -845,8 +845,8 @@ body: |
name: store_system_unordered
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -867,8 +867,8 @@ body: |
name: store_system_monotonic
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -889,8 +889,8 @@ body: |
name: store_system_release
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -911,8 +911,8 @@ body: |
name: store_system_seq_cst
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -933,8 +933,8 @@ body: |
name: atomicrmw_singlethread_unordered
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -955,8 +955,8 @@ body: |
name: atomicrmw_singlethread_monotonic
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -977,8 +977,8 @@ body: |
name: atomicrmw_singlethread_acquire
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -999,8 +999,8 @@ body: |
name: atomicrmw_singlethread_release
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -1021,8 +1021,8 @@ body: |
name: atomicrmw_singlethread_acq_rel
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -1043,8 +1043,8 @@ body: |
name: atomicrmw_singlethread_seq_cst
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
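
Every hunk above applies the same two mechanical substitutions. The scalar loads drop one trailing immediate, folding what were the separate GLC and DLC bits into the single cpol operand:

  S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0   ->   S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0

The flat stores shrink from five trailing immediates to two, leaving only the offset and the cpol bitmask:

  FLAT_STORE_DWORD ..., renamable $vgpr2, 0, 0, 0, 0, 0, ...   ->   FLAT_STORE_DWORD ..., renamable $vgpr2, 0, 0, ...

All of the cache-policy flags in these tests are zero, so only the operand count changes, not the encoded policy.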
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir b/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir
index 0aec90f86b9d..a1c63153f0be 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir
@@ -16,27 +16,27 @@ body: |
successors: %bb.1(0x30000000), %bb.2(0x50000000)
liveins: $sgpr0_sgpr1, $sgpr3
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 44, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 44, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
$sgpr8 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
- $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 36, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
$sgpr9 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
$sgpr10 = S_MOV_B32 4294967295, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
$sgpr11 = S_MOV_B32 15204352, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 4, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(5)* undef`)
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 4, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(5)* undef`)
S_WAITCNT 127
S_CMP_LG_U32 killed $sgpr2, 0, implicit-def $scc
S_WAITCNT 3855
$vgpr0 = V_MOV_B32_e32 2, implicit $exec
$vgpr1 = V_MOV_B32_e32 32772, implicit $exec
- BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(5)* undef`)
+ BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(5)* undef`)
S_CBRANCH_SCC0 %bb.1, implicit killed $scc
bb.2:
successors: %bb.3(0x80000000)
liveins: $sgpr0_sgpr1, $sgpr4_sgpr5, $sgpr3, $sgpr8_sgpr9_sgpr10_sgpr11
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 52, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 52, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
S_WAITCNT 3855
$vgpr0 = V_MOV_B32_e32 32772, implicit $exec
S_BRANCH %bb.3
@@ -45,7 +45,7 @@ body: |
successors: %bb.3(0x80000000)
liveins: $sgpr0_sgpr1, $sgpr4_sgpr5, $sgpr3, $sgpr8_sgpr9_sgpr10_sgpr11
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 48, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 48, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
S_WAITCNT 3855
$vgpr0 = V_MOV_B32_e32 4, implicit $exec
@@ -55,11 +55,11 @@ body: |
S_WAITCNT 127
$sgpr0 = S_LSHL_B32 killed $sgpr0, 2, implicit-def dead $scc
$vgpr0 = V_ADD_CO_U32_e32 killed $sgpr0, killed $vgpr0, implicit-def dead $vcc, implicit $exec
- $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load syncscope("agent-one-as") unordered 4 from `i32 addrspace(1)* undef`), (load syncscope("workgroup-one-as") seq_cst 4 from `[8192 x i32] addrspace(5)* undef`)
+ $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (load syncscope("agent-one-as") unordered 4 from `i32 addrspace(1)* undef`), (load syncscope("workgroup-one-as") seq_cst 4 from `[8192 x i32] addrspace(5)* undef`)
$vgpr1 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr4_sgpr5
$vgpr2 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $sgpr4_sgpr5, implicit $exec
S_WAITCNT 3952
- FLAT_STORE_DWORD killed $vgpr1_vgpr2, killed $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32 addrspace(1)* undef`)
+ FLAT_STORE_DWORD killed $vgpr1_vgpr2, killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32 addrspace(1)* undef`)
S_ENDPGM 0
...
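
The buffer instructions in memory-legalizer-multiple-mem-operands-atomics.mir shrink the same way, for example:

  BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 4, 0, 0, 0, 0, 0, 0, ...   ->   ..., $sgpr3, 4, 0, 0, 0, ...

Reading the immediates as the diff shows them, the first value after the offset is now the combined cpol mask instead of a lone GLC bit; the remaining trailing operands are untouched by this patch. Note also that the loads carrying multiple memory operands keep both of them: only the instruction's operand list is rewritten, not its memory-operand list.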
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir b/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir
index f228fed9b2b2..d36b7d96f062 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir
@@ -62,7 +62,7 @@
# CHECK-LABEL: name: multiple_mem_operands
# CHECK-LABEL: bb.3.done:
-# CHECK: BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 1, 1, 0, 0
+# CHECK: BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 3
name: multiple_mem_operands
alignment: 1
@@ -110,27 +110,27 @@ body: |
successors: %bb.1.if(0x30000000), %bb.2.else(0x50000000)
liveins: $sgpr0_sgpr1, $sgpr3
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 44, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 44, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
$sgpr8 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
- $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 36, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
$sgpr9 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
$sgpr10 = S_MOV_B32 4294967295, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
$sgpr11 = S_MOV_B32 15204352, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 4, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr01)
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 4, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr01)
S_WAITCNT 127
S_CMP_LG_U32 killed $sgpr2, 0, implicit-def $scc
S_WAITCNT 3855
$vgpr0 = V_MOV_B32_e32 2, implicit $exec
$vgpr1 = V_MOV_B32_e32 32772, implicit $exec
- BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr12)
+ BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr12)
S_CBRANCH_SCC0 %bb.1.if, implicit killed $scc
bb.2.else:
successors: %bb.3.done(0x80000000)
liveins: $sgpr0_sgpr1, $sgpr4_sgpr5, $sgpr3, $sgpr8_sgpr9_sgpr10_sgpr11
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 52, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 52, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
S_WAITCNT 3855
$vgpr0 = V_MOV_B32_e32 32772, implicit $exec
S_BRANCH %bb.3.done
@@ -139,7 +139,7 @@ body: |
successors: %bb.3.done(0x80000000)
liveins: $sgpr0_sgpr1, $sgpr4_sgpr5, $sgpr3, $sgpr8_sgpr9_sgpr10_sgpr11
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 48, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 48, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
S_WAITCNT 3855
$vgpr0 = V_MOV_B32_e32 4, implicit $exec
@@ -149,11 +149,11 @@ body: |
S_WAITCNT 127
$sgpr0 = S_LSHL_B32 killed $sgpr0, 2, implicit-def dead $scc
$vgpr0 = V_ADD_CO_U32_e32 killed $sgpr0, killed $vgpr0, implicit-def dead $vcc, implicit $exec
- $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (non-temporal load 4 from %ir.else_ptr), (non-temporal load 4 from %ir.if_ptr)
+ $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (non-temporal load 4 from %ir.else_ptr), (non-temporal load 4 from %ir.if_ptr)
$vgpr1 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr4_sgpr5
$vgpr2 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $sgpr4_sgpr5, implicit $exec
S_WAITCNT 3952
- FLAT_STORE_DWORD killed $vgpr1_vgpr2, killed $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.out)
+ FLAT_STORE_DWORD killed $vgpr1_vgpr2, killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.out)
S_ENDPGM 0
...
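
The rewritten CHECK line in this file is the clearest illustration of the new encoding. The non-temporal load used to match

  BUFFER_LOAD_DWORD_OFFEN ..., $sgpr3, 0, 1, 1, 0, 0

with glc = 1 and slc = 1 as separate immediates, and now matches

  BUFFER_LOAD_DWORD_OFFEN ..., $sgpr3, 0, 3

since the cache-policy mask carries GLC in bit 0 and SLC in bit 1, so glc|slc encodes as 1|2 = 3.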
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir b/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir
index 2cd298a86914..2784c62a4af5 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir
@@ -42,7 +42,7 @@
# CHECK-LABEL: name: multiple_mem_operands
# CHECK-LABEL: bb.3.done:
-# CHECK: BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0, 0
+# CHECK: BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0
name: multiple_mem_operands
alignment: 1
@@ -90,27 +90,27 @@ body: |
successors: %bb.1.if(0x30000000), %bb.2.else(0x50000000)
liveins: $sgpr0_sgpr1, $sgpr3
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 44, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 44, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
$sgpr8 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
- $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 36, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
$sgpr9 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
$sgpr10 = S_MOV_B32 4294967295, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
$sgpr11 = S_MOV_B32 15204352, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 4, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr01)
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 4, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr01)
S_WAITCNT 127
S_CMP_LG_U32 killed $sgpr2, 0, implicit-def $scc
S_WAITCNT 3855
$vgpr0 = V_MOV_B32_e32 2, implicit $exec
$vgpr1 = V_MOV_B32_e32 32772, implicit $exec
- BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr12)
+ BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr12)
S_CBRANCH_SCC0 %bb.1.if, implicit killed $scc
bb.2.else:
successors: %bb.3.done(0x80000000)
liveins: $sgpr0_sgpr1, $sgpr4_sgpr5, $sgpr3, $sgpr8_sgpr9_sgpr10_sgpr11
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 52, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 52, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
S_WAITCNT 3855
$vgpr0 = V_MOV_B32_e32 32772, implicit $exec
S_BRANCH %bb.3.done
@@ -119,7 +119,7 @@ body: |
successors: %bb.3.done(0x80000000)
liveins: $sgpr0_sgpr1, $sgpr4_sgpr5, $sgpr3, $sgpr8_sgpr9_sgpr10_sgpr11
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 48, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 48, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
S_WAITCNT 3855
$vgpr0 = V_MOV_B32_e32 4, implicit $exec
@@ -129,11 +129,11 @@ body: |
S_WAITCNT 127
$sgpr0 = S_LSHL_B32 killed $sgpr0, 2, implicit-def dead $scc
$vgpr0 = V_ADD_CO_U32_e32 killed $sgpr0, killed $vgpr0, implicit-def dead $vcc, implicit $exec
- $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %ir.else_ptr), (non-temporal load 4 from %ir.if_ptr)
+ $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (load 4 from %ir.else_ptr), (non-temporal load 4 from %ir.if_ptr)
$vgpr1 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr4_sgpr5
$vgpr2 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $sgpr4_sgpr5, implicit $exec
S_WAITCNT 3952
- FLAT_STORE_DWORD killed $vgpr1_vgpr2, killed $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.out)
+ FLAT_STORE_DWORD killed $vgpr1_vgpr2, killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.out)
S_ENDPGM 0
...
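
memory-legalizer-multiple-mem-operands-nontemporal-2.mir covers the contrasting case: one of the two memory operands is a plain load, so the memory legalizer leaves the default cache policy in place and the CHECK simply shrinks from six trailing zero immediates to three, with a cpol of 0 standing in for the individual flags.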
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-region.mir b/llvm/test/CodeGen/AMDGPU/memory-legalizer-region.mir
index a2d530b2fbed..4e31ac288072 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-region.mir
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-region.mir
@@ -13,14 +13,14 @@
name: load_singlethread_unordered
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") unordered 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -37,14 +37,14 @@ body: |
name: load_singlethread_monotonic
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") monotonic 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -61,14 +61,14 @@ body: |
name: load_singlethread_acquire
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") acquire 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -85,14 +85,14 @@ body: |
name: load_singlethread_seq_cst
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") seq_cst 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -109,14 +109,14 @@ body: |
name: load_wavefront_unordered
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") unordered 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -133,14 +133,14 @@ body: |
name: load_wavefront_monotonic
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") monotonic 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -157,14 +157,14 @@ body: |
name: load_wavefront_acquire
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") acquire 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -181,14 +181,14 @@ body: |
name: load_wavefront_seq_cst
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") seq_cst 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -205,14 +205,14 @@ body: |
name: load_workgroup_unordered
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") unordered 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -229,14 +229,14 @@ body: |
name: load_workgroup_monotonic
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") monotonic 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -253,14 +253,14 @@ body: |
name: load_workgroup_acquire
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") acquire 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -277,14 +277,14 @@ body: |
name: load_workgroup_seq_cst
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") seq_cst 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -301,14 +301,14 @@ body: |
name: load_agent_unordered
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") unordered 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -325,14 +325,14 @@ body: |
name: load_agent_monotonic
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") monotonic 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -349,14 +349,14 @@ body: |
name: load_agent_acquire
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") acquire 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -373,14 +373,14 @@ body: |
name: load_agent_seq_cst
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") seq_cst 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -397,14 +397,14 @@ body: |
name: load_system_unordered
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") unordered 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -421,14 +421,14 @@ body: |
name: load_system_monotonic
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") monotonic 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -445,14 +445,14 @@ body: |
name: load_system_acquire
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") acquire 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -469,14 +469,14 @@ body: |
name: load_system_seq_cst
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") seq_cst 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
S_ENDPGM 0
...
@@ -493,8 +493,8 @@ body: |
name: store_singlethread_unordered
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -515,8 +515,8 @@ body: |
name: store_singlethread_monotonic
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -537,8 +537,8 @@ body: |
name: store_singlethread_release
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -559,8 +559,8 @@ body: |
name: store_singlethread_seq_cst
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -581,8 +581,8 @@ body: |
name: store_wavefront_unordered
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -603,8 +603,8 @@ body: |
name: store_wavefront_monotonic
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -625,8 +625,8 @@ body: |
name: store_wavefront_release
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -647,8 +647,8 @@ body: |
name: store_wavefront_seq_cst
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -669,8 +669,8 @@ body: |
name: store_workgroup_unordered
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -691,8 +691,8 @@ body: |
name: store_workgroup_monotonic
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -713,8 +713,8 @@ body: |
name: store_workgroup_release
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -735,8 +735,8 @@ body: |
name: store_workgroup_seq_cst
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -757,8 +757,8 @@ body: |
name: store_agent_unordered
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -779,8 +779,8 @@ body: |
name: store_agent_monotonic
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -801,8 +801,8 @@ body: |
name: store_agent_release
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -823,8 +823,8 @@ body: |
name: store_agent_seq_cst
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -845,8 +845,8 @@ body: |
name: store_system_unordered
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -867,8 +867,8 @@ body: |
name: store_system_monotonic
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -889,8 +889,8 @@ body: |
name: store_system_release
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -911,8 +911,8 @@ body: |
name: store_system_seq_cst
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -933,8 +933,8 @@ body: |
name: atomicrmw_singlethread_unordered
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -955,8 +955,8 @@ body: |
name: atomicrmw_singlethread_monotonic
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -977,8 +977,8 @@ body: |
name: atomicrmw_singlethread_acquire
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -999,8 +999,8 @@ body: |
name: atomicrmw_singlethread_release
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -1021,8 +1021,8 @@ body: |
name: atomicrmw_singlethread_acq_rel
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
@@ -1043,8 +1043,8 @@ body: |
name: atomicrmw_singlethread_seq_cst
body: |
bb.0:
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
- $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
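All of the test hunks above and below follow one pattern: the separate cache-policy flag
immediates on each memory instruction collapse into a single cpol bitmask immediate, so
consumers read one operand instead of several. A minimal sketch of the post-patch access
pattern inside an AMDGPU pass, assuming the names this patch introduces
(AMDGPU::OpName::cpol and the CPol bits in SIDefines.h; treat the exact bit layout as
illustrative, not authoritative):

    #include "SIDefines.h"
    #include "SIInstrInfo.h"
    #include "llvm/CodeGen/MachineInstr.h"
    using namespace llvm;

    // Read the merged cache-policy immediate off a memory instruction and test
    // one policy bit, rather than walking separate glc/slc/dlc operands.
    static bool hasGLC(const SIInstrInfo &TII, const MachineInstr &MI) {
      const MachineOperand *CPol =
          TII.getNamedOperand(MI, AMDGPU::OpName::cpol);
      return CPol && (CPol->getImm() & AMDGPU::CPol::GLC);
    }

Instructions with no special policy carry cpol == 0, which is why the visible effect in
the MIR lines is simply a run of zero operands dropping out.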
diff --git a/llvm/test/CodeGen/AMDGPU/memory_clause.mir b/llvm/test/CodeGen/AMDGPU/memory_clause.mir
index 2b96140f737b..dd634b8d2b77 100644
--- a/llvm/test/CodeGen/AMDGPU/memory_clause.mir
+++ b/llvm/test/CodeGen/AMDGPU/memory_clause.mir
@@ -1,13 +1,13 @@
# RUN: llc -march=amdgcn -mcpu=gfx902 -verify-machineinstrs -run-pass=si-form-memory-clauses %s -o - | FileCheck -check-prefix=GCN %s
# GCN-LABEL: {{^}}name: vector_clause{{$}}
-# GCN: %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, 0, 0, implicit $exec
+# GCN: %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
+# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, implicit $exec
+# GCN-NEXT: %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, implicit $exec
+# GCN-NEXT: %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, implicit $exec
# GCN-NEXT: KILL %0{{$}}
# GCN-NEXT: %5:vreg_64 = IMPLICIT_DEF
-# GCN-NEXT: GLOBAL_STORE_DWORDX4 %5, %1, 0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT: GLOBAL_STORE_DWORDX4 %5, %1, 0, 0, implicit $exec
---
name: vector_clause
@@ -15,25 +15,25 @@ tracksRegLiveness: true
body: |
bb.0:
%0:vreg_64 = IMPLICIT_DEF
- %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, 0, implicit $exec
- %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, 0, implicit $exec
- %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, 0, implicit $exec
- %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, 0, 0, implicit $exec
+ %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
+ %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, implicit $exec
+ %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, implicit $exec
+ %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, implicit $exec
%5:vreg_64 = IMPLICIT_DEF
- GLOBAL_STORE_DWORDX4 %5, %1, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %5, %2, 16, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %5, %3, 32, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %5, %4, 48, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %5, %1, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %5, %2, 16, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %5, %3, 32, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %5, %4, 48, 0, implicit $exec
...
# This would be a valid soft clause, but there's no need for a KILL
# since the pointer uses are live beyond the end of the clause.
# GCN-LABEL: {{^}}name: vector_clause_no_kill{{$}}
-# GCN: %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: GLOBAL_STORE_DWORDX4 %0, %1, 0, 0, 0, 0, 0, implicit $exec
+# GCN: %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
+# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, implicit $exec
+# GCN-NEXT: %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, implicit $exec
+# GCN-NEXT: %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, implicit $exec
+# GCN-NEXT: GLOBAL_STORE_DWORDX4 %0, %1, 0, 0, implicit $exec
---
name: vector_clause_no_kill
@@ -41,23 +41,23 @@ tracksRegLiveness: true
body: |
bb.0:
%0:vreg_64 = IMPLICIT_DEF
- %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, 0, implicit $exec
- %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, 0, implicit $exec
- %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, 0, implicit $exec
- %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %1, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2, 16, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %3, 32, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %4, 48, 0, 0, 0, 0, implicit $exec
+ %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
+ %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, implicit $exec
+ %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, implicit $exec
+ %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %1, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2, 16, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %3, 32, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %4, 48, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: subreg_full{{$}}
-# GCN: undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: %1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: %1.sub2:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: %1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, 0, 0, implicit $exec
+# GCN: undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, implicit $exec
+# GCN-NEXT: %1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, implicit $exec
+# GCN-NEXT: %1.sub2:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, implicit $exec
+# GCN-NEXT: %1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, implicit $exec
# GCN-NEXT: KILL %0.sub2_sub3{{$}}
-# GCN-NEXT: GLOBAL_STORE_DWORDX4 %0.sub0_sub1, %1, 0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT: GLOBAL_STORE_DWORDX4 %0.sub0_sub1, %1, 0, 0, implicit $exec
---
name: subreg_full
@@ -68,19 +68,19 @@ registers:
body: |
bb.0:
%0 = IMPLICIT_DEF
- undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, 0, 0, 0, implicit $exec
- %1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, 0, 0, 0, implicit $exec
- %1.sub2:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, 0, 0, implicit $exec
- %1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0.sub0_sub1, %1, 0, 0, 0, 0, 0, implicit $exec
+ undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, implicit $exec
+ %1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, implicit $exec
+ %1.sub2:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, implicit $exec
+ %1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0.sub0_sub1, %1, 0, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: subreg_part{{$}}
-# GCN: undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: %1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: %1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, 0, 0, implicit $exec
+# GCN: undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, implicit $exec
+# GCN-NEXT: %1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, implicit $exec
+# GCN-NEXT: %1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, implicit $exec
# GCN-NEXT: KILL %0.sub2_sub3{{$}}
-# GCN-NEXT: GLOBAL_STORE_DWORDX4 %0.sub0_sub1, %1, 0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT: GLOBAL_STORE_DWORDX4 %0.sub0_sub1, %1, 0, 0, implicit $exec
---
name: subreg_part
@@ -91,17 +91,17 @@ registers:
body: |
bb.0:
%0 = IMPLICIT_DEF
- undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, 0, 0, 0, implicit $exec
- %1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, 0, 0, 0, implicit $exec
- %1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0.sub0_sub1, %1, 0, 0, 0, 0, 0, implicit $exec
+ undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, implicit $exec
+ %1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, implicit $exec
+ %1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0.sub0_sub1, %1, 0, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: dead{{$}}
-# GCN: dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, 0, 0, implicit $exec
+# GCN: dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
+# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, implicit $exec
+# GCN-NEXT: dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, implicit $exec
+# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, implicit $exec
# GCN-NEXT: KILL %0{{$}}
---
@@ -116,17 +116,17 @@ registers:
body: |
bb.0:
%0 = IMPLICIT_DEF
- dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, 0, implicit $exec
- dead %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, 0, implicit $exec
- dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, 0, implicit $exec
- dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, 0, 0, implicit $exec
+ dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
+ dead %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, implicit $exec
+ dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, implicit $exec
+ dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: subreg_dead{{$}}
-# GCN: undef %2.sub0:vreg_64 = GLOBAL_LOAD_DWORD %0, 16, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: dead %2.sub1:vreg_64 = GLOBAL_LOAD_DWORD %0, 32, 0, 0, 0, 0, implicit $exec
+# GCN: undef %2.sub0:vreg_64 = GLOBAL_LOAD_DWORD %0, 16, 0, implicit $exec
+# GCN-NEXT: dead %2.sub1:vreg_64 = GLOBAL_LOAD_DWORD %0, 32, 0, implicit $exec
# GCN-NEXT: KILL %0{{$}}
-# GCN-NEXT: GLOBAL_STORE_DWORD %1, %2.sub0, 0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT: GLOBAL_STORE_DWORD %1, %2.sub0, 0, 0, implicit $exec
---
name: subreg_dead
@@ -135,14 +135,14 @@ body: |
bb.0:
%0:vreg_64 = IMPLICIT_DEF
%1:vreg_64 = IMPLICIT_DEF
- undef %2.sub0:vreg_64 = GLOBAL_LOAD_DWORD %0, 16, 0, 0, 0, 0, implicit $exec
- dead %2.sub1:vreg_64 = GLOBAL_LOAD_DWORD %0, 32, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORD %1, %2.sub0, 0, 0, 0, 0, 0, implicit $exec
+ undef %2.sub0:vreg_64 = GLOBAL_LOAD_DWORD %0, 16, 0, implicit $exec
+ dead %2.sub1:vreg_64 = GLOBAL_LOAD_DWORD %0, 32, 0, implicit $exec
+ GLOBAL_STORE_DWORD %1, %2.sub0, 0, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: kill{{$}}
-# GCN: %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 16, 0, 0, 0, 0, implicit $exec
+# GCN: %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
+# GCN-NEXT: %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 16, 0, implicit $exec
# GCN-NEXT: KILL %1{{$}}
# GCN-NEXT: KILL %0{{$}}
@@ -154,16 +154,16 @@ body: |
%0:vreg_64 = IMPLICIT_DEF
%1:vreg_64 = IMPLICIT_DEF
%2:vreg_64 = IMPLICIT_DEF
- %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, 0, implicit $exec
- %4:vreg_128 = GLOBAL_LOAD_DWORDX4 killed %1, 16, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %2, %3, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %2, %4, 16, 0, 0, 0, 0, implicit $exec
+ %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
+ %4:vreg_128 = GLOBAL_LOAD_DWORDX4 killed %1, 16, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %2, %3, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %2, %4, 16, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: indirect{{$}}
-# GCN: %1:vreg_64 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 0, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 16, 0, 0, 0, 0, implicit $exec
+# GCN: %1:vreg_64 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, implicit $exec
+# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 0, 0, implicit $exec
+# GCN-NEXT: %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 16, 0, implicit $exec
# GCN-NEXT: KILL %1{{$}}
---
@@ -177,18 +177,18 @@ registers:
body: |
bb.0:
%0 = IMPLICIT_DEF
- %1:vreg_64 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, 0, 0, 0, implicit $exec
- %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 0, 0, 0, 0, 0, implicit $exec
- %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 16, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %3, 16, 0, 0, 0, 0, implicit $exec
+ %1:vreg_64 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, implicit $exec
+ %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 0, 0, implicit $exec
+ %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 16, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %3, 16, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: stack{{$}}
# GCN: %0:vreg_64 = IMPLICIT_DEF
-# GCN-NEXT: %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %stack.0, 0, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %stack.0, 16, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: GLOBAL_STORE_DWORDX4 %0, %1, 0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT: %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %stack.0, 0, 0, implicit $exec
+# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %stack.0, 16, 0, implicit $exec
+# GCN-NEXT: GLOBAL_STORE_DWORDX4 %0, %1, 0, 0, implicit $exec
---
name: stack
@@ -202,31 +202,31 @@ stack:
body: |
bb.0:
%0 = IMPLICIT_DEF
- %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %stack.0, 0, 0, 0, 0, 0, implicit $exec
- %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %stack.0, 16, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %1, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %2, 16, 0, 0, 0, 0, implicit $exec
+ %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %stack.0, 0, 0, implicit $exec
+ %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %stack.0, 16, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %1, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %2, 16, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: overflow_counter{{$}}
-# GCN: dead %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: dead %3:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: dead %4:vgpr_32 = GLOBAL_LOAD_DWORD %0, 8, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: dead %5:vgpr_32 = GLOBAL_LOAD_DWORD %0, 12, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: dead %6:vgpr_32 = GLOBAL_LOAD_DWORD %0, 16, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: dead %7:vgpr_32 = GLOBAL_LOAD_DWORD %0, 20, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: dead %8:vgpr_32 = GLOBAL_LOAD_DWORD %0, 24, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: dead %9:vgpr_32 = GLOBAL_LOAD_DWORD %0, 28, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: dead %10:vgpr_32 = GLOBAL_LOAD_DWORD %0, 32, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: dead %11:vgpr_32 = GLOBAL_LOAD_DWORD %0, 36, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: dead %12:vgpr_32 = GLOBAL_LOAD_DWORD %0, 40, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: dead %13:vgpr_32 = GLOBAL_LOAD_DWORD %0, 44, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: dead %14:vgpr_32 = GLOBAL_LOAD_DWORD %0, 48, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: dead %15:vgpr_32 = GLOBAL_LOAD_DWORD %0, 52, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: dead %16:vgpr_32 = GLOBAL_LOAD_DWORD %0, 56, 0, 0, 0, 0, implicit $exec
+# GCN: dead %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, implicit $exec
+# GCN-NEXT: dead %3:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, implicit $exec
+# GCN-NEXT: dead %4:vgpr_32 = GLOBAL_LOAD_DWORD %0, 8, 0, implicit $exec
+# GCN-NEXT: dead %5:vgpr_32 = GLOBAL_LOAD_DWORD %0, 12, 0, implicit $exec
+# GCN-NEXT: dead %6:vgpr_32 = GLOBAL_LOAD_DWORD %0, 16, 0, implicit $exec
+# GCN-NEXT: dead %7:vgpr_32 = GLOBAL_LOAD_DWORD %0, 20, 0, implicit $exec
+# GCN-NEXT: dead %8:vgpr_32 = GLOBAL_LOAD_DWORD %0, 24, 0, implicit $exec
+# GCN-NEXT: dead %9:vgpr_32 = GLOBAL_LOAD_DWORD %0, 28, 0, implicit $exec
+# GCN-NEXT: dead %10:vgpr_32 = GLOBAL_LOAD_DWORD %0, 32, 0, implicit $exec
+# GCN-NEXT: dead %11:vgpr_32 = GLOBAL_LOAD_DWORD %0, 36, 0, implicit $exec
+# GCN-NEXT: dead %12:vgpr_32 = GLOBAL_LOAD_DWORD %0, 40, 0, implicit $exec
+# GCN-NEXT: dead %13:vgpr_32 = GLOBAL_LOAD_DWORD %0, 44, 0, implicit $exec
+# GCN-NEXT: dead %14:vgpr_32 = GLOBAL_LOAD_DWORD %0, 48, 0, implicit $exec
+# GCN-NEXT: dead %15:vgpr_32 = GLOBAL_LOAD_DWORD %0, 52, 0, implicit $exec
+# GCN-NEXT: dead %16:vgpr_32 = GLOBAL_LOAD_DWORD %0, 56, 0, implicit $exec
# GCN-NEXT: KILL %0{{$}}
-# GCN-NEXT: dead %17:vgpr_32 = GLOBAL_LOAD_DWORD %1, 60, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: dead %18:vgpr_32 = GLOBAL_LOAD_DWORD %1, 64, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT: dead %17:vgpr_32 = GLOBAL_LOAD_DWORD %1, 60, 0, implicit $exec
+# GCN-NEXT: dead %18:vgpr_32 = GLOBAL_LOAD_DWORD %1, 64, 0, implicit $exec
# GCN-NEXT: KILL %1{{$}}
---
@@ -236,34 +236,34 @@ body: |
bb.0:
%0:vreg_64 = IMPLICIT_DEF
%1:vreg_64 = IMPLICIT_DEF
- %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, 0, 0, implicit $exec
- %3:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, 0, 0, 0, implicit $exec
- %4:vgpr_32 = GLOBAL_LOAD_DWORD %0, 8, 0, 0, 0, 0, implicit $exec
- %5:vgpr_32 = GLOBAL_LOAD_DWORD %0, 12, 0, 0, 0, 0, implicit $exec
- %6:vgpr_32 = GLOBAL_LOAD_DWORD %0, 16, 0, 0, 0, 0, implicit $exec
- %7:vgpr_32 = GLOBAL_LOAD_DWORD %0, 20, 0, 0, 0, 0, implicit $exec
- %8:vgpr_32 = GLOBAL_LOAD_DWORD %0, 24, 0, 0, 0, 0, implicit $exec
- %9:vgpr_32 = GLOBAL_LOAD_DWORD %0, 28, 0, 0, 0, 0, implicit $exec
- %10:vgpr_32 = GLOBAL_LOAD_DWORD %0, 32, 0, 0, 0, 0, implicit $exec
- %11:vgpr_32 = GLOBAL_LOAD_DWORD %0, 36, 0, 0, 0, 0, implicit $exec
- %12:vgpr_32 = GLOBAL_LOAD_DWORD %0, 40, 0, 0, 0, 0, implicit $exec
- %13:vgpr_32 = GLOBAL_LOAD_DWORD %0, 44, 0, 0, 0, 0, implicit $exec
- %14:vgpr_32 = GLOBAL_LOAD_DWORD %0, 48, 0, 0, 0, 0, implicit $exec
- %15:vgpr_32 = GLOBAL_LOAD_DWORD %0, 52, 0, 0, 0, 0, implicit $exec
- %16:vgpr_32 = GLOBAL_LOAD_DWORD %0, 56, 0, 0, 0, 0, implicit $exec
- %17:vgpr_32 = GLOBAL_LOAD_DWORD %1, 60, 0, 0, 0, 0, implicit $exec
- %18:vgpr_32 = GLOBAL_LOAD_DWORD %1, 64, 0, 0, 0, 0, implicit $exec
+ %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, implicit $exec
+ %3:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, implicit $exec
+ %4:vgpr_32 = GLOBAL_LOAD_DWORD %0, 8, 0, implicit $exec
+ %5:vgpr_32 = GLOBAL_LOAD_DWORD %0, 12, 0, implicit $exec
+ %6:vgpr_32 = GLOBAL_LOAD_DWORD %0, 16, 0, implicit $exec
+ %7:vgpr_32 = GLOBAL_LOAD_DWORD %0, 20, 0, implicit $exec
+ %8:vgpr_32 = GLOBAL_LOAD_DWORD %0, 24, 0, implicit $exec
+ %9:vgpr_32 = GLOBAL_LOAD_DWORD %0, 28, 0, implicit $exec
+ %10:vgpr_32 = GLOBAL_LOAD_DWORD %0, 32, 0, implicit $exec
+ %11:vgpr_32 = GLOBAL_LOAD_DWORD %0, 36, 0, implicit $exec
+ %12:vgpr_32 = GLOBAL_LOAD_DWORD %0, 40, 0, implicit $exec
+ %13:vgpr_32 = GLOBAL_LOAD_DWORD %0, 44, 0, implicit $exec
+ %14:vgpr_32 = GLOBAL_LOAD_DWORD %0, 48, 0, implicit $exec
+ %15:vgpr_32 = GLOBAL_LOAD_DWORD %0, 52, 0, implicit $exec
+ %16:vgpr_32 = GLOBAL_LOAD_DWORD %0, 56, 0, implicit $exec
+ %17:vgpr_32 = GLOBAL_LOAD_DWORD %1, 60, 0, implicit $exec
+ %18:vgpr_32 = GLOBAL_LOAD_DWORD %1, 64, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: reg_pressure{{$}}
-# GCN: dead %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: dead %5:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: dead %6:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 64, 0, 0, 0, 0, implicit $exec
+# GCN: dead %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
+# GCN-NEXT: dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, implicit $exec
+# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, implicit $exec
+# GCN-NEXT: dead %5:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, implicit $exec
+# GCN-NEXT: dead %6:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 64, 0, implicit $exec
# GCN-NEXT: KILL %0{{$}}
-# GCN-NEXT: dead %7:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 80, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: dead %8:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 96, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT: dead %7:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 80, 0, implicit $exec
+# GCN-NEXT: dead %8:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 96, 0, implicit $exec
# GCN-NEXT: KILL %1{{$}}
---
@@ -273,21 +273,21 @@ body: |
bb.0:
%0:vreg_64 = IMPLICIT_DEF
%1:vreg_64 = IMPLICIT_DEF
- %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, 0, implicit $exec
- %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, 0, implicit $exec
- %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, 0, implicit $exec
- %5:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, 0, 0, implicit $exec
- %6:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 64, 0, 0, 0, 0, implicit $exec
- %7:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 80, 0, 0, 0, 0, implicit $exec
- %8:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 96, 0, 0, 0, 0, implicit $exec
+ %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
+ %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, implicit $exec
+ %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, implicit $exec
+ %5:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, implicit $exec
+ %6:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 64, 0, implicit $exec
+ %7:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 80, 0, implicit $exec
+ %8:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 96, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: image_clause{{$}}
-# GCN: %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: %4:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: %5:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+# GCN: %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT: %4:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT: %5:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: KILL undef %2:sgpr_128{{$}}
-# GCN-NEXT: IMAGE_STORE_V4_V2 %3, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT: IMAGE_STORE_V4_V2 %3, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec
---
name: image_clause
@@ -303,18 +303,18 @@ body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
- %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
- %4:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
- %5:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
- IMAGE_STORE_V4_V2 %3, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16)
- IMAGE_STORE_V4_V2 %4, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16)
- IMAGE_STORE_V4_V2 %5, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16)
+ %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
+ %4:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
+ %5:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
+ IMAGE_STORE_V4_V2 %3, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16)
+ IMAGE_STORE_V4_V2 %4, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16)
+ IMAGE_STORE_V4_V2 %5, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16)
...
# GCN-LABEL: {{^}}name: mixed_clause{{$}}
-# GCN: dead %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, %2, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: dead %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+# GCN: dead %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, %2, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
+# GCN-NEXT: dead %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: KILL %1{{$}}
# GCN-NEXT: KILL %2{{$}}
# GCN-NEXT: KILL %0{{$}}
@@ -334,17 +334,17 @@ body: |
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
%2 = IMPLICIT_DEF
- %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, %2, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
- %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, 0, implicit $exec
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, %2, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
+ %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: atomic{{$}}
# GCN: %1:vgpr_32 = IMPLICIT_DEF
-# GCN-NEXT: dead %2:vgpr_32 = FLAT_ATOMIC_ADD_RTN %0, %1, 0, -1, 0, 0, implicit $exec, implicit $flat_scr
-# GCN-NEXT: dead %3:vgpr_32 = FLAT_ATOMIC_ADD_RTN %0, %1, 0, -1, 0, 0, implicit $exec, implicit $flat_scr
-# GCN-NEXT: FLAT_ATOMIC_ADD %0, %1, 0, 0, 0, implicit $exec, implicit $flat_scr
-# GCN-NEXT: FLAT_ATOMIC_ADD %0, %1, 0, 0, 0, implicit $exec, implicit $flat_scr
+# GCN-NEXT: dead %2:vgpr_32 = FLAT_ATOMIC_ADD_RTN %0, %1, 0, 2, implicit $exec, implicit $flat_scr
+# GCN-NEXT: dead %3:vgpr_32 = FLAT_ATOMIC_ADD_RTN %0, %1, 0, 2, implicit $exec, implicit $flat_scr
+# GCN-NEXT: FLAT_ATOMIC_ADD %0, %1, 0, 0, implicit $exec, implicit $flat_scr
+# GCN-NEXT: FLAT_ATOMIC_ADD %0, %1, 0, 0, implicit $exec, implicit $flat_scr
# GCN-NEXT: S_ENDPGM 0
---
@@ -359,10 +359,10 @@ body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
- %2:vgpr_32 = FLAT_ATOMIC_ADD_RTN %0, %1, 0, -1, 0, 0, implicit $exec, implicit $flat_scr
- %3:vgpr_32 = FLAT_ATOMIC_ADD_RTN %0, %1, 0, -1, 0, 0, implicit $exec, implicit $flat_scr
- FLAT_ATOMIC_ADD %0, %1, 0, 0, 0, implicit $exec, implicit $flat_scr
- FLAT_ATOMIC_ADD %0, %1, 0, 0, 0, implicit $exec, implicit $flat_scr
+ %2:vgpr_32 = FLAT_ATOMIC_ADD_RTN %0, %1, 0, 2, implicit $exec, implicit $flat_scr
+ %3:vgpr_32 = FLAT_ATOMIC_ADD_RTN %0, %1, 0, 2, implicit $exec, implicit $flat_scr
+ FLAT_ATOMIC_ADD %0, %1, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_ATOMIC_ADD %0, %1, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
@@ -370,10 +370,10 @@ body: |
# doesn't. Only the final use should be killed.
# GCN-LABEL: {{^}}name: ptr_use_after_clause{{$}}
-# GCN: dead %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 16, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: dead %5:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 48, 0, 0, 0, 0, implicit $exec
+# GCN: dead %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
+# GCN-NEXT: dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 16, 0, implicit $exec
+# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, implicit $exec
+# GCN-NEXT: dead %5:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 48, 0, implicit $exec
# GCN-NEXT: KILL %1{{$}}
# GCN-NEXT: S_NOP 0, implicit %0
---
@@ -384,19 +384,19 @@ body: |
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
- %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, 0, implicit $exec
- %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 16, 0, 0, 0, 0, implicit $exec
- %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, 0, implicit $exec
- %5:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 48, 0, 0, 0, 0, implicit $exec
+ %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
+ %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 16, 0, implicit $exec
+ %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, implicit $exec
+ %5:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 48, 0, implicit $exec
S_NOP 0, implicit %0
...
# Only part of the register is really live past the clause.
# GCN-LABEL: {{^}}name: ptr_use_after_clause_subreg{{$}}
-# GCN: dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 0, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: dead %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub2_sub3, 16, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 32, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub2_sub3, 48, 0, 0, 0, 0, implicit $exec
+# GCN: dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 0, 0, implicit $exec
+# GCN-NEXT: dead %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub2_sub3, 16, 0, implicit $exec
+# GCN-NEXT: dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 32, 0, implicit $exec
+# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub2_sub3, 48, 0, implicit $exec
# GCN-NEXT: KILL %0.sub2_sub3{{$}}
# GCN-NEXT: S_NOP 0, implicit %0.sub0_sub1{{$}}
---
@@ -406,10 +406,10 @@ body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3
%0:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
- %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 0, 0, 0, 0, 0, implicit $exec
- %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub2_sub3, 16, 0, 0, 0, 0, implicit $exec
- %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 32, 0, 0, 0, 0, implicit $exec
- %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub2_sub3, 48, 0, 0, 0, 0, implicit $exec
+ %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 0, 0, implicit $exec
+ %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub2_sub3, 16, 0, implicit $exec
+ %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 32, 0, implicit $exec
+ %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub2_sub3, 48, 0, implicit $exec
S_NOP 0, implicit %0.sub0_sub1
...
@@ -417,10 +417,10 @@ body: |
# beyond the clause.
# GCN-LABEL: {{^}}name: ptr_use_after_clause_subreg_multi{{$}}
-# GCN: dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 0, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: dead %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub2_sub3, 16, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub3_sub4, 32, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub6_sub7, 48, 0, 0, 0, 0, implicit $exec
+# GCN: dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 0, 0, implicit $exec
+# GCN-NEXT: dead %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub2_sub3, 16, 0, implicit $exec
+# GCN-NEXT: dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub3_sub4, 32, 0, implicit $exec
+# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub6_sub7, 48, 0, implicit $exec
# GCN-NEXT: KILL %0.sub2_sub3_sub4, %0.sub7{{$}}
# GCN-NEXT: S_NOP 0, implicit %0.sub0_sub1, implicit %0.sub5_sub6
---
@@ -430,19 +430,19 @@ body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
%0:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
- %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 0, 0, 0, 0, 0, implicit $exec
- %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub2_sub3, 16, 0, 0, 0, 0, implicit $exec
- %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub3_sub4, 32, 0, 0, 0, 0, implicit $exec
- %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub6_sub7, 48, 0, 0, 0, 0, implicit $exec
+ %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 0, 0, implicit $exec
+ %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub2_sub3, 16, 0, implicit $exec
+ %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub3_sub4, 32, 0, implicit $exec
+ %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub6_sub7, 48, 0, implicit $exec
S_NOP 0, implicit %0.sub0_sub1, implicit %0.sub5_sub6
...
# Have subranges, but none of them are killed
# GCN-LABEL: {{^}}name: no_killed_subranges{{$}}
-# GCN: dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 0, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: dead %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub2_sub3, 16, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 32, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub2_sub3, 48, 0, 0, 0, 0, implicit $exec
+# GCN: dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 0, 0, implicit $exec
+# GCN-NEXT: dead %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub2_sub3, 16, 0, implicit $exec
+# GCN-NEXT: dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 32, 0, implicit $exec
+# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub2_sub3, 48, 0, implicit $exec
# GCN-NEXT: S_NOP 0, implicit %0.sub0_sub1{{$}}
# GCN-NEXT: S_NOP 0, implicit %0.sub2_sub3{{$}}
---
@@ -452,10 +452,10 @@ body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3
%0:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
- %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 0, 0, 0, 0, 0, implicit $exec
- %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub2_sub3, 16, 0, 0, 0, 0, implicit $exec
- %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 32, 0, 0, 0, 0, implicit $exec
- %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub2_sub3, 48, 0, 0, 0, 0, implicit $exec
+ %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 0, 0, implicit $exec
+ %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub2_sub3, 16, 0, implicit $exec
+ %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 32, 0, implicit $exec
+ %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub2_sub3, 48, 0, implicit $exec
S_NOP 0, implicit %0.sub0_sub1
S_NOP 0, implicit %0.sub2_sub3
...
@@ -464,8 +464,8 @@ body: |
# live range extended.
# GCN-LABEL: name: no_killed_undef_subrange_use
-# GCN: dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 0, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: dead %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 32, 0, 0, 0, 0, implicit $exec
+# GCN: dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 0, 0, implicit $exec
+# GCN-NEXT: dead %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 32, 0, implicit $exec
# GCN-NEXT: KILL %0.sub0_sub1{{$}}
# GCN-NEXT: %0.sub2:vreg_128 = V_MOV_B32_e32 0, implicit $exec
# GCN-NEXT: S_NOP 0, implicit %0.sub2
@@ -476,8 +476,8 @@ body: |
bb.0:
liveins: $vgpr0_vgpr1
undef %0.sub0_sub1:vreg_128 = COPY $vgpr0_vgpr1
- %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 0, 0, 0, 0, 0, implicit $exec
- %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 32, 0, 0, 0, 0, implicit $exec
+ %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 0, 0, implicit $exec
+ %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 32, 0, implicit $exec
%0.sub2:vreg_128 = V_MOV_B32_e32 0, implicit $exec
S_NOP 0, implicit %0.sub2
...
@@ -486,11 +486,11 @@ body: |
# clause
#
# GCN-LABEL: {{^}}name: implicit_def_no_break{{$}}
-# GCN: %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, 0, implicit $exec
+# GCN: %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
+# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, implicit $exec
# GCN-NEXT: %3:vreg_64 = IMPLICIT_DEF
-# GCN-NEXT: %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %3, 32, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: %5:vreg_128 = GLOBAL_LOAD_DWORDX4 %3, 48, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT: %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %3, 32, 0, implicit $exec
+# GCN-NEXT: %5:vreg_128 = GLOBAL_LOAD_DWORDX4 %3, 48, 0, implicit $exec
# GCN-NEXT: KILL %3{{$}}
# GCN-NEXT: KILL %0{{$}}
---
@@ -500,18 +500,18 @@ body: |
bb.0:
liveins: $vgpr0_vgpr1
%0:vreg_64 = COPY $vgpr0_vgpr1
- %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, 0, implicit $exec
- %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, 0, implicit $exec
+ %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
+ %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, implicit $exec
%3:vreg_64 = IMPLICIT_DEF
- %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %3, 32, 0, 0, 0, 0, implicit $exec
- %5:vreg_128 = GLOBAL_LOAD_DWORDX4 %3, 48, 0, 0, 0, 0, implicit $exec
+ %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %3, 32, 0, implicit $exec
+ %5:vreg_128 = GLOBAL_LOAD_DWORDX4 %3, 48, 0, implicit $exec
S_NOP 0, implicit %1, implicit %2, implicit %4, implicit %5
...
# GCN-LABEL: {{^}}name: kill_part_subreg{{$}}
-# GCN: undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: %1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: %1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, 0, 0, implicit $exec
+# GCN: undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, implicit $exec
+# GCN-NEXT: %1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, implicit $exec
+# GCN-NEXT: %1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, implicit $exec
# GCN-NEXT: KILL %0.sub0_sub1_sub2, %0.sub3
---
name: kill_part_subreg
@@ -520,15 +520,15 @@ body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3
%0:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
- undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, 0, 0, 0, implicit $exec
- %1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, 0, 0, 0, implicit $exec
- %1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, 0, 0, implicit $exec
+ undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, implicit $exec
+ %1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, implicit $exec
+ %1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, implicit $exec
S_NOP 0, implicit %1
...
# GCN-LABEL: {{^}}name: mem_clause_sreg256_used_stack{{$}}
-# GCN: undef %0.sub7:sgpr_256 = S_LOAD_DWORD_IMM undef %1:sgpr_64(p4), 8, 0, 0
-# GCN-NEXT: dead %0.sub3:sgpr_256 = S_LOAD_DWORD_IMM undef %1:sgpr_64(p4), 24, 0, 0
+# GCN: undef %0.sub7:sgpr_256 = S_LOAD_DWORD_IMM undef %1:sgpr_64(p4), 8, 0
+# GCN-NEXT: dead %0.sub3:sgpr_256 = S_LOAD_DWORD_IMM undef %1:sgpr_64(p4), 24, 0
# GCN-NEXT: KILL undef %1
---
name: mem_clause_sreg256_used_stack
@@ -541,8 +541,8 @@ machineFunctionInfo:
body: |
bb.0:
- undef %0.sub7:sgpr_256 = S_LOAD_DWORD_IMM undef %1:sgpr_64(p4), 8, 0, 0
- %0.sub3:sgpr_256 = S_LOAD_DWORD_IMM undef %1:sgpr_64(p4), 24, 0, 0
+ undef %0.sub7:sgpr_256 = S_LOAD_DWORD_IMM undef %1:sgpr_64(p4), 8, 0
+ %0.sub3:sgpr_256 = S_LOAD_DWORD_IMM undef %1:sgpr_64(p4), 24, 0
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/merge-image-load-gfx10.mir b/llvm/test/CodeGen/AMDGPU/merge-image-load-gfx10.mir
index 4059e33fbd1c..0d50dfb76cd2 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-image-load-gfx10.mir
+++ b/llvm/test/CodeGen/AMDGPU/merge-image-load-gfx10.mir
@@ -1,7 +1,7 @@
# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefix=GFX10 %s
# GFX10-LABEL: name: image_load_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -9,17 +9,17 @@ name: image_load_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_load_merged_v1v3_reversed
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub3
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub0_sub1_sub2
@@ -27,18 +27,18 @@ name: image_load_merged_v1v3_reversed
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 8, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 7, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_load_merged_v2v2
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4)
# GFX10: %{{[0-9]+}}:vreg_64 = COPY %8.sub0_sub1
# GFX10: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub2_sub3
@@ -46,18 +46,18 @@ name: image_load_merged_v2v2
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vreg_64 = IMAGE_LOAD_V2_V2_gfx10 %5:vreg_64, %3:sgpr_256, 3, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
- %7:vreg_64 = IMAGE_LOAD_V2_V2_gfx10 %5:vreg_64, %3:sgpr_256, 12, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
+ %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vreg_64 = IMAGE_LOAD_V2_V2_gfx10 %5:vreg_64, %3:sgpr_256, 3, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
+ %7:vreg_64 = IMAGE_LOAD_V2_V2_gfx10 %5:vreg_64, %3:sgpr_256, 12, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_load_merged_v2v2_reversed
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4)
# GFX10: %{{[0-9]+}}:vreg_64 = COPY %8.sub2_sub3
# GFX10: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub0_sub1
@@ -65,18 +65,18 @@ name: image_load_merged_v2v2_reversed
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vreg_64 = IMAGE_LOAD_V2_V2_gfx10 %5:vreg_64, %3:sgpr_256, 12, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
- %7:vreg_64 = IMAGE_LOAD_V2_V2_gfx10 %5:vreg_64, %3:sgpr_256, 3, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
+ %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vreg_64 = IMAGE_LOAD_V2_V2_gfx10 %5:vreg_64, %3:sgpr_256, 12, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
+ %7:vreg_64 = IMAGE_LOAD_V2_V2_gfx10 %5:vreg_64, %3:sgpr_256, 3, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_load_merged_v3v1
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4)
# GFX10: %{{[0-9]+}}:vreg_96 = COPY %8.sub0_sub1_sub2
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub3
@@ -84,18 +84,18 @@ name: image_load_merged_v3v1
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 7, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
- %7:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 8, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %7:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
...
---
# GFX10-LABEL: name: image_load_merged_v3v1_reversed
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4)
# GFX10: %{{[0-9]+}}:vreg_96 = COPY %8.sub1_sub2_sub3
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub0
@@ -103,295 +103,295 @@ name: image_load_merged_v3v1_reversed
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
- %7:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %7:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
...
---
# GFX10-LABEL: name: image_load_divided_merged
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
name: image_load_divided_merged
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 8, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %8:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %9:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %7:vreg_64, %3:sgpr_256, 7, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
- %10:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %11:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 7, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %8:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %9:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %7:vreg_64, %3:sgpr_256, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %10:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %11:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_load_divided_not_merged
-# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_load_divided_not_merged
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vreg_128 = COPY %2
- %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 8, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- IMAGE_STORE_V4_V2 %4:vreg_128, %5:vreg_64, %3:sgpr_256, 15, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16)
- %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 7, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ IMAGE_STORE_V4_V2 %4:vreg_128, %5:vreg_64, %3:sgpr_256, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16)
+ %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_load_dmask_overlapped_not_merged
-# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 4, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_load_dmask_overlapped_not_merged
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 4, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 7, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_load_dmask_not_disjoint_not_merged
-# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 4, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 11, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 11, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_load_dmask_not_disjoint_not_merged
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 4, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 11, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 11, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_load_not_merged_0
-# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %6, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %6, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_load_not_merged_0
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %7:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %8:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %6, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %7:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %8:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %6, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_load_not_merged_1
-# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %6, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %6, %4, 7, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %6, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %6, %4, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_load_not_merged_1
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
- %4:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
+ %4:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%5:vgpr_32 = COPY %2.sub3
- %6:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %7:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %6, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %8:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %6, %4, 7, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %6:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %7:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %6, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %8:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %6, %4, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_load_not_merged_3
-# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_load_not_merged_3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_load_not_merged_4
-# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_load_not_merged_4
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_load_not_merged_5
-# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_load_not_merged_5
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_load_not_merged_6
-# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_load_not_merged_6
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_load_not_merged_7
-# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_load_not_merged_7
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_load_not_merged_8
-# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V1_gfx10 %6, %3, 8, 1, -1, 0, 0, 0, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
-# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V1_gfx10 %6, %3, 8, 1, -1, 0, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_load_not_merged_8
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
%6:vgpr_32 = COPY %5.sub0
- %7:vgpr_32 = IMAGE_LOAD_V1_V1_gfx10 %6, %3, 8, 1, -1, 0, 0, 0, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
- %8:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %7:vgpr_32 = IMAGE_LOAD_V1_V1_gfx10 %6, %3, 8, 1, -1, 0, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
+ %8:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_load_not_merged_9
-# GFX10: %{{[0-9]+}}:vreg_64 = IMAGE_LOAD_V2_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_64 = IMAGE_LOAD_V2_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_load_not_merged_9
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vreg_64 = IMAGE_LOAD_V2_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vreg_64 = IMAGE_LOAD_V2_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_load_not_merged_10
-# GFX10: %{{[0-9]+}}:vreg_64 = IMAGE_LOAD_V2_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_64 = IMAGE_LOAD_V2_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_load_not_merged_10
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vreg_64 = IMAGE_LOAD_V2_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vreg_64 = IMAGE_LOAD_V2_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_load_not_merged_11
-# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_load_not_merged_11
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_load_mip_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_V4_V3_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_V4_V3_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -399,20 +399,20 @@ name: image_load_mip_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_96 = BUFFER_LOAD_DWORDX3_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_LOAD_MIP_V1_V3_gfx10 %5:vreg_96, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_LOAD_MIP_V3_V3_gfx10 %5:vreg_96, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_96 = BUFFER_LOAD_DWORDX3_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_LOAD_MIP_V1_V3_gfx10 %5:vreg_96, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_LOAD_MIP_V3_V3_gfx10 %5:vreg_96, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_load_mip_pck_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_PCK_V4_V3_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_PCK_V4_V3_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -420,20 +420,20 @@ name: image_load_mip_pck_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_96 = BUFFER_LOAD_DWORDX3_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_LOAD_MIP_PCK_V1_V3_gfx10 %5:vreg_96, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_LOAD_MIP_PCK_V3_V3_gfx10 %5:vreg_96, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_96 = BUFFER_LOAD_DWORDX3_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_LOAD_MIP_PCK_V1_V3_gfx10 %5:vreg_96, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_LOAD_MIP_PCK_V3_V3_gfx10 %5:vreg_96, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_load_mip_pck_sgn_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_PCK_SGN_V4_V3_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_PCK_SGN_V4_V3_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -441,18 +441,18 @@ name: image_load_mip_pck_sgn_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_96 = BUFFER_LOAD_DWORDX3_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_LOAD_MIP_PCK_SGN_V1_V3_gfx10 %5:vreg_96, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_LOAD_MIP_PCK_SGN_V3_V3_gfx10 %5:vreg_96, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_96 = BUFFER_LOAD_DWORDX3_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_LOAD_MIP_PCK_SGN_V1_V3_gfx10 %5:vreg_96, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_LOAD_MIP_PCK_SGN_V3_V3_gfx10 %5:vreg_96, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_load_pck_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_PCK_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_PCK_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -460,18 +460,18 @@ name: image_load_pck_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_LOAD_PCK_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_LOAD_PCK_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_LOAD_PCK_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_LOAD_PCK_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_load_pck_sgn_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_PCK_SGN_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_PCK_SGN_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -479,12 +479,12 @@ name: image_load_pck_sgn_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_LOAD_PCK_SGN_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_LOAD_PCK_SGN_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_LOAD_PCK_SGN_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_LOAD_PCK_SGN_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
diff --git a/llvm/test/CodeGen/AMDGPU/merge-image-load.mir b/llvm/test/CodeGen/AMDGPU/merge-image-load.mir
index 02dc0d8d4b38..cf26d4f7d84e 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-image-load.mir
+++ b/llvm/test/CodeGen/AMDGPU/merge-image-load.mir
@@ -1,7 +1,7 @@
# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefix=GFX9 %s
# GFX9-LABEL: name: image_load_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -9,18 +9,18 @@ name: image_load_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_merged_v1v3_reversed
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub3
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub0_sub1_sub2
@@ -28,18 +28,18 @@ name: image_load_merged_v1v3_reversed
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 7, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_merged_v2v2
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4)
# GFX9: %{{[0-9]+}}:vreg_64 = COPY %8.sub0_sub1
# GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub2_sub3
@@ -47,18 +47,18 @@ name: image_load_merged_v2v2
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vreg_64 = IMAGE_LOAD_V2_V4 %5:vreg_128, %3:sgpr_256, 3, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
- %7:vreg_64 = IMAGE_LOAD_V2_V4 %5:vreg_128, %3:sgpr_256, 12, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vreg_64 = IMAGE_LOAD_V2_V4 %5:vreg_128, %3:sgpr_256, 3, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
+ %7:vreg_64 = IMAGE_LOAD_V2_V4 %5:vreg_128, %3:sgpr_256, 12, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_merged_v2v2_reversed
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4)
# GFX9: %{{[0-9]+}}:vreg_64 = COPY %8.sub2_sub3
# GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub0_sub1
@@ -66,18 +66,18 @@ name: image_load_merged_v2v2_reversed
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vreg_64 = IMAGE_LOAD_V2_V4 %5:vreg_128, %3:sgpr_256, 12, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
- %7:vreg_64 = IMAGE_LOAD_V2_V4 %5:vreg_128, %3:sgpr_256, 3, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vreg_64 = IMAGE_LOAD_V2_V4 %5:vreg_128, %3:sgpr_256, 12, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
+ %7:vreg_64 = IMAGE_LOAD_V2_V4 %5:vreg_128, %3:sgpr_256, 3, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_merged_v3v1
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4)
# GFX9: %{{[0-9]+}}:vreg_96 = COPY %8.sub0_sub1_sub2
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub3
@@ -85,18 +85,18 @@ name: image_load_merged_v3v1
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 7, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
- %7:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %7:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_merged_v3v1_reversed
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4)
# GFX9: %{{[0-9]+}}:vreg_96 = COPY %8.sub1_sub2_sub3
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub0
@@ -104,276 +104,276 @@ name: image_load_merged_v3v1_reversed
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
- %7:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %7:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_divided_merged
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
name: image_load_divided_merged
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %8:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %9:vreg_96 = IMAGE_LOAD_V3_V4 %7:vreg_128, %3:sgpr_256, 7, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
- %10:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %11:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 7, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %8:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %9:vreg_96 = IMAGE_LOAD_V3_V4 %7:vreg_128, %3:sgpr_256, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %10:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %11:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_divided_not_merged
-# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_load_divided_not_merged
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vreg_128 = COPY %2
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- IMAGE_STORE_V4_V4 %4:vreg_128, %5:vreg_128, %3:sgpr_256, 15, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16)
- %7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 7, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ IMAGE_STORE_V4_V4 %4:vreg_128, %5:vreg_128, %3:sgpr_256, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16)
+ %7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_dmask_overlapped_not_merged
-# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 4, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 4, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_load_dmask_overlapped_not_merged
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 4, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 7, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 4, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_dmask_not_disjoint_not_merged
-# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 4, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 11, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 4, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 11, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_load_dmask_not_disjoint_not_merged
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 4, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 11, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 4, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 11, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_not_merged_0
-# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %6, %3, 7, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %6, %3, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_load_not_merged_0
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %7:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %8:vreg_96 = IMAGE_LOAD_V3_V4 %6, %3, 7, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %7:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %8:vreg_96 = IMAGE_LOAD_V3_V4 %6, %3, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_not_merged_1
-# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %6, %3, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %6, %4, 7, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %6, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %6, %4, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_load_not_merged_1
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
- %4:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
+ %4:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%5:vgpr_32 = COPY %2.sub3
- %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %7:vgpr_32 = IMAGE_LOAD_V1_V4 %6, %3, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %8:vreg_96 = IMAGE_LOAD_V3_V4 %6, %4, 7, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %7:vgpr_32 = IMAGE_LOAD_V1_V4 %6, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %8:vreg_96 = IMAGE_LOAD_V3_V4 %6, %4, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_not_merged_10
-# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, 0, -1, 1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, -1, 1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_load_not_merged_10
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, 0, -1, 1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, -1, 1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_not_merged_3
-# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 1, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_load_not_merged_3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 1, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_not_merged_4
-# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 1, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 1, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_load_not_merged_4
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 1, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 1, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_not_merged_5
-# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 1, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 1, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_load_not_merged_5
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 1, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 1, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_not_merged_6
-# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 1, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 1, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_load_not_merged_6
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 1, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 1, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_not_merged_7
-# GFX9: %{{[0-9]+}}:vreg_64 = IMAGE_LOAD_V2_V4 %5, %3, 8, 0, 0, 0, 0, 0, 1, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
-# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_64 = IMAGE_LOAD_V2_V4 %5, %3, 8, 0, 0, 0, 1, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_load_not_merged_7
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vreg_64 = IMAGE_LOAD_V2_V4 %5, %3, 8, 0, 0, 0, 0, 0, 1, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
- %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vreg_64 = IMAGE_LOAD_V2_V4 %5, %3, 8, 0, 0, 0, 1, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
+ %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_not_merged_8
-# GFX9: %{{[0-9]+}}:vreg_64 = IMAGE_LOAD_V2_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, 1, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
-# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_64 = IMAGE_LOAD_V2_V4 %5, %3, 8, 0, 0, 0, 0, 1, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_load_not_merged_8
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vreg_64 = IMAGE_LOAD_V2_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, 1, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
- %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vreg_64 = IMAGE_LOAD_V2_V4 %5, %3, 8, 0, 0, 0, 0, 1, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
+ %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_not_merged_9
-# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_load_not_merged_9
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_mip_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -381,18 +381,18 @@ name: image_load_mip_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_LOAD_MIP_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_LOAD_MIP_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_LOAD_MIP_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_LOAD_MIP_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_mip_pck_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_PCK_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, 0, -1, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_PCK_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -400,18 +400,18 @@ name: image_load_mip_pck_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_LOAD_MIP_PCK_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_LOAD_MIP_PCK_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_LOAD_MIP_PCK_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_LOAD_MIP_PCK_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_mip_pck_sgn_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_PCK_SGN_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, 0, -1, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_PCK_SGN_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -419,18 +419,18 @@ name: image_load_mip_pck_sgn_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_LOAD_MIP_PCK_SGN_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_LOAD_MIP_PCK_SGN_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_LOAD_MIP_PCK_SGN_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_LOAD_MIP_PCK_SGN_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_pck_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_PCK_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, 0, -1, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_PCK_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -438,18 +438,18 @@ name: image_load_pck_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_LOAD_PCK_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_LOAD_PCK_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_LOAD_PCK_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_LOAD_PCK_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_pck_sgn_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_PCK_SGN_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, 0, -1, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_PCK_SGN_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -457,13 +457,13 @@ name: image_load_pck_sgn_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_LOAD_PCK_SGN_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_LOAD_PCK_SGN_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_LOAD_PCK_SGN_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_LOAD_PCK_SGN_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
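A note on the operand change these hunks exercise: the three separate 0/1 cache-control immediates (glc, slc, dlc) visible on the removed lines collapse into the single cache-policy immediate on the added lines. A minimal sketch of that packing, assuming the CPol bit values (GLC = 1, SLC = 2, DLC = 4) that SIDefines.h defines for this change; the helper itself is hypothetical, not code from the patch:

  // Sketch only: fold the former glc/slc/dlc immediates into one bitmask.
  // Bit values assume the CPol encoding (GLC = 1, SLC = 2, DLC = 4) from
  // SIDefines.h; foldCachePolicy is illustrative, not the patch's code.
  #include <cstdint>

  namespace CPol {
  enum : uint32_t { GLC = 1, SLC = 2, DLC = 4, ALL = GLC | SLC | DLC };
  } // namespace CPol

  static uint32_t foldCachePolicy(bool GLCBit, bool SLCBit, bool DLCBit) {
    uint32_t Policy = 0;
    if (GLCBit)
      Policy |= CPol::GLC; // bit 0
    if (SLCBit)
      Policy |= CPol::SLC; // bit 1
    if (DLCBit)
      Policy |= CPol::DLC; // bit 2 (gfx10 only)
    return Policy;
  }

Under that encoding a lone glc, slc or dlc maps to cache-policy values 1, 2 and 4, which is how the glc/slc/dlc mismatch cases (not_merged_3/4/5) in the gfx10 file below stay distinguishable.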
diff --git a/llvm/test/CodeGen/AMDGPU/merge-image-sample-gfx10.mir b/llvm/test/CodeGen/AMDGPU/merge-image-sample-gfx10.mir
index 46de4e507049..c405a16418e3 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-image-sample-gfx10.mir
+++ b/llvm/test/CodeGen/AMDGPU/merge-image-sample-gfx10.mir
@@ -1,7 +1,7 @@
# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefix=GFX10 %s
# GFX10-LABEL: name: image_sample_l_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -9,17 +9,17 @@ name: image_sample_l_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_l_merged_v1v3_reversed
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub3
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub0_sub1_sub2
@@ -27,18 +27,18 @@ name: image_sample_l_merged_v1v3_reversed
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 8, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 7, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_l_merged_v2v2
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4)
# GFX10: %{{[0-9]+}}:vreg_64 = COPY %8.sub0_sub1
# GFX10: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub2_sub3
@@ -46,18 +46,18 @@ name: image_sample_l_merged_v2v2
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 3, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
- %7:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 12, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 3, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
+ %7:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 12, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_l_merged_v2v2_reversed
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4)
# GFX10: %{{[0-9]+}}:vreg_64 = COPY %8.sub2_sub3
# GFX10: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub0_sub1
@@ -65,18 +65,18 @@ name: image_sample_l_merged_v2v2_reversed
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 12, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
- %7:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 3, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 12, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
+ %7:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 3, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_l_merged_v3v1
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4)
# GFX10: %{{[0-9]+}}:vreg_96 = COPY %8.sub0_sub1_sub2
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub3
@@ -84,18 +84,18 @@ name: image_sample_l_merged_v3v1
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 7, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
- %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 8, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_l_merged_v3v1_reversed
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4)
# GFX10: %{{[0-9]+}}:vreg_96 = COPY %8.sub1_sub2_sub3
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub0
@@ -103,290 +103,290 @@ name: image_sample_l_merged_v3v1_reversed
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
- %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_l_divided_merged
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
name: image_sample_l_divided_merged
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 8, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %8:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %9:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %7:vgpr_32, %7:vgpr_32, %7:vgpr_32, %3:sgpr_256, %2:sgpr_128, 7, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
- %10:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %11:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 7, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %8:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %9:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %7:vgpr_32, %7:vgpr_32, %7:vgpr_32, %3:sgpr_256, %2:sgpr_128, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %10:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %11:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_l_divided_not_merged
-# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_sample_l_divided_not_merged
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vreg_128 = COPY %2
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 8, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- IMAGE_STORE_V4_V2_nsa_gfx10 %4:vreg_128, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16)
- %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 7, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ IMAGE_STORE_V4_V2_nsa_gfx10 %4:vreg_128, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16)
+ %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_l_dmask_overlapped_not_merged
-# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 4, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_sample_l_dmask_overlapped_not_merged
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 4, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 7, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_l_dmask_not_disjoint_not_merged
-# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 4, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 11, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 11, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_sample_l_dmask_not_disjoint_not_merged
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 4, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 11, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 11, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
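The two not-merged cases above pin down when the load-store optimizer may combine a pair of samples: the dmasks must not overlap, and the smaller mask has to sit entirely below the larger one's lowest channel so each original result remains one contiguous subregister of the merged register (1+14, 8+7 and 3+12 qualify; 4+7 overlaps and 4+11 interleaves). A sketch of that test, inferred from these cases rather than copied from SILoadStoreOptimizer.cpp:

  // Sketch (C++20): dmask compatibility as implied by the merged and
  // not-merged cases in this file; nonzero masks assumed.
  #include <algorithm>
  #include <bit>
  #include <cstdint>

  static bool dmasksCanCombine(uint32_t DMask0, uint32_t DMask1) {
    if (DMask0 & DMask1)
      return false;                           // overlap, e.g. dmasks 4 and 7
    uint32_t MinMask = std::min(DMask0, DMask1);
    uint32_t MaxMask = std::max(DMask0, DMask1);
    unsigned Low = std::countr_zero(MaxMask); // lowest channel of MaxMask
    return MinMask < (1u << Low);             // 1|14 -> 15 ok; 4, 11 rejected
  }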
---
# GFX10-LABEL: name: image_sample_l_not_merged_0
-# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %6, %6, %6, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %6, %6, %6, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_sample_l_not_merged_0
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %8:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %6, %6, %6, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %8:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %6, %6, %6, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_l_not_merged_1
-# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %6, %6, %6, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %6, %6, %6, %4, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %6, %6, %6, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %6, %6, %6, %4, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_sample_l_not_merged_1
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
- %4:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
+ %4:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%5:vgpr_32 = COPY %2.sub3
- %6:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %6, %6, %6, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %8:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %6, %6, %6, %4, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %6:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %6, %6, %6, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %8:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %6, %6, %6, %4, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_l_not_merged_2
-# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %6, %6, %6, %4, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %6, %6, %6, %4, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %6, %6, %6, %4, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %6, %6, %6, %4, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_sample_l_not_merged_2
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
%3:sgpr_128 = COPY $sgpr92_sgpr93_sgpr94_sgpr95
- %4:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %4:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%5:vgpr_32 = COPY %2.sub3
- %6:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %6, %6, %6, %4, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %8:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %6, %6, %6, %4, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %6:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %6, %6, %6, %4, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %8:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %6, %6, %6, %4, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_l_not_merged_3
-# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_sample_l_not_merged_3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_l_not_merged_4
-# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_sample_l_not_merged_4
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+    %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_l_not_merged_5
-# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 4, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_sample_l_not_merged_5
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+    %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 4, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_l_not_merged_6
-# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_sample_l_not_merged_6
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_l_not_merged_7
-# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V2_nsa_gfx10 %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V2_nsa_gfx10 %5, %5, %3, %2, 8, 1, -1, 0, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_sample_l_not_merged_7
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V2_nsa_gfx10 %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V2_nsa_gfx10 %5, %5, %3, %2, 8, 1, -1, 0, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_l_not_merged_8
-# GFX10: %{{[0-9]+}}:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 8, addrspace 4)
-# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 8, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_sample_l_not_merged_8
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 8, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 8, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_l_not_merged_9
-# GFX10: %{{[0-9]+}}:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable load 8, addrspace 4)
-# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable load 8, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_sample_l_not_merged_9
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable load 8, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable load 8, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_l_not_merged_10
-# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_sample_l_not_merged_10
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
@@ -394,7 +394,7 @@ body: |
# GFX10-LABEL: name: image_sample_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_V4_V2_nsa_gfx10 %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_V4_V2_nsa_gfx10 %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -402,19 +402,19 @@ name: image_sample_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_V1_V2_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_V3_V2_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_V1_V2_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_V3_V2_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_b_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -422,19 +422,19 @@ name: image_sample_b_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_B_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_B_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_B_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_B_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_b_cl_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_CL_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_CL_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -442,19 +442,19 @@ name: image_sample_b_cl_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_B_CL_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_B_CL_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_B_CL_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_B_CL_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_b_cl_o_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_CL_O_V4_V5_nsa_gfx10 %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_CL_O_V4_V5_nsa_gfx10 %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -462,19 +462,19 @@ name: image_sample_b_cl_o_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_B_CL_O_V1_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_B_CL_O_V3_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_B_CL_O_V1_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_B_CL_O_V3_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_b_o_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_O_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_O_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -482,19 +482,19 @@ name: image_sample_b_o_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_B_O_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_B_O_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_B_O_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_B_O_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_c_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -502,19 +502,19 @@ name: image_sample_c_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_C_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_C_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_C_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_C_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_cd_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_V4_V6_nsa_gfx10 %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_V4_V6_nsa_gfx10 %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -522,19 +522,19 @@ name: image_sample_cd_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_CD_V1_V6_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_CD_V3_V6_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_CD_V1_V6_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_CD_V3_V6_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_cd_cl_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_CL_V4_V7_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_CL_V4_V7_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -542,19 +542,19 @@ name: image_sample_cd_cl_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_CD_CL_V1_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_CD_CL_V3_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_CD_CL_V1_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_CD_CL_V3_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_cd_cl_o_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_CL_O_V4_V8_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_CL_O_V4_V8_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -562,19 +562,19 @@ name: image_sample_cd_cl_o_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_CD_CL_O_V1_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_CD_CL_O_V3_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_CD_CL_O_V1_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_CD_CL_O_V3_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_cd_o_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_O_V4_V7_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_O_V4_V7_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -582,19 +582,19 @@ name: image_sample_cd_o_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_CD_O_V1_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_CD_O_V3_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_CD_O_V1_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_CD_O_V3_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_cl_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CL_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CL_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -602,19 +602,19 @@ name: image_sample_cl_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_CL_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_CL_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_CL_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_CL_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_cl_o_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CL_O_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CL_O_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -622,19 +622,19 @@ name: image_sample_cl_o_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_CL_O_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_CL_O_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_CL_O_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_CL_O_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_c_b_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -642,19 +642,19 @@ name: image_sample_c_b_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_C_B_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_C_B_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_C_B_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_C_B_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_c_b_cl_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_CL_V4_V5_nsa_gfx10 %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_CL_V4_V5_nsa_gfx10 %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -662,19 +662,19 @@ name: image_sample_c_b_cl_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_C_B_CL_V1_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_C_B_CL_V3_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_C_B_CL_V1_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_C_B_CL_V3_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_c_b_cl_o_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_CL_O_V4_V6_nsa_gfx10 %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_CL_O_V4_V6_nsa_gfx10 %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -682,19 +682,19 @@ name: image_sample_c_b_cl_o_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_C_B_CL_O_V1_V6_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_C_B_CL_O_V3_V6_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_C_B_CL_O_V1_V6_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_C_B_CL_O_V3_V6_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_c_b_o_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_O_V4_V5_nsa_gfx10 %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_O_V4_V5_nsa_gfx10 %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -702,19 +702,19 @@ name: image_sample_c_b_o_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_C_B_O_V1_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_C_B_O_V3_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_C_B_O_V1_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_C_B_O_V3_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_c_cd_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_V4_V7_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_V4_V7_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -722,19 +722,19 @@ name: image_sample_c_cd_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_C_CD_V1_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_C_CD_V3_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_C_CD_V1_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_C_CD_V3_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_c_cd_cl_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_CL_V4_V8_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_CL_V4_V8_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -742,19 +742,19 @@ name: image_sample_c_cd_cl_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_C_CD_CL_V1_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_C_CD_CL_V3_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_C_CD_CL_V1_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_C_CD_CL_V3_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_c_cd_cl_o_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_CL_O_V4_V9_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_CL_O_V4_V9_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -762,19 +762,19 @@ name: image_sample_c_cd_cl_o_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_C_CD_CL_O_V1_V9_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_C_CD_CL_O_V3_V9_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_C_CD_CL_O_V1_V9_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_C_CD_CL_O_V3_V9_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_c_cd_o_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_O_V4_V8_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_O_V4_V8_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -782,19 +782,19 @@ name: image_sample_c_cd_o_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_C_CD_O_V1_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_C_CD_O_V3_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_C_CD_O_V1_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_C_CD_O_V3_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_c_cl_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CL_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CL_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -802,19 +802,19 @@ name: image_sample_c_cl_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_C_CL_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_C_CL_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_C_CL_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_C_CL_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_c_cl_o_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CL_O_V4_V5_nsa_gfx10 %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CL_O_V4_V5_nsa_gfx10 %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -822,19 +822,19 @@ name: image_sample_c_cl_o_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_C_CL_O_V1_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_C_CL_O_V3_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_C_CL_O_V1_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_C_CL_O_V3_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_c_d_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_V4_V7_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_V4_V7_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -842,19 +842,19 @@ name: image_sample_c_d_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_C_D_V1_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_C_D_V3_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_C_D_V1_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_C_D_V3_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_c_d_cl_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_CL_V4_V8_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_CL_V4_V8_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -862,19 +862,19 @@ name: image_sample_c_d_cl_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_C_D_CL_V1_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_C_D_CL_V3_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_C_D_CL_V1_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_C_D_CL_V3_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_c_d_cl_o_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_CL_O_V4_V9_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_CL_O_V4_V9_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -882,19 +882,19 @@ name: image_sample_c_d_cl_o_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_C_D_CL_O_V1_V9_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_C_D_CL_O_V3_V9_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_C_D_CL_O_V1_V9_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_C_D_CL_O_V3_V9_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_c_d_o_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_O_V4_V8_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_O_V4_V8_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -902,19 +902,19 @@ name: image_sample_c_d_o_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_C_D_O_V1_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_C_D_O_V3_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_C_D_O_V1_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_C_D_O_V3_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_c_l_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_L_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_L_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -922,19 +922,19 @@ name: image_sample_c_l_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_C_L_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_C_L_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_C_L_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_C_L_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_c_lz_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_LZ_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_LZ_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -942,19 +942,19 @@ name: image_sample_c_lz_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_C_LZ_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_C_LZ_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_C_LZ_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_C_LZ_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_c_lz_o_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_LZ_O_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_LZ_O_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -962,19 +962,19 @@ name: image_sample_c_lz_o_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_C_LZ_O_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_C_LZ_O_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_C_LZ_O_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_C_LZ_O_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_c_l_o_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_L_O_V4_V5_nsa_gfx10 %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_L_O_V4_V5_nsa_gfx10 %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -982,19 +982,19 @@ name: image_sample_c_l_o_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_C_L_O_V1_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_C_L_O_V3_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_C_L_O_V1_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_C_L_O_V3_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_c_o_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_O_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_O_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -1002,19 +1002,19 @@ name: image_sample_c_o_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_C_O_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_C_O_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_C_O_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_C_O_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_d_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_V4_V6_nsa_gfx10 %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_V4_V6_nsa_gfx10 %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -1022,19 +1022,19 @@ name: image_sample_d_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_D_V1_V6_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_D_V3_V6_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_D_V1_V6_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_D_V3_V6_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_d_cl_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_CL_V4_V7_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_CL_V4_V7_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -1042,19 +1042,19 @@ name: image_sample_d_cl_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_D_CL_V1_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_D_CL_V3_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_D_CL_V1_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_D_CL_V3_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_d_cl_o_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_CL_O_V4_V8_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_CL_O_V4_V8_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -1062,19 +1062,19 @@ name: image_sample_d_cl_o_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_D_CL_O_V1_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_D_CL_O_V3_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_D_CL_O_V1_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_D_CL_O_V3_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_d_o_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_O_V4_V7_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_O_V4_V7_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -1082,19 +1082,19 @@ name: image_sample_d_o_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_D_O_V1_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_D_O_V3_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_D_O_V1_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_D_O_V3_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_lz_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2_nsa_gfx10 %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2_nsa_gfx10 %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -1102,19 +1102,19 @@ name: image_sample_lz_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_LZ_V3_V2_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_LZ_V3_V2_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_lz_o_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_LZ_O_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_LZ_O_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -1122,19 +1122,19 @@ name: image_sample_lz_o_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_LZ_O_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_LZ_O_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_LZ_O_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_LZ_O_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_l_o_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_O_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_O_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -1142,19 +1142,19 @@ name: image_sample_l_o_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_L_O_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_L_O_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_L_O_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_L_O_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX10-LABEL: name: image_sample_o_merged_v1v3
-# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_O_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_O_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -1162,12 +1162,12 @@ name: image_sample_o_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_O_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_O_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_O_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_O_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
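The test churn above and below is mechanical: wherever an instruction previously carried separate glc, slc, and dlc immediates, the rewritten line carries a single cache-policy immediate, so each updated operand list ends in fewer trailing zeros. As a minimal sketch of that folding, assuming one bit per former flag (the names and bit positions below are illustrative only, not quoted from SIDefines.h):

#include <cassert>
#include <cstdint>

// Hypothetical bit assignments for the merged cache-policy operand,
// one bit per former flag. The real values live in SIDefines.h; these
// are assumptions for illustration.
constexpr uint32_t GLC_BIT = 1u << 0;
constexpr uint32_t SLC_BIT = 1u << 1;
constexpr uint32_t DLC_BIT = 1u << 2;

// Pack the three former 0/1 immediates into one cpol immediate.
constexpr uint32_t packCachePolicy(bool glc, bool slc, bool dlc) {
  return (glc ? GLC_BIT : 0) | (slc ? SLC_BIT : 0) | (dlc ? DLC_BIT : 0);
}

int main() {
  // All-clear flags pack to 0, which is why the rewritten MIR lines in
  // these tests simply drop zeros rather than change any values.
  assert(packCachePolicy(false, false, false) == 0);
  assert(packCachePolicy(true, false, false) == GLC_BIT);
  return 0;
}

Under an encoding like this, a line such as "S_LOAD_DWORDX2_IMM %0, 36, 0, 0" loses one trailing zero because its two former flag operands become one packed immediate that is still zero.
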
diff --git a/llvm/test/CodeGen/AMDGPU/merge-image-sample.mir b/llvm/test/CodeGen/AMDGPU/merge-image-sample.mir
index 1c51d3a4a104..0ce5c14f52db 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-image-sample.mir
+++ b/llvm/test/CodeGen/AMDGPU/merge-image-sample.mir
@@ -1,7 +1,7 @@
# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefix=GFX9 %s
# GFX9-LABEL: name: image_sample_l_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -9,18 +9,18 @@ name: image_sample_l_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_l_merged_v1v3_reversed
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub3
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub0_sub1_sub2
@@ -28,18 +28,18 @@ name: image_sample_l_merged_v1v3_reversed
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 7, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_l_merged_v2v2
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4)
# GFX9: %{{[0-9]+}}:vreg_64 = COPY %8.sub0_sub1
# GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub2_sub3
@@ -47,18 +47,18 @@ name: image_sample_l_merged_v2v2
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 3, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
- %7:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 12, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 3, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
+ %7:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 12, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_l_merged_v2v2_reversed
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4)
# GFX9: %{{[0-9]+}}:vreg_64 = COPY %8.sub2_sub3
# GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub0_sub1
@@ -66,18 +66,18 @@ name: image_sample_l_merged_v2v2_reversed
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 12, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
- %7:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 3, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 12, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
+ %7:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 3, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_l_merged_v3v1
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4)
# GFX9: %{{[0-9]+}}:vreg_96 = COPY %8.sub0_sub1_sub2
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub3
@@ -85,18 +85,18 @@ name: image_sample_l_merged_v3v1
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 7, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
- %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_l_merged_v3v1_reversed
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4)
# GFX9: %{{[0-9]+}}:vreg_96 = COPY %8.sub1_sub2_sub3
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub0
@@ -104,295 +104,295 @@ name: image_sample_l_merged_v3v1_reversed
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
- %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_l_divided_merged
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
name: image_sample_l_divided_merged
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %8:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %9:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %7:vreg_128, %3:sgpr_256, %2:sgpr_128, 7, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
- %10:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %11:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 7, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %8:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %9:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %7:vreg_128, %3:sgpr_256, %2:sgpr_128, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %10:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %11:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_l_divided_not_merged
-# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_sample_l_divided_not_merged
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vreg_128 = COPY %2
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- IMAGE_STORE_V4_V4 %4:vreg_128, %5:vreg_128, %3:sgpr_256, 15, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16)
- %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 7, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ IMAGE_STORE_V4_V4 %4:vreg_128, %5:vreg_128, %3:sgpr_256, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16)
+ %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_l_dmask_overlapped_not_merged
-# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 4, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 4, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_sample_l_dmask_overlapped_not_merged
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 4, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 7, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 4, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_l_dmask_not_disjoint_not_merged
-# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 4, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 11, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 4, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 11, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_sample_l_dmask_not_disjoint_not_merged
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 4, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 11, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 4, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 11, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_l_not_merged_0
-# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %6, %3, %2, 7, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %6, %3, %2, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_sample_l_not_merged_0
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %8:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %6, %3, %2, 7, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %8:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %6, %3, %2, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_l_not_merged_1
-# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %6, %3, %2, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %6, %4, %2, 7, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %6, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %6, %4, %2, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_sample_l_not_merged_1
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
- %4:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
+ %4:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%5:vgpr_32 = COPY %2.sub3
- %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %6, %3, %2, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %8:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %6, %4, %2, 7, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %6, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %8:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %6, %4, %2, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_l_not_merged_2
-# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %6, %4, %2, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %6, %4, %3, 7, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %6, %4, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %6, %4, %3, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_sample_l_not_merged_2
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
%3:sgpr_128 = COPY $sgpr92_sgpr93_sgpr94_sgpr95
- %4:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %4:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%5:vgpr_32 = COPY %2.sub3
- %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %6, %4, %2, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %8:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %6, %4, %3, 7, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %6, %4, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %8:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %6, %4, %3, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_l_not_merged_3
-# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 1, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_sample_l_not_merged_3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 1, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_l_not_merged_4
-# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 1, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 1, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_sample_l_not_merged_4
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 1, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 1, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_l_not_merged_5
-# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 1, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 1, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_sample_l_not_merged_5
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 1, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 1, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_l_not_merged_6
-# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 1, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 1, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_sample_l_not_merged_6
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 1, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 1, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_l_not_merged_7
-# GFX9: %{{[0-9]+}}:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 1, 0, -1, 0, implicit $exec :: (dereferenceable load 8, addrspace 4)
-# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5, %3, %2, 8, 0, 0, 0, 1, 0, -1, 0, implicit $exec :: (dereferenceable load 8, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_sample_l_not_merged_7
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 1, 0, -1, 0, implicit $exec :: (dereferenceable load 8, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5, %3, %2, 8, 0, 0, 0, 1, 0, -1, 0, implicit $exec :: (dereferenceable load 8, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_l_not_merged_8
-# GFX9: %{{[0-9]+}}:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, 1, -1, 0, implicit $exec :: (dereferenceable load 8, addrspace 4)
-# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5, %3, %2, 8, 0, 0, 0, 0, 1, -1, 0, implicit $exec :: (dereferenceable load 8, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_sample_l_not_merged_8
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, 1, -1, 0, implicit $exec :: (dereferenceable load 8, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5, %3, %2, 8, 0, 0, 0, 0, 1, -1, 0, implicit $exec :: (dereferenceable load 8, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_l_not_merged_9
-# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_sample_l_not_merged_9
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_l_not_merged_10
-# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
-# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, 0, 0, -1, 1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, -1, 1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
name: image_sample_l_not_merged_10
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, 0, 0, -1, 1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, -1, 1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -400,18 +400,18 @@ name: image_sample_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_b_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -419,18 +419,18 @@ name: image_sample_b_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_B_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_B_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_B_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_B_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_b_cl_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -438,18 +438,18 @@ name: image_sample_b_cl_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_B_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_B_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_B_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_B_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_b_cl_o_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -457,18 +457,18 @@ name: image_sample_b_cl_o_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_B_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_B_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_B_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_B_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_b_o_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -476,18 +476,18 @@ name: image_sample_b_o_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_B_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_B_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_B_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_B_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_c_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -495,18 +495,18 @@ name: image_sample_c_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_C_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_C_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_C_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_C_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_cd_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -514,18 +514,18 @@ name: image_sample_cd_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_CD_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_CD_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_CD_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_CD_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_cd_cl_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -533,18 +533,18 @@ name: image_sample_cd_cl_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_CD_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_CD_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_CD_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_CD_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_cd_cl_o_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -552,18 +552,18 @@ name: image_sample_cd_cl_o_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_CD_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_CD_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_CD_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_CD_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_cd_o_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -571,18 +571,18 @@ name: image_sample_cd_o_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_CD_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_CD_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_CD_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_CD_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_cl_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -590,18 +590,18 @@ name: image_sample_cl_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_cl_o_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -609,18 +609,18 @@ name: image_sample_cl_o_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_c_b_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -628,18 +628,18 @@ name: image_sample_c_b_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_C_B_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_C_B_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_C_B_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_C_B_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_c_b_cl_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -647,18 +647,18 @@ name: image_sample_c_b_cl_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_C_B_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_C_B_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_C_B_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_C_B_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_c_b_cl_o_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -666,18 +666,18 @@ name: image_sample_c_b_cl_o_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_C_B_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_C_B_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_C_B_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_C_B_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_c_b_o_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -685,18 +685,18 @@ name: image_sample_c_b_o_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_C_B_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_C_B_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_C_B_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_C_B_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_c_cd_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -704,18 +704,18 @@ name: image_sample_c_cd_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_C_CD_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_C_CD_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_C_CD_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_C_CD_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_c_cd_cl_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -723,18 +723,18 @@ name: image_sample_c_cd_cl_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_C_CD_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_C_CD_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_C_CD_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_C_CD_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_c_cd_cl_o_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -742,18 +742,18 @@ name: image_sample_c_cd_cl_o_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_C_CD_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_C_CD_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_C_CD_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_C_CD_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_c_cd_o_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -761,18 +761,18 @@ name: image_sample_c_cd_o_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_C_CD_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_C_CD_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_C_CD_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_C_CD_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_c_cl_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -780,18 +780,18 @@ name: image_sample_c_cl_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_C_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_C_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_C_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_C_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_c_cl_o_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -799,18 +799,18 @@ name: image_sample_c_cl_o_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_C_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_C_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_C_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_C_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_c_d_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -818,18 +818,18 @@ name: image_sample_c_d_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_C_D_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_C_D_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_C_D_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_C_D_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_c_d_cl_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -837,18 +837,18 @@ name: image_sample_c_d_cl_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_C_D_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_C_D_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_C_D_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_C_D_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_c_d_cl_o_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -856,18 +856,18 @@ name: image_sample_c_d_cl_o_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_C_D_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_C_D_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_C_D_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_C_D_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_c_d_o_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -875,18 +875,18 @@ name: image_sample_c_d_o_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_C_D_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_C_D_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_C_D_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_C_D_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_c_l_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -894,18 +894,18 @@ name: image_sample_c_l_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_C_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_C_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_C_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_C_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_c_lz_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_LZ_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_LZ_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -913,18 +913,18 @@ name: image_sample_c_lz_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_C_LZ_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_C_LZ_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_C_LZ_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_C_LZ_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_c_lz_o_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_LZ_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_LZ_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -932,18 +932,18 @@ name: image_sample_c_lz_o_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_C_LZ_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_C_LZ_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_C_LZ_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_C_LZ_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_c_l_o_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_L_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_L_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -951,18 +951,18 @@ name: image_sample_c_l_o_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_C_L_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_C_L_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_C_L_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_C_L_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_c_o_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -970,18 +970,18 @@ name: image_sample_c_o_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_C_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_C_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_C_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_C_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_d_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -989,18 +989,18 @@ name: image_sample_d_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_D_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_D_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_D_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_D_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_d_cl_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -1008,18 +1008,18 @@ name: image_sample_d_cl_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_D_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_D_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_D_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_D_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_d_cl_o_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -1027,18 +1027,18 @@ name: image_sample_d_cl_o_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_D_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_D_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_D_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_D_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_d_o_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -1046,18 +1046,18 @@ name: image_sample_d_o_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_D_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_D_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_D_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_D_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_lz_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_LZ_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_LZ_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -1065,18 +1065,18 @@ name: image_sample_lz_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_LZ_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_LZ_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_lz_o_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_LZ_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_LZ_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -1084,18 +1084,18 @@ name: image_sample_lz_o_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_LZ_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_LZ_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_LZ_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_LZ_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_l_o_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -1103,18 +1103,18 @@ name: image_sample_l_o_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_L_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_L_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_L_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_L_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_sample_o_merged_v1v3
-# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
@@ -1122,12 +1122,12 @@ name: image_sample_o_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
- %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
- %6:vgpr_32 = IMAGE_SAMPLE_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
- %7:vreg_96 = IMAGE_SAMPLE_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+ %6:vgpr_32 = IMAGE_SAMPLE_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
...
---
diff --git a/llvm/test/CodeGen/AMDGPU/merge-load-store.mir b/llvm/test/CodeGen/AMDGPU/merge-load-store.mir
index ac78dbdc40e1..d1f5608edea2 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-load-store.mir
+++ b/llvm/test/CodeGen/AMDGPU/merge-load-store.mir
@@ -155,25 +155,25 @@ body: |
%6:sreg_32_xm0_xexec = S_MOV_B32 0
%7:sreg_32_xm0 = S_MOV_B32 0
%8:sreg_64_xexec = REG_SEQUENCE killed %6, %subreg.sub0, %7, %subreg.sub1
- %9:sgpr_128 = S_LOAD_DWORDX4_IMM killed %8, 0, 0, 0 :: (invariant load 16, addrspace 6)
- %31:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_IMM %9, 0, 0, 0 :: (dereferenceable invariant load 4)
+ %9:sgpr_128 = S_LOAD_DWORDX4_IMM killed %8, 0, 0 :: (invariant load 16, addrspace 6)
+ %31:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_IMM %9, 0, 0 :: (dereferenceable invariant load 4)
%10:sreg_32_xm0_xexec = COPY %31.sub0
%11:sreg_32_xm0_xexec = COPY killed %31.sub1
- %12:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %9, 2, 0, 0 :: (dereferenceable invariant load 4)
+ %12:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %9, 2, 0 :: (dereferenceable invariant load 4)
%13:sreg_64 = V_CMP_NE_U32_e64 killed %11, 0, implicit $exec
%15:sreg_64 = V_CMP_NE_U32_e64 killed %12, 0, implicit $exec
%17:sreg_64_xexec = S_AND_B64 killed %13, killed %15, implicit-def dead $scc
S_CMP_EQ_U32 killed %10, 0, implicit-def $scc
- %18:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %9, 3, 0, 0 :: (dereferenceable invariant load 4)
+ %18:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %9, 3, 0 :: (dereferenceable invariant load 4)
S_ENDPGM 0
...
---
# CHECK-LABEL: merge_mmos
-# CHECK: S_BUFFER_LOAD_DWORDX2_IMM %0, 0, 0, 0 :: (dereferenceable invariant load 8, align 4)
-# CHECK: BUFFER_LOAD_DWORDX2_OFFSET %0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 4)
-# CHECK: BUFFER_STORE_DWORDX2_OFFSET_exact killed %{{[0-9]+}}, %0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 4)
-# CHECK: BUFFER_LOAD_DWORDX2_OFFSET %0, 0, 64, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from %ir.ptr_addr1 + 64, align 4
-# CHECK: BUFFER_STORE_DWORDX2_OFFSET_exact killed %{{[0-9]+}}, %0, 0, 64, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into %ir.ptr_addr1 + 64, align 4
+# CHECK: S_BUFFER_LOAD_DWORDX2_IMM %0, 0, 0 :: (dereferenceable invariant load 8, align 4)
+# CHECK: BUFFER_LOAD_DWORDX2_OFFSET %0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 4)
+# CHECK: BUFFER_STORE_DWORDX2_OFFSET_exact killed %{{[0-9]+}}, %0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 4)
+# CHECK: BUFFER_LOAD_DWORDX2_OFFSET %0, 0, 64, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from %ir.ptr_addr1 + 64, align 4
+# CHECK: BUFFER_STORE_DWORDX2_OFFSET_exact killed %{{[0-9]+}}, %0, 0, 64, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into %ir.ptr_addr1 + 64, align 4
name: merge_mmos
tracksRegLiveness: true
body: |
@@ -181,24 +181,24 @@ body: |
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
%0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- %1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0, 0, 0, 0 :: (dereferenceable invariant load 4)
- %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0, 1, 0, 0 :: (dereferenceable invariant load 4)
- %3:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4)
- %4:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %0, 0, 4, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4)
- BUFFER_STORE_DWORD_OFFSET_exact %3, %0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4)
- BUFFER_STORE_DWORD_OFFSET_exact %4, %0, 0, 4, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4)
- %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %0, 0, 64, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from %ir.ptr_addr1 + 64)
- %6:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %0, 0, 68, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from %ir.ptr_addr1 + 68)
- BUFFER_STORE_DWORD_OFFSET_exact %5, %0, 0, 64, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into %ir.ptr_addr1 + 64)
- BUFFER_STORE_DWORD_OFFSET_exact %6, %0, 0, 68, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into %ir.ptr_addr1 + 68)
+ %1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0, 0, 0 :: (dereferenceable invariant load 4)
+ %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0, 1, 0 :: (dereferenceable invariant load 4)
+ %3:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4)
+ %4:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %0, 0, 4, 0, 0, 0, implicit $exec :: (dereferenceable load 4)
+ BUFFER_STORE_DWORD_OFFSET_exact %3, %0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4)
+ BUFFER_STORE_DWORD_OFFSET_exact %4, %0, 0, 4, 0, 0, 0, implicit $exec :: (dereferenceable store 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %0, 0, 64, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from %ir.ptr_addr1 + 64)
+ %6:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %0, 0, 68, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from %ir.ptr_addr1 + 68)
+ BUFFER_STORE_DWORD_OFFSET_exact %5, %0, 0, 64, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into %ir.ptr_addr1 + 64)
+ BUFFER_STORE_DWORD_OFFSET_exact %6, %0, 0, 68, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into %ir.ptr_addr1 + 68)
S_ENDPGM 0
...
---
# CHECK-LABEL: reorder_offsets
-# CHECK-DAG: BUFFER_STORE_DWORDX2_OFFSET_exact killed %{{[0-9]+}}, %0, 0, 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into %ir.reorder_addr1 + 16, align 4, addrspace 1)
-# CHECK-DAG: BUFFER_STORE_DWORDX4_OFFSET_exact killed %{{[0-9]+}}, %0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into %ir.reorder_addr1, align 4, addrspace 1)
+# CHECK-DAG: BUFFER_STORE_DWORDX2_OFFSET_exact killed %{{[0-9]+}}, %0, 0, 16, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into %ir.reorder_addr1 + 16, align 4, addrspace 1)
+# CHECK-DAG: BUFFER_STORE_DWORDX4_OFFSET_exact killed %{{[0-9]+}}, %0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into %ir.reorder_addr1, align 4, addrspace 1)
name: reorder_offsets
tracksRegLiveness: true
@@ -208,12 +208,12 @@ body: |
%0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- BUFFER_STORE_DWORD_OFFSET_exact %1, %0, 0, 4, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into %ir.reorder_addr1 + 4)
- BUFFER_STORE_DWORD_OFFSET_exact %1, %0, 0, 8, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into %ir.reorder_addr1 + 8)
- BUFFER_STORE_DWORD_OFFSET_exact %1, %0, 0, 12, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into %ir.reorder_addr1 + 12)
- BUFFER_STORE_DWORD_OFFSET_exact %1, %0, 0, 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into %ir.reorder_addr1 + 16)
- BUFFER_STORE_DWORD_OFFSET_exact %1, %0, 0, 20, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into %ir.reorder_addr1 + 20)
- BUFFER_STORE_DWORD_OFFSET_exact %1, %0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into %ir.reorder_addr1)
+ BUFFER_STORE_DWORD_OFFSET_exact %1, %0, 0, 4, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into %ir.reorder_addr1 + 4)
+ BUFFER_STORE_DWORD_OFFSET_exact %1, %0, 0, 8, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into %ir.reorder_addr1 + 8)
+ BUFFER_STORE_DWORD_OFFSET_exact %1, %0, 0, 12, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into %ir.reorder_addr1 + 12)
+ BUFFER_STORE_DWORD_OFFSET_exact %1, %0, 0, 16, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into %ir.reorder_addr1 + 16)
+ BUFFER_STORE_DWORD_OFFSET_exact %1, %0, 0, 20, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into %ir.reorder_addr1 + 20)
+ BUFFER_STORE_DWORD_OFFSET_exact %1, %0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into %ir.reorder_addr1)
S_ENDPGM 0
diff --git a/llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir b/llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir
index 11395e99e523..2a55cfdd221f 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir
+++ b/llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir
@@ -6,7 +6,7 @@
#
# GFX9-LABEL: name: gfx9_tbuffer_load_x_xyz
-# GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 126, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 126, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %7.sub1_sub2_sub3
name: gfx9_tbuffer_load_x_xyz
@@ -17,13 +17,13 @@ body: |
%2:sgpr_32 = COPY $sgpr2
%3:sgpr_32 = COPY $sgpr3
%5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
- %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %8:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %5:sgpr_128, 0, 8, 125, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4)
+ %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %5:sgpr_128, 0, 8, 125, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4)
...
---
# GFX9-LABEL: name: gfx9_tbuffer_load_xyz_x
-# GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 126, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 126, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4)
# GFX9: %{{[0-9]+}}:vreg_96 = COPY %7.sub0_sub1_sub2
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub3
name: gfx9_tbuffer_load_xyz_x
@@ -34,13 +34,13 @@ body: |
%2:sgpr_32 = COPY $sgpr2
%3:sgpr_32 = COPY $sgpr3
%5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
- %7:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %5:sgpr_128, 0, 4, 125, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4)
- %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %7:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %5:sgpr_128, 0, 4, 125, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4)
+ %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
...
---
# GFX9-LABEL: name: gfx9_tbuffer_load_xy_xy
-# GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 126, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 126, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4)
# GFX9: %{{[0-9]+}}:vreg_64 = COPY %7.sub0_sub1
# GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %7.sub2_sub3
name: gfx9_tbuffer_load_xy_xy
@@ -51,13 +51,13 @@ body: |
%2:sgpr_32 = COPY $sgpr2
%3:sgpr_32 = COPY $sgpr3
%5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
- %7:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 4, 123, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
- %8:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 12, 123, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
+ %7:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 4, 123, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
+ %8:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 12, 123, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
...
---
# GFX9-LABEL: name: gfx9_tbuffer_load_x_xy
-# GFX9: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 4, 125, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 4, 125, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0
# GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %7.sub1_sub2
name: gfx9_tbuffer_load_x_xy
@@ -68,13 +68,13 @@ body: |
%2:sgpr_32 = COPY $sgpr2
%3:sgpr_32 = COPY $sgpr3
%5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
- %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %8:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 8, 123, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
+ %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 8, 123, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
...
---
# GFX9-LABEL: name: gfx9_tbuffer_load_xy_x
-# GFX9: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 4, 125, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 4, 125, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4)
# GFX9: %{{[0-9]+}}:vreg_64 = COPY %7.sub0_sub1
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub2
name: gfx9_tbuffer_load_xy_x
@@ -85,14 +85,14 @@ body: |
%2:sgpr_32 = COPY $sgpr2
%3:sgpr_32 = COPY $sgpr3
%5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
- %7:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 4, 123, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
- %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 12, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %7:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 4, 123, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
+ %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 12, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
...
---
# GFX9-LABEL: name: gfx9_tbuffer_load_x_x
-# GFX9: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 123, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 123, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub1
@@ -104,13 +104,13 @@ body: |
%2:sgpr_32 = COPY $sgpr2
%3:sgpr_32 = COPY $sgpr3
%5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
- %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
...
---
# GFX9-LABEL: name: gfx9_tbuffer_load_x_x_format_32_32_32_32
-# GFX9: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 123, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 123, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub1
@@ -122,24 +122,24 @@ body: |
%2:sgpr_32 = COPY $sgpr2
%3:sgpr_32 = COPY $sgpr3
%5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
- %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 126, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 126, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 126, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 126, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
...
---
# GFX9-LABEL: name: gfx9_tbuffer_load_float_32
-# GFX9: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 123, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 123, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %14.sub0
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %14.sub1
-# GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 126, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 126, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4)
# GFX9: %{{[0-9]+}}:vreg_96 = COPY %17.sub0_sub1_sub2
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %17.sub3
# GFX9: %{{[0-9]+}}:vreg_64 = COPY %16.sub0_sub1
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %16.sub2
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %15.sub0
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %15.sub1
-# GFX9: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 125, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 125, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4)
# GFX9: %{{[0-9]+}}:vreg_64 = COPY %19.sub0_sub1
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %19.sub2
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %18.sub0
@@ -153,30 +153,30 @@ body: |
%2:sgpr_32 = COPY $sgpr2
%3:sgpr_32 = COPY $sgpr3
%5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
- %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
...
---
# GFX9-LABEL: name: gfx9_tbuffer_load_sint_32
-# GFX9: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 91, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 91, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %14.sub0
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %14.sub1
-# GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 94, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4)
# GFX9: %{{[0-9]+}}:vreg_96 = COPY %17.sub0_sub1_sub2
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %17.sub3
# GFX9: %{{[0-9]+}}:vreg_64 = COPY %16.sub0_sub1
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %16.sub2
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %15.sub0
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %15.sub1
-# GFX9: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 93, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 93, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4)
# GFX9: %{{[0-9]+}}:vreg_64 = COPY %19.sub0_sub1
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %19.sub2
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %18.sub0
@@ -190,30 +190,30 @@ body: |
%2:sgpr_32 = COPY $sgpr2
%3:sgpr_32 = COPY $sgpr3
%5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
- %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 84, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 84, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 84, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 84, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 84, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 84, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 84, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 84, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 84, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 84, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 84, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 84, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 84, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 84, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 84, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 84, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 84, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 84, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
...
---
# GFX9-LABEL: name: gfx9_tbuffer_load_uint_32
-# GFX9: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 75, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 75, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %14.sub0
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %14.sub1
-# GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4)
# GFX9: %{{[0-9]+}}:vreg_96 = COPY %17.sub0_sub1_sub2
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %17.sub3
# GFX9: %{{[0-9]+}}:vreg_64 = COPY %16.sub0_sub1
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %16.sub2
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %15.sub0
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %15.sub1
-# GFX9: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 77, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 77, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4)
# GFX9: %{{[0-9]+}}:vreg_64 = COPY %19.sub0_sub1
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %19.sub2
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %18.sub0
@@ -227,15 +227,15 @@ body: |
%2:sgpr_32 = COPY $sgpr2
%3:sgpr_32 = COPY $sgpr3
%5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
- %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 68, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 68, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 68, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 68, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 68, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 68, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 68, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 68, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 68, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 68, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 68, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 68, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 68, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 68, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 68, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 68, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 68, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 68, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
...
---
@@ -245,15 +245,15 @@ body: |
# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2
# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3
# GFX9: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
-# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
-# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 114, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
-# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 16, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
-# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 20, 114, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
-# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 24, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
-# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 28, 114, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
-# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 36, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
-# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 40, 114, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
-# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 44, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 114, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 16, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 20, 114, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 24, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 28, 114, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 36, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 40, 114, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 44, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
name: gfx9_tbuffer_load_not_merged_data_format_mismatch
@@ -264,15 +264,15 @@ body: |
%2:sgpr_32 = COPY $sgpr2
%3:sgpr_32 = COPY $sgpr3
%5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
- %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 114, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 114, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 114, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 114, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 114, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 114, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 114, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 114, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
...
---
@@ -282,15 +282,15 @@ body: |
# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2
# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3
# GFX9: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
-# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
-# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 84, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
-# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 16, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
-# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 20, 84, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
-# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 24, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
-# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 28, 84, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
-# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 36, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
-# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 40, 84, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
-# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 44, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 84, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 16, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 20, 84, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 24, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 28, 84, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 36, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 40, 84, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 44, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
name: gfx9_tbuffer_load_not_merged_num_format_mismatch
body: |
bb.0.entry:
@@ -299,22 +299,22 @@ body: |
%2:sgpr_32 = COPY $sgpr2
%3:sgpr_32 = COPY $sgpr3
%5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
- %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 84, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 84, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 84, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 84, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 84, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 84, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 84, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 84, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
...
---
# GFX9-LABEL: name: gfx9_tbuffer_store_x_xyz
# GFX9: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE %3, %subreg.sub0, %2, %subreg.sub1, %1, %subreg.sub2
# GFX9: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE %0, %subreg.sub0, %9, %subreg.sub1_sub2_sub3
-# GFX9: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %10, %8, 0, 4, 126, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %10, %8, 0, 4, 126, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4)
name: gfx9_tbuffer_store_x_xyz
body: |
bb.0.entry:
@@ -329,8 +329,8 @@ body: |
%0:sgpr_32 = COPY $sgpr0
%13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
%14:vreg_96 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1, %6:vgpr_32, %subreg.sub2
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact %14:vreg_96, %13:sgpr_128, 0, 8, 125, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact %14:vreg_96, %13:sgpr_128, 0, 8, 125, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4)
...
---
@@ -339,7 +339,7 @@ body: |
# GFX9-LABEL: name: gfx9_tbuffer_store_xyz_x
# GFX9: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE %3, %subreg.sub0, %2, %subreg.sub1, %1, %subreg.sub2
# GFX9: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE %9, %subreg.sub0_sub1_sub2, %0, %subreg.sub3
-# GFX9: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %10, %8, 0, 4, 126, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %10, %8, 0, 4, 126, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4)
name: gfx9_tbuffer_store_xyz_x
body: |
bb.0.entry:
@@ -354,8 +354,8 @@ body: |
%0:sgpr_32 = COPY $sgpr0
%13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
%14:vreg_96 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1, %6:vgpr_32, %subreg.sub2
- TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact %14:vreg_96, %13:sgpr_128, 0, 4, 125, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 16, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact %14:vreg_96, %13:sgpr_128, 0, 4, 125, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 16, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
...
---
@@ -363,7 +363,7 @@ body: |
# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %2, %subreg.sub1
# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %1, %subreg.sub0, %0, %subreg.sub1
# GFX9: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE %9, %subreg.sub0_sub1, %10, %subreg.sub2_sub3
-# GFX9: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %11, %8, 0, 4, 126, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %11, %8, 0, 4, 126, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4)
name: gfx9_tbuffer_store_xy_xy
body: |
bb.0.entry:
@@ -379,15 +379,15 @@ body: |
%13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
%14:vreg_64 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1
%15:vreg_64 = REG_SEQUENCE %6:vgpr_32, %subreg.sub0, %7:vgpr_32, %subreg.sub1
- TBUFFER_STORE_FORMAT_XY_OFFSET_exact %14:vreg_64, %13:sgpr_128, 0, 4, 123, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_XY_OFFSET_exact %15:vreg_64, %13:sgpr_128, 0, 12, 123, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_XY_OFFSET_exact %14:vreg_64, %13:sgpr_128, 0, 4, 123, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_XY_OFFSET_exact %15:vreg_64, %13:sgpr_128, 0, 12, 123, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
...
---
# GFX9-LABEL: name: gfx9_tbuffer_store_x_xy
# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %2, %subreg.sub1
# GFX9: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE %0, %subreg.sub0, killed %10, %subreg.sub1_sub2
-# GFX9: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %11, %8, 0, 4, 125, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %11, %8, 0, 4, 125, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4)
name: gfx9_tbuffer_store_x_xy
body: |
bb.0.entry:
@@ -402,15 +402,15 @@ body: |
%0:sgpr_32 = COPY $sgpr0
%13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
%14:vreg_64 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_XY_OFFSET_exact %15:vreg_64, %13:sgpr_128, 0, 8, 123, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_XY_OFFSET_exact %15:vreg_64, %13:sgpr_128, 0, 8, 123, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
...
---
# GFX9-LABEL: name: gfx9_tbuffer_store_xy_x
# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %2, %subreg.sub1
# GFX9: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE %9, %subreg.sub0_sub1, %0, %subreg.sub2
-# GFX9: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %10, %8, 0, 4, 125, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %10, %8, 0, 4, 125, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4)
name: gfx9_tbuffer_store_xy_x
body: |
bb.0.entry:
@@ -426,15 +426,15 @@ body: |
%0:sgpr_32 = COPY $sgpr0
%13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
%14:vreg_64 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1
- TBUFFER_STORE_FORMAT_XY_OFFSET_exact %14:vreg_64, %13:sgpr_128, 0, 4, 123, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 12, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_XY_OFFSET_exact %14:vreg_64, %13:sgpr_128, 0, 4, 123, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 12, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
...
---
# GFX9-LABEL: name: gfx9_tbuffer_store_x_x
# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %1, %subreg.sub0, %0, %subreg.sub1
-# GFX9: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %9, %8, 0, 4, 123, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %9, %8, 0, 4, 123, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
name: gfx9_tbuffer_store_x_x
body: |
bb.0.entry:
@@ -448,14 +448,14 @@ body: |
%1:sgpr_32 = COPY $sgpr1
%0:sgpr_32 = COPY $sgpr0
%13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 8, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 8, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
...
---
# GFX9-LABEL: name: gfx9_tbuffer_store_x_x_format_32_32_32_32
# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %1, %subreg.sub0, %0, %subreg.sub1
-# GFX9: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %9, %8, 0, 4, 123, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %9, %8, 0, 4, 123, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
name: gfx9_tbuffer_store_x_x_format_32_32_32_32
body: |
bb.0.entry:
@@ -469,8 +469,8 @@ body: |
%1:sgpr_32 = COPY $sgpr1
%0:sgpr_32 = COPY $sgpr0
%13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 4, 126, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 8, 126, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 4, 126, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 8, 126, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
...
---
@@ -490,14 +490,14 @@ body: |
# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0
# GFX9: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3
# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %7, %subreg.sub1
-# GFX9: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 123, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 123, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %6, %subreg.sub0, %5, %subreg.sub1
# GFX9: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE killed %15, %subreg.sub0_sub1, %4, %subreg.sub2
# GFX9: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE killed %16, %subreg.sub0_sub1_sub2, %3, %subreg.sub3
-# GFX9: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %17, %13, 0, 16, 126, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %17, %13, 0, 16, 126, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4)
# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %1, %subreg.sub1
# GFX9: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE killed %18, %subreg.sub0_sub1, %0, %subreg.sub2
-# GFX9: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %19, %13, 0, 36, 125, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %19, %13, 0, 36, 125, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4)
name: gfx9_tbuffer_store_float32
body: |
bb.0.entry:
@@ -516,15 +516,15 @@ body: |
%1:sgpr_32 = COPY $sgpr1
%0:sgpr_32 = COPY $sgpr0
%13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
...
---
@@ -544,14 +544,14 @@ body: |
# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0
# GFX9: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3
# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %7, %subreg.sub1
-# GFX9: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 91, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 91, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %6, %subreg.sub0, %5, %subreg.sub1
# GFX9: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE killed %15, %subreg.sub0_sub1, %4, %subreg.sub2
# GFX9: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE killed %16, %subreg.sub0_sub1_sub2, %3, %subreg.sub3
-# GFX9: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %17, %13, 0, 16, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %17, %13, 0, 16, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4)
# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %1, %subreg.sub1
# GFX9: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE killed %18, %subreg.sub0_sub1, %0, %subreg.sub2
-# GFX9: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %19, %13, 0, 36, 93, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %19, %13, 0, 36, 93, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4)
name: gfx9_tbuffer_store_sint32
body: |
bb.0.entry:
@@ -570,15 +570,15 @@ body: |
%1:sgpr_32 = COPY $sgpr1
%0:sgpr_32 = COPY $sgpr0
%13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 84, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 84, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 84, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 84, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 84, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 84, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 84, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 84, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 84, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 84, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 84, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 84, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 84, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 84, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 84, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 84, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 84, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 84, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
...
---
@@ -598,14 +598,14 @@ body: |
# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0
# GFX9: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3
# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %7, %subreg.sub1
-# GFX9: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 75, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 75, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %6, %subreg.sub0, %5, %subreg.sub1
# GFX9: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE killed %15, %subreg.sub0_sub1, %4, %subreg.sub2
# GFX9: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE killed %16, %subreg.sub0_sub1_sub2, %3, %subreg.sub3
-# GFX9: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %17, %13, 0, 16, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %17, %13, 0, 16, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4)
# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %1, %subreg.sub1
# GFX9: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE killed %18, %subreg.sub0_sub1, %0, %subreg.sub2
-# GFX9: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %19, %13, 0, 36, 77, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %19, %13, 0, 36, 77, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4)
name: gfx9_tbuffer_store_uint32
body: |
bb.0.entry:
@@ -624,15 +624,15 @@ body: |
%1:sgpr_32 = COPY $sgpr1
%0:sgpr_32 = COPY $sgpr0
%13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 68, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 68, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 68, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 68, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 68, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 68, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 68, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 68, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 68, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 68, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 68, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 68, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 68, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 68, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 68, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 68, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 68, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 68, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
...
---
@@ -651,15 +651,15 @@ body: |
# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1
# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0
# GFX9: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3
-# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %8, %13, 0, 4, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
-# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %7, %13, 0, 8, 84, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
-# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %6, %13, 0, 16, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
-# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %5, %13, 0, 20, 84, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
-# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %4, %13, 0, 24, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
-# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %3, %13, 0, 28, 84, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
-# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %2, %13, 0, 36, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
-# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %1, %13, 0, 40, 84, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
-# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %0, %13, 0, 44, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %8, %13, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %7, %13, 0, 8, 84, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %6, %13, 0, 16, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %5, %13, 0, 20, 84, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %4, %13, 0, 24, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %3, %13, 0, 28, 84, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %2, %13, 0, 36, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %1, %13, 0, 40, 84, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %0, %13, 0, 44, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
name: gfx9_tbuffer_store_not_merged_data_format_mismatch
body: |
bb.0.entry:
@@ -678,15 +678,15 @@ body: |
%1:sgpr_32 = COPY $sgpr1
%0:sgpr_32 = COPY $sgpr0
%13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 84, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 84, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 84, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 84, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 84, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 84, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 84, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 84, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
...
---
@@ -705,15 +705,15 @@ body: |
# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1
# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0
# GFX9: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3
-# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %8, %13, 0, 4, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
-# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %7, %13, 0, 8, 114, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
-# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %6, %13, 0, 16, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
-# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %5, %13, 0, 20, 114, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
-# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %4, %13, 0, 24, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
-# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %3, %13, 0, 28, 114, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
-# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %2, %13, 0, 36, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
-# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %1, %13, 0, 40, 114, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
-# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %0, %13, 0, 44, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %8, %13, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %7, %13, 0, 8, 114, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %6, %13, 0, 16, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %5, %13, 0, 20, 114, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %4, %13, 0, 24, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %3, %13, 0, 28, 114, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %2, %13, 0, 36, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %1, %13, 0, 40, 114, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %0, %13, 0, 44, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
name: gfx9_tbuffer_store_not_merged_num_format_mismatch
body: |
bb.0.entry:
@@ -732,22 +732,22 @@ body: |
%1:sgpr_32 = COPY $sgpr1
%0:sgpr_32 = COPY $sgpr0
%13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 114, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 114, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 114, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 114, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 114, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 114, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 114, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 114, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
...
---
# GFX9-LABEL: name: gfx9_tbuffer_load_not_merged_swizzled_0
-# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 116, 0, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
-# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 116, 0, 0, 1, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
name: gfx9_tbuffer_load_not_merged_swizzled_0
body: |
bb.0.entry:
@@ -756,15 +756,15 @@ body: |
%2:sgpr_32 = COPY $sgpr2
%3:sgpr_32 = COPY $sgpr3
%5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
- %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 1, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
...
---
# GFX9-LABEL: name: gfx9_tbuffer_load_not_merged_swizzled_1
-# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
-# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 116, 0, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 116, 0, 0, 1, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
name: gfx9_tbuffer_load_not_merged_swizzled_1
body: |
bb.0.entry:
@@ -773,8 +773,8 @@ body: |
%2:sgpr_32 = COPY $sgpr2
%3:sgpr_32 = COPY $sgpr3
%5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
- %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 116, 0, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 116, 0, 0, 1, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
...
---
@@ -784,7 +784,7 @@ body: |
#
# GFX10-LABEL: name: gfx10_tbuffer_load_x_xyz
-# GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 77, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 77, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0
# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %7.sub1_sub2_sub3
name: gfx10_tbuffer_load_x_xyz
@@ -795,13 +795,13 @@ body: |
%2:sgpr_32 = COPY $sgpr2
%3:sgpr_32 = COPY $sgpr3
%5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
- %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %8:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %5:sgpr_128, 0, 8, 74, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4)
+ %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %5:sgpr_128, 0, 8, 74, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4)
...
---
# GFX10-LABEL: name: gfx10_tbuffer_load_xyz_x
-# GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 77, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 77, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4)
# GFX10: %{{[0-9]+}}:vreg_96 = COPY %7.sub0_sub1_sub2
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub3
name: gfx10_tbuffer_load_xyz_x
@@ -812,13 +812,13 @@ body: |
%2:sgpr_32 = COPY $sgpr2
%3:sgpr_32 = COPY $sgpr3
%5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
- %7:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %5:sgpr_128, 0, 4, 74, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4)
+ %7:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %5:sgpr_128, 0, 4, 74, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4)
...
---
# GFX10-LABEL: name: gfx10_tbuffer_load_xy_xy
-# GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 77, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 77, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4)
# GFX10: %{{[0-9]+}}:vreg_64 = COPY %7.sub0_sub1
# GFX10: %{{[0-9]+}}:vreg_64 = COPY killed %7.sub2_sub3
name: gfx10_tbuffer_load_xy_xy
@@ -829,13 +829,13 @@ body: |
%2:sgpr_32 = COPY $sgpr2
%3:sgpr_32 = COPY $sgpr3
%5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
- %7:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 4, 64, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
- %8:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 12, 64, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
+ %7:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 4, 64, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
+ %8:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 12, 64, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
...
---
# GFX10-LABEL: name: gfx10_tbuffer_load_x_xy
-# GFX10: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 4, 74, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 4, 74, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0
# GFX10: %{{[0-9]+}}:vreg_64 = COPY killed %7.sub1_sub2
name: gfx10_tbuffer_load_x_xy
@@ -846,13 +846,13 @@ body: |
%2:sgpr_32 = COPY $sgpr2
%3:sgpr_32 = COPY $sgpr3
%5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
- %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %8:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 8, 64, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
+ %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 8, 64, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
...
---
# GFX10-LABEL: name: gfx10_tbuffer_load_xy_x
-# GFX10: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 4, 74, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 4, 74, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4)
# GFX10: %{{[0-9]+}}:vreg_64 = COPY %7.sub0_sub1
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub2
name: gfx10_tbuffer_load_xy_x
@@ -863,14 +863,14 @@ body: |
%2:sgpr_32 = COPY $sgpr2
%3:sgpr_32 = COPY $sgpr3
%5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
- %7:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 4, 64, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
- %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 12, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %7:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 4, 64, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
+ %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 12, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
...
---
# GFX10-LABEL: name: gfx10_tbuffer_load_x_x
-# GFX10: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 64, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 64, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub1
@@ -882,13 +882,13 @@ body: |
%2:sgpr_32 = COPY $sgpr2
%3:sgpr_32 = COPY $sgpr3
%5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
- %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
...
---
# GFX10-LABEL: name: gfx10_tbuffer_load_x_x_format_32_32_32_32
-# GFX10: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 64, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 64, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub1
@@ -900,24 +900,24 @@ body: |
%2:sgpr_32 = COPY $sgpr2
%3:sgpr_32 = COPY $sgpr3
%5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
- %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 77, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 77, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 77, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 77, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
...
---
# GFX10-LABEL: name: gfx10_tbuffer_load_float_32
-# GFX10: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 64, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 64, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %14.sub0
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %14.sub1
-# GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 77, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 77, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4)
# GFX10: %{{[0-9]+}}:vreg_96 = COPY %17.sub0_sub1_sub2
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %17.sub3
# GFX10: %{{[0-9]+}}:vreg_64 = COPY %16.sub0_sub1
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %16.sub2
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %15.sub0
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %15.sub1
-# GFX10: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 74, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 74, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4)
# GFX10: %{{[0-9]+}}:vreg_64 = COPY %19.sub0_sub1
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %19.sub2
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %18.sub0
@@ -931,30 +931,30 @@ body: |
%2:sgpr_32 = COPY $sgpr2
%3:sgpr_32 = COPY $sgpr3
%5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
- %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
...
---
# GFX10-LABEL: name: gfx10_tbuffer_load_sint_32
-# GFX10: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 63, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 63, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %14.sub0
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %14.sub1
-# GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 76, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 76, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4)
# GFX10: %{{[0-9]+}}:vreg_96 = COPY %17.sub0_sub1_sub2
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %17.sub3
# GFX10: %{{[0-9]+}}:vreg_64 = COPY %16.sub0_sub1
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %16.sub2
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %15.sub0
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %15.sub1
-# GFX10: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 73, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 73, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4)
# GFX10: %{{[0-9]+}}:vreg_64 = COPY %19.sub0_sub1
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %19.sub2
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %18.sub0
@@ -968,30 +968,30 @@ body: |
%2:sgpr_32 = COPY $sgpr2
%3:sgpr_32 = COPY $sgpr3
%5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
- %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 21, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 21, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 21, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 21, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 21, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 21, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 21, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 21, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 21, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 21, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 21, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 21, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 21, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 21, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 21, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 21, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 21, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 21, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
...
---
# GFX10-LABEL: name: gfx10_tbuffer_load_uint_32
-# GFX10: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 62, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 62, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %14.sub0
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %14.sub1
-# GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 75, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 75, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4)
# GFX10: %{{[0-9]+}}:vreg_96 = COPY %17.sub0_sub1_sub2
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %17.sub3
# GFX10: %{{[0-9]+}}:vreg_64 = COPY %16.sub0_sub1
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %16.sub2
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %15.sub0
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %15.sub1
-# GFX10: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 72, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 72, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4)
# GFX10: %{{[0-9]+}}:vreg_64 = COPY %19.sub0_sub1
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %19.sub2
# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %18.sub0
@@ -1005,15 +1005,15 @@ body: |
%2:sgpr_32 = COPY $sgpr2
%3:sgpr_32 = COPY $sgpr3
%5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
- %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 20, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 20, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 20, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 20, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 20, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 20, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 20, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 20, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 20, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 20, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 20, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 20, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 20, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 20, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 20, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 20, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 20, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 20, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
...
---
@@ -1023,15 +1023,15 @@ body: |
# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2
# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3
# GFX10: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
-# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
-# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 13, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
-# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 16, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
-# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 20, 13, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
-# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 24, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
-# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 28, 13, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
-# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 36, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
-# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 40, 13, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
-# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 44, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 13, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 16, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 20, 13, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 24, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 28, 13, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 36, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 40, 13, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 44, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
name: gfx10_tbuffer_load_not_merged_data_format_mismatch
@@ -1042,15 +1042,15 @@ body: |
%2:sgpr_32 = COPY $sgpr2
%3:sgpr_32 = COPY $sgpr3
%5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
- %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 13, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 13, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 13, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 13, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 13, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 13, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 13, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 13, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
...
---
@@ -1060,15 +1060,15 @@ body: |
# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2
# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3
# GFX10: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
-# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
-# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 21, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
-# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 16, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
-# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 20, 21, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
-# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 24, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
-# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 28, 21, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
-# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 36, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
-# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 40, 21, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
-# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 44, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 21, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 16, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 20, 21, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 24, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 28, 21, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 36, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 40, 21, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 44, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
name: gfx10_tbuffer_load_not_merged_num_format_mismatch
body: |
bb.0.entry:
@@ -1077,15 +1077,15 @@ body: |
%2:sgpr_32 = COPY $sgpr2
%3:sgpr_32 = COPY $sgpr3
%5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
- %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 21, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 21, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 21, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 21, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 21, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 21, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 21, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 21, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
...
---
@@ -1094,7 +1094,7 @@ body: |
# GFX10-LABEL: name: gfx10_tbuffer_store_x_xyz
# GFX10: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE %3, %subreg.sub0, %2, %subreg.sub1, %1, %subreg.sub2
# GFX10: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE %0, %subreg.sub0, %9, %subreg.sub1_sub2_sub3
-# GFX10: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %10, %8, 0, 4, 77, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %10, %8, 0, 4, 77, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4)
name: gfx10_tbuffer_store_x_xyz
body: |
bb.0.entry:
@@ -1109,8 +1109,8 @@ body: |
%0:sgpr_32 = COPY $sgpr0
%13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
%14:vreg_96 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1, %6:vgpr_32, %subreg.sub2
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact %14:vreg_96, %13:sgpr_128, 0, 8, 74, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact %14:vreg_96, %13:sgpr_128, 0, 8, 74, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4)
...
---
@@ -1118,7 +1118,7 @@ body: |
# GFX10-LABEL: name: gfx10_tbuffer_store_xyz_x
# GFX10: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE %3, %subreg.sub0, %2, %subreg.sub1, %1, %subreg.sub2
# GFX10: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE %9, %subreg.sub0_sub1_sub2, %0, %subreg.sub3
-# GFX10: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %10, %8, 0, 4, 77, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %10, %8, 0, 4, 77, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4)
name: gfx10_tbuffer_store_xyz_x
body: |
bb.0.entry:
@@ -1133,8 +1133,8 @@ body: |
%0:sgpr_32 = COPY $sgpr0
%13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
%14:vreg_96 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1, %6:vgpr_32, %subreg.sub2
- TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact %14:vreg_96, %13:sgpr_128, 0, 4, 74, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 16, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact %14:vreg_96, %13:sgpr_128, 0, 4, 74, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 16, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
...
---
@@ -1142,7 +1142,7 @@ body: |
# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %2, %subreg.sub1
# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %1, %subreg.sub0, %0, %subreg.sub1
# GFX10: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE %9, %subreg.sub0_sub1, %10, %subreg.sub2_sub3
-# GFX10: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %11, %8, 0, 4, 77, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %11, %8, 0, 4, 77, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4)
name: gfx10_tbuffer_store_xy_xy
body: |
bb.0.entry:
@@ -1158,15 +1158,15 @@ body: |
%13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
%14:vreg_64 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1
%15:vreg_64 = REG_SEQUENCE %6:vgpr_32, %subreg.sub0, %7:vgpr_32, %subreg.sub1
- TBUFFER_STORE_FORMAT_XY_OFFSET_exact %14:vreg_64, %13:sgpr_128, 0, 4, 64, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_XY_OFFSET_exact %15:vreg_64, %13:sgpr_128, 0, 12, 64, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_XY_OFFSET_exact %14:vreg_64, %13:sgpr_128, 0, 4, 64, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_XY_OFFSET_exact %15:vreg_64, %13:sgpr_128, 0, 12, 64, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
...
---
# GFX10-LABEL: name: gfx10_tbuffer_store_x_xy
# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %2, %subreg.sub1
# GFX10: %{{[0-9]+}}:vreg_64, %subreg.sub1_sub2
-# GFX10: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %11, %8, 0, 4, 74, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %11, %8, 0, 4, 74, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4)
name: gfx10_tbuffer_store_x_xy
body: |
bb.0.entry:
@@ -1181,15 +1181,15 @@ body: |
%0:sgpr_32 = COPY $sgpr0
%13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
%14:vreg_64 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_XY_OFFSET_exact %15:vreg_64, %13:sgpr_128, 0, 8, 64, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_XY_OFFSET_exact %15:vreg_64, %13:sgpr_128, 0, 8, 64, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
...
---
# GFX10-LABEL: name: gfx10_tbuffer_store_xy_x
# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %2, %subreg.sub1
# GFX10: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE %9, %subreg.sub0_sub1, %0, %subreg.sub2
-# GFX10: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %10, %8, 0, 4, 74, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %10, %8, 0, 4, 74, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4)
name: gfx10_tbuffer_store_xy_x
body: |
bb.0.entry:
@@ -1205,15 +1205,15 @@ body: |
%0:sgpr_32 = COPY $sgpr0
%13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
%14:vreg_64 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1
- TBUFFER_STORE_FORMAT_XY_OFFSET_exact %14:vreg_64, %13:sgpr_128, 0, 4, 64, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 12, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_XY_OFFSET_exact %14:vreg_64, %13:sgpr_128, 0, 4, 64, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 12, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
...
---
# GFX10-LABEL: name: gfx10_tbuffer_store_x_x
# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %1, %subreg.sub0, %0, %subreg.sub1
-# GFX10: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %9, %8, 0, 4, 64, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %9, %8, 0, 4, 64, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
name: gfx10_tbuffer_store_x_x
body: |
bb.0.entry:
@@ -1227,14 +1227,14 @@ body: |
%1:sgpr_32 = COPY $sgpr1
%0:sgpr_32 = COPY $sgpr0
%13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 8, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 8, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
...
---
# GFX10-LABEL: name: gfx10_tbuffer_store_x_x_format_32_32_32_32
# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %1, %subreg.sub0, %0, %subreg.sub1
-# GFX10: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %9, %8, 0, 4, 64, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %9, %8, 0, 4, 64, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
name: gfx10_tbuffer_store_x_x_format_32_32_32_32
body: |
bb.0.entry:
@@ -1248,8 +1248,8 @@ body: |
%1:sgpr_32 = COPY $sgpr1
%0:sgpr_32 = COPY $sgpr0
%13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 4, 77, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 8, 77, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 4, 77, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 8, 77, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
...
---
@@ -1269,14 +1269,14 @@ body: |
# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0
# GFX10: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3
# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %7, %subreg.sub1
-# GFX10: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 64, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 64, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %6, %subreg.sub0, %5, %subreg.sub1
# GFX10: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE killed %15, %subreg.sub0_sub1, %4, %subreg.sub2
# GFX10: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE killed %16, %subreg.sub0_sub1_sub2, %3, %subreg.sub3
-# GFX10: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %17, %13, 0, 16, 77, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %17, %13, 0, 16, 77, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4)
# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %1, %subreg.sub1
# GFX10: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE killed %18, %subreg.sub0_sub1, %0, %subreg.sub2
-# GFX10: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %19, %13, 0, 36, 74, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %19, %13, 0, 36, 74, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4)
name: gfx10_tbuffer_store_float32
body: |
bb.0.entry:
@@ -1295,15 +1295,15 @@ body: |
%1:sgpr_32 = COPY $sgpr1
%0:sgpr_32 = COPY $sgpr0
%13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
...
---
@@ -1323,14 +1323,14 @@ body: |
# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0
# GFX10: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3
# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %7, %subreg.sub1
-# GFX10: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 63, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 63, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %6, %subreg.sub0, %5, %subreg.sub1
# GFX10: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE killed %15, %subreg.sub0_sub1, %4, %subreg.sub2
# GFX10: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE killed %16, %subreg.sub0_sub1_sub2, %3, %subreg.sub3
-# GFX10: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %17, %13, 0, 16, 76, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %17, %13, 0, 16, 76, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4)
# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %1, %subreg.sub1
# GFX10: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE killed %18, %subreg.sub0_sub1, %0, %subreg.sub2
-# GFX10: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %19, %13, 0, 36, 73, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %19, %13, 0, 36, 73, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4)
name: gfx10_tbuffer_store_sint32
body: |
bb.0.entry:
@@ -1349,15 +1349,15 @@ body: |
%1:sgpr_32 = COPY $sgpr1
%0:sgpr_32 = COPY $sgpr0
%13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 21, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 21, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 21, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 21, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 21, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 21, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 21, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 21, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 21, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 21, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 21, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 21, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 21, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 21, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 21, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 21, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 21, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 21, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
...
---
@@ -1377,14 +1377,14 @@ body: |
# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0
# GFX10: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3
# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %7, %subreg.sub1
-# GFX10: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 62, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 62, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %6, %subreg.sub0, %5, %subreg.sub1
# GFX10: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE killed %15, %subreg.sub0_sub1, %4, %subreg.sub2
# GFX10: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE killed %16, %subreg.sub0_sub1_sub2, %3, %subreg.sub3
-# GFX10: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %17, %13, 0, 16, 75, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %17, %13, 0, 16, 75, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4)
# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %1, %subreg.sub1
# GFX10: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE killed %18, %subreg.sub0_sub1, %0, %subreg.sub2
-# GFX10: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %19, %13, 0, 36, 72, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %19, %13, 0, 36, 72, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4)
name: gfx10_tbuffer_store_uint32
body: |
bb.0.entry:
@@ -1403,15 +1403,15 @@ body: |
%1:sgpr_32 = COPY $sgpr1
%0:sgpr_32 = COPY $sgpr0
%13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 20, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 20, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 20, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 20, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 20, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 20, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 20, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 20, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 20, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 20, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 20, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 20, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 20, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 20, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 20, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 20, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 20, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 20, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
...
---
@@ -1430,15 +1430,15 @@ body: |
# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1
# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0
# GFX10: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3
-# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %8, %13, 0, 4, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
-# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %7, %13, 0, 8, 21, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
-# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %6, %13, 0, 16, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
-# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %5, %13, 0, 20, 21, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
-# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %4, %13, 0, 24, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
-# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %3, %13, 0, 28, 21, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
-# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %2, %13, 0, 36, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
-# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %1, %13, 0, 40, 21, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
-# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %0, %13, 0, 44, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %8, %13, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %7, %13, 0, 8, 21, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %6, %13, 0, 16, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %5, %13, 0, 20, 21, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %4, %13, 0, 24, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %3, %13, 0, 28, 21, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %2, %13, 0, 36, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %1, %13, 0, 40, 21, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %0, %13, 0, 44, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
name: gfx10_tbuffer_store_not_merged_data_format_mismatch
body: |
bb.0.entry:
@@ -1457,15 +1457,15 @@ body: |
%1:sgpr_32 = COPY $sgpr1
%0:sgpr_32 = COPY $sgpr0
%13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 21, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 21, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 21, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 21, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 21, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 21, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 21, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 21, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
...
---
@@ -1484,15 +1484,15 @@ body: |
# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1
# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0
# GFX10: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3
-# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %8, %13, 0, 4, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
-# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %7, %13, 0, 8, 13, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
-# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %6, %13, 0, 16, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
-# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %5, %13, 0, 20, 13, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
-# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %4, %13, 0, 24, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
-# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %3, %13, 0, 28, 13, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
-# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %2, %13, 0, 36, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
-# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %1, %13, 0, 40, 13, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
-# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %0, %13, 0, 44, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %8, %13, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %7, %13, 0, 8, 13, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %6, %13, 0, 16, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %5, %13, 0, 20, 13, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %4, %13, 0, 24, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %3, %13, 0, 28, 13, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %2, %13, 0, 36, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %1, %13, 0, 40, 13, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %0, %13, 0, 44, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
name: gfx10_tbuffer_store_not_merged_num_format_mismatch
body: |
bb.0.entry:
@@ -1511,22 +1511,22 @@ body: |
%1:sgpr_32 = COPY $sgpr1
%0:sgpr_32 = COPY $sgpr0
%13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 13, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 13, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 13, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 13, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
- TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 13, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 13, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 13, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 13, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
...
---
# GFX10-LABEL: name: gfx10_tbuffer_load_not_merged_swizzled_0
-# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 22, 0, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
-# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 22, 0, 0, 1, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
name: gfx10_tbuffer_load_not_merged_swizzled_0
body: |
bb.0.entry:
@@ -1535,15 +1535,15 @@ body: |
%2:sgpr_32 = COPY $sgpr2
%3:sgpr_32 = COPY $sgpr3
%5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
- %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 1, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
...
---
# GFX10-LABEL: name: gfx10_tbuffer_load_not_merged_swizzled_1
-# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
-# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 22, 0, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 22, 0, 0, 1, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
name: gfx10_tbuffer_load_not_merged_swizzled_1
body: |
bb.0.entry:
@@ -1552,8 +1552,8 @@ body: |
%2:sgpr_32 = COPY $sgpr2
%3:sgpr_32 = COPY $sgpr3
%5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
- %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
- %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 22, 0, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 22, 0, 0, 1, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
...
---
diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir
index cd0e6231e4b1..256c1509d27f 100644
--- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir
+++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir
@@ -28,7 +28,7 @@
# W64: [[CMP:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[CMP0]], [[CMP1]], implicit-def $scc
# W64: [[SRSRC:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[SRSRC0]], %subreg.sub0, [[SRSRC1]], %subreg.sub1, [[SRSRC2]], %subreg.sub2, [[SRSRC3]], %subreg.sub3
# W64: [[TMPEXEC:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec
-# W64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+# W64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
# W64: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc
# W64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
# W64-LABEL: bb.2:
@@ -52,7 +52,7 @@
# W32: [[CMP:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[CMP0]], [[CMP1]], implicit-def $scc
# W32: [[SRSRC:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[SRSRC0]], %subreg.sub0, [[SRSRC1]], %subreg.sub1, [[SRSRC2]], %subreg.sub2, [[SRSRC3]], %subreg.sub3
# W32: [[TMPEXEC:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec
-# W32: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+# W32: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
# TODO: S_XOR_B32_term should be `implicit-def $scc`
# W32: $exec_lo = S_XOR_B32_term $exec_lo, [[TMPEXEC]]
# W32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
@@ -77,7 +77,7 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%0:vgpr_32 = COPY $vgpr0
%6:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
- %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %4, killed %6, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %4, killed %6, 0, 0, 0, 0, 0, implicit $exec
$sgpr30_sgpr31 = COPY %5
$vgpr0 = COPY %7
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
@@ -101,7 +101,7 @@ body: |
# W64: [[CMP:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[CMP0]], [[CMP1]], implicit-def $scc
# W64: [[SRSRC:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[SRSRC0]], %subreg.sub0, [[SRSRC1]], %subreg.sub1, [[SRSRC2]], %subreg.sub2, [[SRSRC3]], %subreg.sub3
# W64: [[TMPEXEC:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec
-# W64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+# W64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
# W64: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc
# W64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
# W64-LABEL: bb.2:
@@ -125,7 +125,7 @@ body: |
# W32: [[CMP:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[CMP0]], [[CMP1]], implicit-def $scc
# W32: [[SRSRC:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[SRSRC0]], %subreg.sub0, [[SRSRC1]], %subreg.sub1, [[SRSRC2]], %subreg.sub2, [[SRSRC3]], %subreg.sub3
# W32: [[TMPEXEC:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec
-# W32: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+# W32: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
# TODO: S_XOR_B32_term should be `implicit-def $scc`
# W32: $exec_lo = S_XOR_B32_term $exec_lo, [[TMPEXEC]]
# W32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
@@ -150,7 +150,7 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%0:vgpr_32 = COPY $vgpr0
%6:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
- %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN %4, killed %6, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN %4, killed %6, 0, 0, 0, 0, 0, implicit $exec
$sgpr30_sgpr31 = COPY %5
$vgpr0 = COPY %7
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
@@ -174,7 +174,7 @@ body: |
# W64: [[CMP:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[CMP0]], [[CMP1]], implicit-def $scc
# W64: [[SRSRC:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[SRSRC0]], %subreg.sub0, [[SRSRC1]], %subreg.sub1, [[SRSRC2]], %subreg.sub2, [[SRSRC3]], %subreg.sub3
# W64: [[TMPEXEC:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec
-# W64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+# W64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
# W64: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc
# W64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
# W64-LABEL: bb.2:
@@ -198,7 +198,7 @@ body: |
# W32: [[CMP:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[CMP0]], [[CMP1]], implicit-def $scc
# W32: [[SRSRC:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[SRSRC0]], %subreg.sub0, [[SRSRC1]], %subreg.sub1, [[SRSRC2]], %subreg.sub2, [[SRSRC3]], %subreg.sub3
# W32: [[TMPEXEC:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec
-# W32: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+# W32: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
# TODO: S_XOR_B32_term should be `implicit-def $scc`
# W32: $exec_lo = S_XOR_B32_term $exec_lo, [[TMPEXEC]]
# W32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
@@ -223,7 +223,7 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%0:vgpr_32 = COPY $vgpr0
%6:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
- %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN %4, killed %6, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN %4, killed %6, 0, 0, 0, 0, 0, implicit $exec
$sgpr30_sgpr31 = COPY %5
$vgpr0 = COPY %7
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
@@ -239,7 +239,7 @@ body: |
# ADDR64: %9:vgpr_32, %12:sreg_64_xexec = V_ADD_CO_U32_e64 %14.sub0, %4.sub0, 0, implicit $exec
# ADDR64: %10:vgpr_32, dead %13:sreg_64_xexec = V_ADDC_U32_e64 %14.sub1, %4.sub1, killed %12, 0, implicit $exec
# ADDR64: %11:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %10, %subreg.sub1
-# ADDR64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 %11, killed %18, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+# ADDR64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 %11, killed %18, 0, 0, 0, 0, 0, implicit $exec
---
name: addr64
liveins:
@@ -259,7 +259,7 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%0:vgpr_32 = COPY $vgpr0
%6:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
- %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 %4, killed %6, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 %4, killed %6, 0, 0, 0, 0, 0, implicit $exec
$sgpr30_sgpr31 = COPY %5
$vgpr0 = COPY %7
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
@@ -284,7 +284,7 @@ body: |
# W64-NO-ADDR64: [[CMP:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[CMP0]], [[CMP1]], implicit-def $scc
# W64-NO-ADDR64: [[SRSRC:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[SRSRC0]], %subreg.sub0, [[SRSRC1]], %subreg.sub1, [[SRSRC2]], %subreg.sub2, [[SRSRC3]], %subreg.sub3
# W64-NO-ADDR64: [[TMPEXEC:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec
-# W64-NO-ADDR64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed [[SRSRC]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+# W64-NO-ADDR64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
# W64-NO-ADDR64: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc
# W64-NO-ADDR64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
# W64-NO-ADDR64-LABEL: bb.2:
@@ -306,7 +306,7 @@ body: |
# W32: [[CMP:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[CMP0]], [[CMP1]], implicit-def $scc
# W32: [[SRSRC:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[SRSRC0]], %subreg.sub0, [[SRSRC1]], %subreg.sub1, [[SRSRC2]], %subreg.sub2, [[SRSRC3]], %subreg.sub3
# W32: [[TMPEXEC:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec
-# W32: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed [[SRSRC]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+# W32: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
# TODO: S_XOR_B32_term should be `implicit-def $scc`
# W32: $exec_lo = S_XOR_B32_term $exec_lo, [[TMPEXEC]]
# W32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
@@ -320,7 +320,7 @@ body: |
# ADDR64: [[RSRCFMTHI:%[0-9]+]]:sgpr_32 = S_MOV_B32 61440
# ADDR64: [[ZERORSRC:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[ZERO64]], %subreg.sub0_sub1, [[RSRCFMTLO]], %subreg.sub2, [[RSRCFMTHI]], %subreg.sub3
# ADDR64: [[VADDR64:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[RSRCPTR]].sub0, %subreg.sub0, [[RSRCPTR]].sub1, %subreg.sub1
-# ADDR64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 [[VADDR64]], [[ZERORSRC]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+# ADDR64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 [[VADDR64]], [[ZERORSRC]], 0, 0, 0, 0, 0, implicit $exec
---
name: offset
@@ -341,7 +341,7 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%0:vgpr_32 = COPY $vgpr0
%6:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
- %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed %6, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed %6, 0, 0, 0, 0, 0, implicit $exec
$sgpr30_sgpr31 = COPY %5
$vgpr0 = COPY %7
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/nsa-reassign.mir b/llvm/test/CodeGen/AMDGPU/nsa-reassign.mir
index 1d1815ac4ecb..ff69a81f617c 100644
--- a/llvm/test/CodeGen/AMDGPU/nsa-reassign.mir
+++ b/llvm/test/CodeGen/AMDGPU/nsa-reassign.mir
@@ -34,7 +34,7 @@ body: |
%4 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
%5 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
%6 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
- %7:vgpr_32 = IMAGE_SAMPLE_C_L_V1_V5_nsa_gfx10 %0, %2, %4, %5, %6, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "ImageResource")
+ %7:vgpr_32 = IMAGE_SAMPLE_C_L_V1_V5_nsa_gfx10 %0, %2, %4, %5, %6, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 1, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "ImageResource")
S_ENDPGM 0, implicit %7
...
@@ -67,6 +67,6 @@ body: |
%6 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
S_NOP 0, implicit-def dead $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5, implicit %6
- %7:vgpr_32 = IMAGE_SAMPLE_C_L_V1_V5_nsa_gfx10 %0, %2, %4, %5, %6, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "ImageResource")
+ %7:vgpr_32 = IMAGE_SAMPLE_C_L_V1_V5_nsa_gfx10 %0, %2, %4, %5, %6, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 1, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "ImageResource")
S_ENDPGM 0, implicit %7
...
diff --git a/llvm/test/CodeGen/AMDGPU/nsa-vmem-hazard.mir b/llvm/test/CodeGen/AMDGPU/nsa-vmem-hazard.mir
index b3aaffbfed10..dfd8c56a7ae8 100644
--- a/llvm/test/CodeGen/AMDGPU/nsa-vmem-hazard.mir
+++ b/llvm/test/CodeGen/AMDGPU/nsa-vmem-hazard.mir
@@ -8,8 +8,8 @@
name: hazard_image_sample_d_buf_off6
body: |
bb.0:
- $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 undef $vgpr3, undef $vgpr8, undef $vgpr7, undef $vgpr5, undef $vgpr4, undef $vgpr6, undef $vgpr0, undef $vgpr2, undef $vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
- $vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 6, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 undef $vgpr3, undef $vgpr8, undef $vgpr7, undef $vgpr5, undef $vgpr4, undef $vgpr6, undef $vgpr0, undef $vgpr2, undef $vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 6, 0, 0, 0, implicit $exec
...
# GCN-LABEL: name: no_hazard_image_sample_d_buf_off1
@@ -19,8 +19,8 @@ body: |
name: no_hazard_image_sample_d_buf_off1
body: |
bb.0:
- $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 undef $vgpr3, undef $vgpr8, undef $vgpr7, undef $vgpr5, undef $vgpr4, undef $vgpr6, undef $vgpr0, undef $vgpr2, undef $vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
- $vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 1, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 undef $vgpr3, undef $vgpr8, undef $vgpr7, undef $vgpr5, undef $vgpr4, undef $vgpr6, undef $vgpr0, undef $vgpr2, undef $vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 1, 0, 0, 0, implicit $exec
...
# GCN-LABEL: name: no_hazard_image_sample_d_buf_far
@@ -31,9 +31,9 @@ body: |
name: no_hazard_image_sample_d_buf_far
body: |
bb.0:
- $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 undef $vgpr3, undef $vgpr8, undef $vgpr7, undef $vgpr5, undef $vgpr4, undef $vgpr6, undef $vgpr0, undef $vgpr2, undef $vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
+ $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 undef $vgpr3, undef $vgpr8, undef $vgpr7, undef $vgpr5, undef $vgpr4, undef $vgpr6, undef $vgpr0, undef $vgpr2, undef $vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
V_NOP_e32 implicit $exec
- $vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 6, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 6, 0, 0, 0, implicit $exec
...
# Non-NSA
@@ -44,8 +44,8 @@ body: |
name: no_hazard_image_sample_v4_v2_buf_off6
body: |
bb.0:
- $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V2_gfx10 undef $vgpr1_vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
- $vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 6, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V2_gfx10 undef $vgpr1_vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 6, 0, 0, 0, implicit $exec
...
# Less than 4 dwords
@@ -56,6 +56,6 @@ body: |
name: no_hazard_image_sample_v4_v3_buf_off6
body: |
bb.0:
- $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V3_nsa_gfx10 undef $vgpr1, undef $vgpr2, undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
- $vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 6, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V3_nsa_gfx10 undef $vgpr1, undef $vgpr2, undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 6, 0, 0, 0, implicit $exec
...
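
The pattern in the test updates above is uniform: the three separate cache-flag immediates collapse into one bitmask immediate, so every affected operand list loses two entries. A minimal sketch of that folding, assuming bit assignments of GLC=1, SLC=2, DLC=4 for the new operand (an assumption for illustration, not quoted from the patch):

  // Sketch only: fold the legacy GLC/SLC/DLC immediates into a single
  // cache-policy bitmask operand. The bit values are assumed here for
  // illustration and may not match the patch exactly.
  #include <cstdint>

  enum CPolBits : int64_t { CPOL_GLC = 1, CPOL_SLC = 2, CPOL_DLC = 4 };

  int64_t foldCachePolicy(int64_t GLC, int64_t SLC, int64_t DLC) {
    int64_t CPol = 0;
    if (GLC) CPol |= CPOL_GLC; // globally coherent
    if (SLC) CPol |= CPOL_SLC; // system level coherent
    if (DLC) CPol |= CPOL_DLC; // device level coherent (gfx10)
    return CPol;
  }

For example, foldCachePolicy(1, 0, 0) yields 1, a glc-only access, while foldCachePolicy(0, 0, 0) yields 0, which is why most of the rewritten operand lists simply end in a single 0.
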
diff --git a/llvm/test/CodeGen/AMDGPU/opt-sgpr-to-vgpr-copy.mir b/llvm/test/CodeGen/AMDGPU/opt-sgpr-to-vgpr-copy.mir
index 44901a1ed9bb..611c5bb3271b 100644
--- a/llvm/test/CodeGen/AMDGPU/opt-sgpr-to-vgpr-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/opt-sgpr-to-vgpr-copy.mir
@@ -105,8 +105,8 @@ body: |
%3 = COPY $sgpr0_sgpr1
%2 = COPY $vgpr0
- %7 = S_LOAD_DWORDX2_IMM %3, 9, 0, 0
- %8 = S_LOAD_DWORDX2_IMM %3, 11, 0, 0
+ %7 = S_LOAD_DWORDX2_IMM %3, 9, 0
+ %8 = S_LOAD_DWORDX2_IMM %3, 11, 0
%6 = COPY %7
%9 = S_MOV_B32 0
%10 = REG_SEQUENCE %2, %subreg.sub0, killed %9, %subreg.sub1
@@ -137,7 +137,7 @@ body: |
%28 = REG_SEQUENCE %6, 17, killed %27, 18
%29 = V_MOV_B32_e32 0, implicit $exec
%30 = COPY %24
- BUFFER_STORE_DWORD_ADDR64 killed %29, killed %30, killed %28, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_ADDR64 killed %29, killed %30, killed %28, 0, 0, 0, 0, 0, implicit $exec
bb.2.bb2:
SI_END_CF %1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
@@ -203,9 +203,9 @@ body: |
%3 = COPY $sgpr0_sgpr1
%2 = COPY $vgpr0
- %7 = S_LOAD_DWORDX2_IMM %3, 9, 0, 0
- %8 = S_LOAD_DWORDX2_IMM %3, 11, 0, 0
- %9 = S_LOAD_DWORDX2_IMM %3, 13, 0, 0
+ %7 = S_LOAD_DWORDX2_IMM %3, 9, 0
+ %8 = S_LOAD_DWORDX2_IMM %3, 11, 0
+ %9 = S_LOAD_DWORDX2_IMM %3, 13, 0
%6 = COPY %7
%10 = S_MOV_B32 0
%11 = REG_SEQUENCE %2, %subreg.sub0, killed %10, %subreg.sub1
@@ -243,7 +243,7 @@ body: |
%37 = REG_SEQUENCE %6, 17, killed %36, 18
%38 = V_MOV_B32_e32 0, implicit $exec
%39 = COPY %33
- BUFFER_STORE_DWORD_ADDR64 killed %38, killed %39, killed %37, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_ADDR64 killed %38, killed %39, killed %37, 0, 0, 0, 0, 0, implicit $exec
bb.2.bb2:
SI_END_CF %1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
@@ -300,8 +300,8 @@ body: |
%3 = COPY $sgpr0_sgpr1
%2 = COPY $vgpr0
- %7 = S_LOAD_DWORDX2_IMM %3, 9, 0, 0
- %8 = S_LOAD_DWORDX2_IMM %3, 11, 0, 0
+ %7 = S_LOAD_DWORDX2_IMM %3, 9, 0
+ %8 = S_LOAD_DWORDX2_IMM %3, 11, 0
%6 = COPY %7
%9 = S_MOV_B32 0
%10 = REG_SEQUENCE %2, %subreg.sub0, killed %9, %subreg.sub1
@@ -332,7 +332,7 @@ body: |
%28 = REG_SEQUENCE %6, 17, killed %27, 18
%29 = V_MOV_B32_e32 0, implicit $exec
%30 = COPY %24
- BUFFER_STORE_DWORD_ADDR64 killed %29, killed %30, killed %28, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_ADDR64 killed %29, killed %30, killed %28, 0, 0, 0, 0, 0, implicit $exec
bb.2.bb2:
SI_END_CF %1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir b/llvm/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir
index 85e308521562..3e36c46c47ea 100644
--- a/llvm/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir
+++ b/llvm/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir
@@ -151,7 +151,7 @@ body: |
$sgpr7 = S_MOV_B32 61440
$sgpr6 = S_MOV_B32 -1
- $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
bb.2.end:
liveins: $vgpr0, $sgpr0_sgpr1
@@ -159,7 +159,7 @@ body: |
$exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
$sgpr3 = S_MOV_B32 61440
$sgpr2 = S_MOV_B32 -1
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -188,7 +188,7 @@ body: |
$sgpr7 = S_MOV_B32 61440
$sgpr6 = S_MOV_B32 -1
- $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
bb.2.end:
liveins: $vgpr0, $sgpr0_sgpr1
@@ -196,7 +196,7 @@ body: |
$exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
$sgpr3 = S_MOV_B32 61440
$sgpr2 = S_MOV_B32 -1
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -225,7 +225,7 @@ body: |
$sgpr7 = S_MOV_B32 61440
$sgpr6 = S_MOV_B32 -1
- $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
bb.2.end:
liveins: $vgpr0, $sgpr0_sgpr1
@@ -233,14 +233,14 @@ body: |
$exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
$sgpr3 = S_MOV_B32 61440
$sgpr2 = S_MOV_B32 -1
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
---
# CHECK-LABEL: name: optimize_if_and_saveexec_xor_valu_middle
# CHECK: $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
-# CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+# CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
# CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
# CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3
# CHECK-NEXT: S_CBRANCH_EXECZ
@@ -255,7 +255,7 @@ body: |
$vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
$vgpr0 = V_MOV_B32_e32 4, implicit $exec
$sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
- BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
$sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
S_CBRANCH_EXECZ %bb.2, implicit $exec
@@ -266,7 +266,7 @@ body: |
$sgpr7 = S_MOV_B32 61440
$sgpr6 = S_MOV_B32 -1
- $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
bb.2.end:
liveins: $vgpr0, $sgpr0_sgpr1
@@ -274,7 +274,7 @@ body: |
$exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
$sgpr3 = S_MOV_B32 61440
$sgpr2 = S_MOV_B32 -1
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -304,7 +304,7 @@ body: |
bb.1.if:
liveins: $sgpr0_sgpr1 , $sgpr4_sgpr5_sgpr6_sgpr7
- $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
bb.2.end:
liveins: $vgpr0, $sgpr0_sgpr1, $sgpr4_sgpr5_sgpr6_sgpr7
@@ -312,7 +312,7 @@ body: |
$exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
$sgpr3 = S_MOV_B32 61440
$sgpr2 = S_MOV_B32 -1
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -346,7 +346,7 @@ body: |
$sgpr7 = S_MOV_B32 61440
$sgpr6 = S_MOV_B32 -1
- $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
bb.2.end:
liveins: $vgpr0, $sgpr0_sgpr1
@@ -356,7 +356,7 @@ body: |
$sgpr1 = S_MOV_B32 1
$sgpr2 = S_MOV_B32 -1
$sgpr3 = S_MOV_B32 61440
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -387,7 +387,7 @@ body: |
S_SLEEP 0, implicit $sgpr2_sgpr3
$sgpr7 = S_MOV_B32 61440
$sgpr6 = S_MOV_B32 -1
- $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
bb.2.end:
liveins: $vgpr0, $sgpr0_sgpr1
@@ -395,7 +395,7 @@ body: |
$exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
$sgpr3 = S_MOV_B32 61440
$sgpr2 = S_MOV_B32 -1
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -426,7 +426,7 @@ body: |
$sgpr7 = S_MOV_B32 61440
$sgpr6 = S_MOV_B32 -1
- $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
bb.2.end:
liveins: $vgpr0, $sgpr0_sgpr1
@@ -434,7 +434,7 @@ body: |
$exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
$sgpr3 = S_MOV_B32 61440
$sgpr2 = S_MOV_B32 -1
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -463,7 +463,7 @@ body: |
$sgpr7 = S_MOV_B32 61440
$sgpr6 = S_MOV_B32 -1
- $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
bb.2.end:
liveins: $vgpr0, $sgpr0_sgpr1
@@ -471,7 +471,7 @@ body: |
$exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
$sgpr3 = S_MOV_B32 61440
$sgpr2 = S_MOV_B32 -1
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -500,7 +500,7 @@ body: |
$sgpr7 = S_MOV_B32 61440
$sgpr6 = S_MOV_B32 -1
- $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
bb.2.end:
liveins: $vgpr0, $sgpr0_sgpr1
@@ -508,7 +508,7 @@ body: |
$exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
$sgpr3 = S_MOV_B32 61440
$sgpr2 = S_MOV_B32 -1
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -539,7 +539,7 @@ body: |
$sgpr7 = S_MOV_B32 61440
$sgpr6 = S_MOV_B32 -1
- $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
bb.2.end:
liveins: $vgpr0, $sgpr0_sgpr1
@@ -547,6 +547,6 @@ body: |
$exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
$sgpr3 = S_MOV_B32 61440
$sgpr2 = S_MOV_B32 -1
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir b/llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir
index 5b5ab2c106dd..3a6ff7c10ed6 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir
@@ -59,17 +59,17 @@ body: |
; MUBUF-V2A: liveins: $agpr0
; MUBUF-V2A: $vgpr0_vgpr1 = IMPLICIT_DEF
; MUBUF-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1
- ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store 4 into %stack.0 + 4, addrspace 5)
; MUBUF-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1
- ; MUBUF-V2A: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load 4 from %stack.0 + 4, addrspace 5)
+ ; MUBUF-V2A: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load 4 from %stack.0 + 4, addrspace 5)
; MUBUF-V2A: S_ENDPGM 0
; FLATSCR-V2A-LABEL: name: test_spill_v2_partial_agpr
; FLATSCR-V2A: liveins: $agpr0
; FLATSCR-V2A: $vgpr0_vgpr1 = IMPLICIT_DEF
; FLATSCR-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1
- ; FLATSCR-V2A: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; FLATSCR-V2A: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1 :: (store 4 into %stack.0 + 4, addrspace 5)
; FLATSCR-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1
- ; FLATSCR-V2A: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1 :: (load 4 from %stack.0 + 4, addrspace 5)
+ ; FLATSCR-V2A: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1 :: (load 4 from %stack.0 + 4, addrspace 5)
; FLATSCR-V2A: S_ENDPGM 0
$vgpr0_vgpr1 = IMPLICIT_DEF
SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5)
@@ -93,19 +93,19 @@ body: |
; MUBUF-V2A: liveins: $agpr0
; MUBUF-V2A: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF
; MUBUF-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2
- ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 :: (store 4 into %stack.0 + 4, addrspace 5)
- ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 :: (store 4 into %stack.0 + 8, addrspace 5)
+ ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 :: (store 4 into %stack.0 + 8, addrspace 5)
; MUBUF-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2
- ; MUBUF-V2A: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: (load 4 from %stack.0 + 4, addrspace 5)
- ; MUBUF-V2A: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: (load 4 from %stack.0 + 8, addrspace 5)
+ ; MUBUF-V2A: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: (load 4 from %stack.0 + 4, addrspace 5)
+ ; MUBUF-V2A: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: (load 4 from %stack.0 + 8, addrspace 5)
; MUBUF-V2A: S_ENDPGM 0
; FLATSCR-V2A-LABEL: name: test_spill_v3_partial_agpr
; FLATSCR-V2A: liveins: $agpr0
; FLATSCR-V2A: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF
; FLATSCR-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2
- ; FLATSCR-V2A: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr1_vgpr2, $sgpr32, 4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2 :: (store 8 into %stack.0 + 4, align 4, addrspace 5)
+ ; FLATSCR-V2A: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr1_vgpr2, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2 :: (store 8 into %stack.0 + 4, align 4, addrspace 5)
; FLATSCR-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2
- ; FLATSCR-V2A: $vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2 :: (load 8 from %stack.0 + 4, align 4, addrspace 5)
+ ; FLATSCR-V2A: $vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2 :: (load 8 from %stack.0 + 4, align 4, addrspace 5)
; FLATSCR-V2A: S_ENDPGM 0
$vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF
SI_SPILL_V96_SAVE killed $vgpr0_vgpr1_vgpr2, %stack.0, $sgpr32, 0, implicit $exec :: (store 12 into %stack.0, align 4, addrspace 5)
@@ -131,11 +131,11 @@ body: |
; MUBUF-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3
; MUBUF-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
; MUBUF-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
- ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 12, addrspace 5)
+ ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 12, addrspace 5)
; MUBUF-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
; MUBUF-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
; MUBUF-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
- ; MUBUF-V2A: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load 4 from %stack.0 + 12, addrspace 5)
+ ; MUBUF-V2A: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load 4 from %stack.0 + 12, addrspace 5)
; MUBUF-V2A: S_ENDPGM 0
; FLATSCR-V2A-LABEL: name: test_spill_v4_partial_agpr
; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2
@@ -143,11 +143,11 @@ body: |
; FLATSCR-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3
; FLATSCR-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
; FLATSCR-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
- ; FLATSCR-V2A: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 12, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 12, addrspace 5)
+ ; FLATSCR-V2A: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 12, addrspace 5)
; FLATSCR-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
; FLATSCR-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
; FLATSCR-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
- ; FLATSCR-V2A: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load 4 from %stack.0 + 12, addrspace 5)
+ ; FLATSCR-V2A: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load 4 from %stack.0 + 12, addrspace 5)
; FLATSCR-V2A: S_ENDPGM 0
$vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF
SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store 16 into %stack.0, align 4, addrspace 5)
@@ -173,13 +173,13 @@ body: |
; MUBUF-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
; MUBUF-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
; MUBUF-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
- ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 12, addrspace 5)
- ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 16, addrspace 5)
+ ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 12, addrspace 5)
+ ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 16, addrspace 5)
; MUBUF-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
; MUBUF-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
; MUBUF-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
- ; MUBUF-V2A: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 12, addrspace 5)
- ; MUBUF-V2A: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 16, addrspace 5)
+ ; MUBUF-V2A: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 12, addrspace 5)
+ ; MUBUF-V2A: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 16, addrspace 5)
; MUBUF-V2A: S_ENDPGM 0
; FLATSCR-V2A-LABEL: name: test_spill_v5_partial_agpr
; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2
@@ -187,13 +187,13 @@ body: |
; FLATSCR-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
; FLATSCR-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
; FLATSCR-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
- ; FLATSCR-V2A: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 12, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 12, addrspace 5)
- ; FLATSCR-V2A: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 16, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 16, addrspace 5)
+ ; FLATSCR-V2A: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 12, addrspace 5)
+ ; FLATSCR-V2A: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 16, addrspace 5)
; FLATSCR-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
; FLATSCR-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
; FLATSCR-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
- ; FLATSCR-V2A: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 12, addrspace 5)
- ; FLATSCR-V2A: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 16, addrspace 5)
+ ; FLATSCR-V2A: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 12, addrspace 5)
+ ; FLATSCR-V2A: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 16, addrspace 5)
; FLATSCR-V2A: S_ENDPGM 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF
SI_SPILL_V160_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, %stack.0, $sgpr32, 0, implicit $exec :: (store 20 into %stack.0, align 4, addrspace 5)
@@ -221,13 +221,13 @@ body: |
; MUBUF-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
; MUBUF-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
; MUBUF-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
- ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0 + 20, addrspace 5)
+ ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0 + 20, addrspace 5)
; MUBUF-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
; MUBUF-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
; MUBUF-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
; MUBUF-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
; MUBUF-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
- ; MUBUF-V2A: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0 + 20, addrspace 5)
+ ; MUBUF-V2A: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0 + 20, addrspace 5)
; MUBUF-V2A: S_ENDPGM 0
; FLATSCR-V2A-LABEL: name: test_spill_v6_partial_agpr
; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4
@@ -237,13 +237,13 @@ body: |
; FLATSCR-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
; FLATSCR-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
; FLATSCR-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
- ; FLATSCR-V2A: SCRATCH_STORE_DWORD_SADDR killed $vgpr5, $sgpr32, 20, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0 + 20, addrspace 5)
+ ; FLATSCR-V2A: SCRATCH_STORE_DWORD_SADDR killed $vgpr5, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0 + 20, addrspace 5)
; FLATSCR-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
; FLATSCR-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
; FLATSCR-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
; FLATSCR-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
; FLATSCR-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
- ; FLATSCR-V2A: $vgpr5 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0 + 20, addrspace 5)
+ ; FLATSCR-V2A: $vgpr5 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0 + 20, addrspace 5)
; FLATSCR-V2A: S_ENDPGM 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF
SI_SPILL_V192_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, %stack.0, $sgpr32, 0, implicit $exec :: (store 24 into %stack.0, align 4, addrspace 5)
@@ -270,18 +270,18 @@ body: |
; MUBUF-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; MUBUF-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; MUBUF-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
- ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 16, addrspace 5)
- ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 20, addrspace 5)
- ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 24, addrspace 5)
- ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 28, addrspace 5)
+ ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 16, addrspace 5)
+ ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 20, addrspace 5)
+ ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 24, addrspace 5)
+ ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 28, addrspace 5)
; MUBUF-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; MUBUF-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; MUBUF-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; MUBUF-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
- ; MUBUF-V2A: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 16, addrspace 5)
- ; MUBUF-V2A: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 20, addrspace 5)
- ; MUBUF-V2A: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 24, addrspace 5)
- ; MUBUF-V2A: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 28, addrspace 5)
+ ; MUBUF-V2A: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 16, addrspace 5)
+ ; MUBUF-V2A: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 20, addrspace 5)
+ ; MUBUF-V2A: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 24, addrspace 5)
+ ; MUBUF-V2A: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 28, addrspace 5)
; MUBUF-V2A: S_ENDPGM 0
; FLATSCR-V2A-LABEL: name: test_spill_v8_partial_agpr
; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3
@@ -290,12 +290,12 @@ body: |
; FLATSCR-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; FLATSCR-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; FLATSCR-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
- ; FLATSCR-V2A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 16 into %stack.0 + 16, align 4, addrspace 5)
+ ; FLATSCR-V2A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 16 into %stack.0 + 16, align 4, addrspace 5)
; FLATSCR-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; FLATSCR-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; FLATSCR-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; FLATSCR-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
- ; FLATSCR-V2A: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 16 from %stack.0 + 16, align 4, addrspace 5)
+ ; FLATSCR-V2A: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 16 from %stack.0 + 16, align 4, addrspace 5)
; FLATSCR-V2A: S_ENDPGM 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
SI_SPILL_V256_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, %stack.0, $sgpr32, 0, implicit $exec :: (store 32 into %stack.0, align 4, addrspace 5)
@@ -323,33 +323,33 @@ body: |
; MUBUF-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; MUBUF-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; MUBUF-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
- ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 20, addrspace 5)
- ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 24, addrspace 5)
- ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 28, addrspace 5)
- ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 32, addrspace 5)
- ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 36, addrspace 5)
- ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 40, addrspace 5)
- ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 44, addrspace 5)
- ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 48, addrspace 5)
- ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 52, addrspace 5)
- ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 56, addrspace 5)
- ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 60, addrspace 5)
+ ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 20, addrspace 5)
+ ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 24, addrspace 5)
+ ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 28, addrspace 5)
+ ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 32, addrspace 5)
+ ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 36, addrspace 5)
+ ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 40, addrspace 5)
+ ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 44, addrspace 5)
+ ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 48, addrspace 5)
+ ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 52, addrspace 5)
+ ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 56, addrspace 5)
+ ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 60, addrspace 5)
; MUBUF-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; MUBUF-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; MUBUF-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; MUBUF-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; MUBUF-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
- ; MUBUF-V2A: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 20, addrspace 5)
- ; MUBUF-V2A: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 24, addrspace 5)
- ; MUBUF-V2A: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 28, addrspace 5)
- ; MUBUF-V2A: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 32, addrspace 5)
- ; MUBUF-V2A: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 36, addrspace 5)
- ; MUBUF-V2A: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 40, addrspace 5)
- ; MUBUF-V2A: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 44, addrspace 5)
- ; MUBUF-V2A: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 48, addrspace 5)
- ; MUBUF-V2A: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 52, addrspace 5)
- ; MUBUF-V2A: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 56, addrspace 5)
- ; MUBUF-V2A: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 60, addrspace 5)
+ ; MUBUF-V2A: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 20, addrspace 5)
+ ; MUBUF-V2A: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 24, addrspace 5)
+ ; MUBUF-V2A: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 28, addrspace 5)
+ ; MUBUF-V2A: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 32, addrspace 5)
+ ; MUBUF-V2A: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 36, addrspace 5)
+ ; MUBUF-V2A: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 40, addrspace 5)
+ ; MUBUF-V2A: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 44, addrspace 5)
+ ; MUBUF-V2A: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 48, addrspace 5)
+ ; MUBUF-V2A: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 52, addrspace 5)
+ ; MUBUF-V2A: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 56, addrspace 5)
+ ; MUBUF-V2A: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 60, addrspace 5)
; MUBUF-V2A: S_ENDPGM 0
; FLATSCR-V2A-LABEL: name: test_spill_v16_partial_agpr
; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4
@@ -359,17 +359,17 @@ body: |
; FLATSCR-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; FLATSCR-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; FLATSCR-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
- ; FLATSCR-V2A: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr5_vgpr6_vgpr7, $sgpr32, 20, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 12 into %stack.0 + 20, align 4, addrspace 5)
- ; FLATSCR-V2A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 16 into %stack.0 + 32, align 4, addrspace 5)
- ; FLATSCR-V2A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 16 into %stack.0 + 48, align 4, addrspace 5)
+ ; FLATSCR-V2A: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr5_vgpr6_vgpr7, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 12 into %stack.0 + 20, align 4, addrspace 5)
+ ; FLATSCR-V2A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 16 into %stack.0 + 32, align 4, addrspace 5)
+ ; FLATSCR-V2A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 16 into %stack.0 + 48, align 4, addrspace 5)
; FLATSCR-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; FLATSCR-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; FLATSCR-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; FLATSCR-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; FLATSCR-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
- ; FLATSCR-V2A: $vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 20, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 12 from %stack.0 + 20, align 4, addrspace 5)
- ; FLATSCR-V2A: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 16 from %stack.0 + 32, align 4, addrspace 5)
- ; FLATSCR-V2A: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 16 from %stack.0 + 48, align 4, addrspace 5)
+ ; FLATSCR-V2A: $vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 12 from %stack.0 + 20, align 4, addrspace 5)
+ ; FLATSCR-V2A: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 16 from %stack.0 + 32, align 4, addrspace 5)
+ ; FLATSCR-V2A: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 16 from %stack.0 + 48, align 4, addrspace 5)
; FLATSCR-V2A: S_ENDPGM 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF
SI_SPILL_V512_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, %stack.0, $sgpr32, 0, implicit $exec :: (store 64 into %stack.0, align 4, addrspace 5)
diff --git a/llvm/test/CodeGen/AMDGPU/pei-build-spill.mir b/llvm/test/CodeGen/AMDGPU/pei-build-spill.mir
index 1851aadb719e..2865cef960c8 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-build-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-build-spill.mir
@@ -21,8 +21,8 @@ body: |
bb.0.entry:
; MUBUF-LABEL: name: test_spill_v1
; MUBUF: $vgpr0 = IMPLICIT_DEF
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
; MUBUF: S_ENDPGM 0
; MUBUF-V2A-LABEL: name: test_spill_v1
; MUBUF-V2A: liveins: $agpr0
@@ -32,8 +32,8 @@ body: |
; MUBUF-V2A: S_ENDPGM 0
; FLATSCR-LABEL: name: test_spill_v1
; FLATSCR: $vgpr0 = IMPLICIT_DEF
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5)
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
; FLATSCR: S_ENDPGM 0
; FLATSCR-V2A-LABEL: name: test_spill_v1
; FLATSCR-V2A: liveins: $agpr0
@@ -43,8 +43,8 @@ body: |
; FLATSCR-V2A: S_ENDPGM 0
; MUBUF-GFX90A-LABEL: name: test_spill_v1
; MUBUF-GFX90A: $vgpr0 = IMPLICIT_DEF
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
- ; MUBUF-GFX90A: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
; MUBUF-GFX90A: S_ENDPGM 0
; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v1
; MUBUF-GFX90A-V2A: liveins: $agpr0
@@ -54,8 +54,8 @@ body: |
; MUBUF-GFX90A-V2A: S_ENDPGM 0
; FLATSCR-GFX90A-LABEL: name: test_spill_v1
; FLATSCR-GFX90A: $vgpr0 = IMPLICIT_DEF
- ; FLATSCR-GFX90A: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5)
- ; FLATSCR-GFX90A: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
+ ; FLATSCR-GFX90A: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5)
+ ; FLATSCR-GFX90A: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
; FLATSCR-GFX90A: S_ENDPGM 0
; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v1
; FLATSCR-GFX90A-V2A: liveins: $agpr0
@@ -83,10 +83,10 @@ body: |
bb.0.entry:
; MUBUF-LABEL: name: test_spill_v2
; MUBUF: $vgpr0_vgpr1 = IMPLICIT_DEF
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store 4 into %stack.0, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store 4 into %stack.0 + 4, addrspace 5)
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load 4 from %stack.0, addrspace 5)
- ; MUBUF: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load 4 from %stack.0 + 4, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store 4 into %stack.0, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load 4 from %stack.0, addrspace 5)
+ ; MUBUF: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load 4 from %stack.0 + 4, addrspace 5)
; MUBUF: S_ENDPGM 0
; MUBUF-V2A-LABEL: name: test_spill_v2
; MUBUF-V2A: liveins: $agpr0, $agpr1
@@ -98,8 +98,8 @@ body: |
; MUBUF-V2A: S_ENDPGM 0
; FLATSCR-LABEL: name: test_spill_v2
; FLATSCR: $vgpr0_vgpr1 = IMPLICIT_DEF
- ; FLATSCR: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8 into %stack.0, align 4, addrspace 5)
- ; FLATSCR: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8 from %stack.0, align 4, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store 8 into %stack.0, align 4, addrspace 5)
+ ; FLATSCR: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load 8 from %stack.0, align 4, addrspace 5)
; FLATSCR: S_ENDPGM 0
; FLATSCR-V2A-LABEL: name: test_spill_v2
; FLATSCR-V2A: liveins: $agpr0, $agpr1
@@ -111,10 +111,10 @@ body: |
; FLATSCR-V2A: S_ENDPGM 0
; MUBUF-GFX90A-LABEL: name: test_spill_v2
; MUBUF-GFX90A: $vgpr0_vgpr1 = IMPLICIT_DEF
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store 4 into %stack.0, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store 4 into %stack.0 + 4, addrspace 5)
- ; MUBUF-GFX90A: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load 4 from %stack.0, addrspace 5)
- ; MUBUF-GFX90A: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load 4 from %stack.0 + 4, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store 4 into %stack.0, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load 4 from %stack.0, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load 4 from %stack.0 + 4, addrspace 5)
; MUBUF-GFX90A: S_ENDPGM 0
; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v2
; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1
@@ -126,8 +126,8 @@ body: |
; MUBUF-GFX90A-V2A: S_ENDPGM 0
; FLATSCR-GFX90A-LABEL: name: test_spill_v2
; FLATSCR-GFX90A: $vgpr0_vgpr1 = IMPLICIT_DEF
- ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8 into %stack.0, align 4, addrspace 5)
- ; FLATSCR-GFX90A: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8 from %stack.0, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store 8 into %stack.0, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load 8 from %stack.0, align 4, addrspace 5)
; FLATSCR-GFX90A: S_ENDPGM 0
; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v2
; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1
@@ -157,12 +157,12 @@ body: |
bb.0.entry:
; MUBUF-LABEL: name: test_spill_v3
; MUBUF: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: (store 4 into %stack.0, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 :: (store 4 into %stack.0 + 4, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 :: (store 4 into %stack.0 + 8, addrspace 5)
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: (load 4 from %stack.0, addrspace 5)
- ; MUBUF: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: (load 4 from %stack.0 + 4, addrspace 5)
- ; MUBUF: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: (load 4 from %stack.0 + 8, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: (store 4 into %stack.0, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 :: (store 4 into %stack.0 + 8, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: (load 4 from %stack.0, addrspace 5)
+ ; MUBUF: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: (load 4 from %stack.0 + 4, addrspace 5)
+ ; MUBUF: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: (load 4 from %stack.0 + 8, addrspace 5)
; MUBUF: S_ENDPGM 0
; MUBUF-V2A-LABEL: name: test_spill_v3
; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2
@@ -176,8 +176,8 @@ body: |
; MUBUF-V2A: S_ENDPGM 0
; FLATSCR-LABEL: name: test_spill_v3
; FLATSCR: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF
- ; FLATSCR: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 12 into %stack.0, align 4, addrspace 5)
- ; FLATSCR: $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 12 from %stack.0, align 4, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store 12 into %stack.0, align 4, addrspace 5)
+ ; FLATSCR: $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load 12 from %stack.0, align 4, addrspace 5)
; FLATSCR: S_ENDPGM 0
; FLATSCR-V2A-LABEL: name: test_spill_v3
; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2
@@ -191,12 +191,12 @@ body: |
; FLATSCR-V2A: S_ENDPGM 0
; MUBUF-GFX90A-LABEL: name: test_spill_v3
; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: (store 4 into %stack.0, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 :: (store 4 into %stack.0 + 4, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 :: (store 4 into %stack.0 + 8, addrspace 5)
- ; MUBUF-GFX90A: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: (load 4 from %stack.0, addrspace 5)
- ; MUBUF-GFX90A: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: (load 4 from %stack.0 + 4, addrspace 5)
- ; MUBUF-GFX90A: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: (load 4 from %stack.0 + 8, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: (store 4 into %stack.0, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 :: (store 4 into %stack.0 + 8, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: (load 4 from %stack.0, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: (load 4 from %stack.0 + 4, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: (load 4 from %stack.0 + 8, addrspace 5)
; MUBUF-GFX90A: S_ENDPGM 0
; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v3
; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2
@@ -210,8 +210,8 @@ body: |
; MUBUF-GFX90A-V2A: S_ENDPGM 0
; FLATSCR-GFX90A-LABEL: name: test_spill_v3
; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF
- ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 12 into %stack.0, align 4, addrspace 5)
- ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 12 from %stack.0, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store 12 into %stack.0, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load 12 from %stack.0, align 4, addrspace 5)
; FLATSCR-GFX90A: S_ENDPGM 0
; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v3
; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2
@@ -243,14 +243,14 @@ body: |
bb.0.entry:
; MUBUF-LABEL: name: test_spill_v4
; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 4, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 8, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 12, addrspace 5)
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load 4 from %stack.0, addrspace 5)
- ; MUBUF: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load 4 from %stack.0 + 4, addrspace 5)
- ; MUBUF: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load 4 from %stack.0 + 8, addrspace 5)
- ; MUBUF: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load 4 from %stack.0 + 12, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 8, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 12, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load 4 from %stack.0, addrspace 5)
+ ; MUBUF: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load 4 from %stack.0 + 4, addrspace 5)
+ ; MUBUF: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load 4 from %stack.0 + 8, addrspace 5)
+ ; MUBUF: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load 4 from %stack.0 + 12, addrspace 5)
; MUBUF: S_ENDPGM 0
; MUBUF-V2A-LABEL: name: test_spill_v4
; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3
@@ -266,8 +266,8 @@ body: |
; MUBUF-V2A: S_ENDPGM 0
; FLATSCR-LABEL: name: test_spill_v4
; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF
- ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16 into %stack.0, align 4, addrspace 5)
- ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %stack.0, align 4, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store 16 into %stack.0, align 4, addrspace 5)
+ ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %stack.0, align 4, addrspace 5)
; FLATSCR: S_ENDPGM 0
; FLATSCR-V2A-LABEL: name: test_spill_v4
; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3
@@ -283,14 +283,14 @@ body: |
; FLATSCR-V2A: S_ENDPGM 0
; MUBUF-GFX90A-LABEL: name: test_spill_v4
; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 4, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 8, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 12, addrspace 5)
- ; MUBUF-GFX90A: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load 4 from %stack.0, addrspace 5)
- ; MUBUF-GFX90A: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load 4 from %stack.0 + 4, addrspace 5)
- ; MUBUF-GFX90A: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load 4 from %stack.0 + 8, addrspace 5)
- ; MUBUF-GFX90A: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load 4 from %stack.0 + 12, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 8, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 12, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load 4 from %stack.0, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load 4 from %stack.0 + 4, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load 4 from %stack.0 + 8, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load 4 from %stack.0 + 12, addrspace 5)
; MUBUF-GFX90A: S_ENDPGM 0
; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v4
; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3
@@ -306,8 +306,8 @@ body: |
; MUBUF-GFX90A-V2A: S_ENDPGM 0
; FLATSCR-GFX90A-LABEL: name: test_spill_v4
; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF
- ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16 into %stack.0, align 4, addrspace 5)
- ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %stack.0, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store 16 into %stack.0, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %stack.0, align 4, addrspace 5)
; FLATSCR-GFX90A: S_ENDPGM 0
; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v4
; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3
@@ -341,16 +341,16 @@ body: |
bb.0.entry:
; MUBUF-LABEL: name: test_spill_v5
; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 4, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 8, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 12, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 16, addrspace 5)
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0, addrspace 5)
- ; MUBUF: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 4, addrspace 5)
- ; MUBUF: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 8, addrspace 5)
- ; MUBUF: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 12, addrspace 5)
- ; MUBUF: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 16, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 8, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 12, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 16, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0, addrspace 5)
+ ; MUBUF: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 4, addrspace 5)
+ ; MUBUF: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 8, addrspace 5)
+ ; MUBUF: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 12, addrspace 5)
+ ; MUBUF: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 16, addrspace 5)
; MUBUF: S_ENDPGM 0
; MUBUF-V2A-LABEL: name: test_spill_v5
; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4
@@ -368,10 +368,10 @@ body: |
; MUBUF-V2A: S_ENDPGM 0
; FLATSCR-LABEL: name: test_spill_v5
; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF
- ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 16 into %stack.0, align 4, addrspace 5)
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 16, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 16, addrspace 5)
- ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 16 from %stack.0, align 4, addrspace 5)
- ; FLATSCR: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 16, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 16 into %stack.0, align 4, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 16, addrspace 5)
+ ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 16 from %stack.0, align 4, addrspace 5)
+ ; FLATSCR: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 16, addrspace 5)
; FLATSCR: S_ENDPGM 0
; FLATSCR-V2A-LABEL: name: test_spill_v5
; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4
@@ -389,16 +389,16 @@ body: |
; FLATSCR-V2A: S_ENDPGM 0
; MUBUF-GFX90A-LABEL: name: test_spill_v5
; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 4, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 8, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 12, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 16, addrspace 5)
- ; MUBUF-GFX90A: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0, addrspace 5)
- ; MUBUF-GFX90A: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 4, addrspace 5)
- ; MUBUF-GFX90A: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 8, addrspace 5)
- ; MUBUF-GFX90A: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 12, addrspace 5)
- ; MUBUF-GFX90A: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 16, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 8, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 12, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 16, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 4, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 8, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 12, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 16, addrspace 5)
; MUBUF-GFX90A: S_ENDPGM 0
; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v5
; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4
@@ -416,10 +416,10 @@ body: |
; MUBUF-GFX90A-V2A: S_ENDPGM 0
; FLATSCR-GFX90A-LABEL: name: test_spill_v5
; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF
- ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 16 into %stack.0, align 4, addrspace 5)
- ; FLATSCR-GFX90A: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 16, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 16, addrspace 5)
- ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 16 from %stack.0, align 4, addrspace 5)
- ; FLATSCR-GFX90A: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 16, addrspace 5)
+ ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 16 into %stack.0, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 16, addrspace 5)
+ ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 16 from %stack.0, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 16, addrspace 5)
; FLATSCR-GFX90A: S_ENDPGM 0
; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v5
; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4
@@ -455,18 +455,18 @@ body: |
bb.0.entry:
; MUBUF-LABEL: name: test_spill_v6
; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0 + 4, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0 + 8, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0 + 12, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0 + 16, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0 + 20, addrspace 5)
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0, addrspace 5)
- ; MUBUF: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0 + 4, addrspace 5)
- ; MUBUF: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0 + 8, addrspace 5)
- ; MUBUF: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0 + 12, addrspace 5)
- ; MUBUF: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0 + 16, addrspace 5)
- ; MUBUF: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0 + 20, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0 + 8, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0 + 12, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0 + 16, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0 + 20, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0, addrspace 5)
+ ; MUBUF: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0 + 4, addrspace 5)
+ ; MUBUF: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0 + 8, addrspace 5)
+ ; MUBUF: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0 + 12, addrspace 5)
+ ; MUBUF: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0 + 16, addrspace 5)
+ ; MUBUF: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0 + 20, addrspace 5)
; MUBUF: S_ENDPGM 0
; MUBUF-V2A-LABEL: name: test_spill_v6
; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5
@@ -486,10 +486,10 @@ body: |
; MUBUF-V2A: S_ENDPGM 0
; FLATSCR-LABEL: name: test_spill_v6
; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF
- ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 16 into %stack.0, align 4, addrspace 5)
- ; FLATSCR: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr4_vgpr5, $sgpr32, 16, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 8 into %stack.0 + 16, align 4, addrspace 5)
- ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 16 from %stack.0, align 4, addrspace 5)
- ; FLATSCR: $vgpr4_vgpr5 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 16, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 8 from %stack.0 + 16, align 4, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 16 into %stack.0, align 4, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr4_vgpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 8 into %stack.0 + 16, align 4, addrspace 5)
+ ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 16 from %stack.0, align 4, addrspace 5)
+ ; FLATSCR: $vgpr4_vgpr5 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 8 from %stack.0 + 16, align 4, addrspace 5)
; FLATSCR: S_ENDPGM 0
; FLATSCR-V2A-LABEL: name: test_spill_v6
; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5
@@ -509,18 +509,18 @@ body: |
; FLATSCR-V2A: S_ENDPGM 0
; MUBUF-GFX90A-LABEL: name: test_spill_v6
; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0 + 4, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0 + 8, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0 + 12, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0 + 16, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0 + 20, addrspace 5)
- ; MUBUF-GFX90A: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0, addrspace 5)
- ; MUBUF-GFX90A: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0 + 4, addrspace 5)
- ; MUBUF-GFX90A: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0 + 8, addrspace 5)
- ; MUBUF-GFX90A: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0 + 12, addrspace 5)
- ; MUBUF-GFX90A: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0 + 16, addrspace 5)
- ; MUBUF-GFX90A: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0 + 20, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0 + 8, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0 + 12, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0 + 16, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0 + 20, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0 + 4, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0 + 8, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0 + 12, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0 + 16, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0 + 20, addrspace 5)
; MUBUF-GFX90A: S_ENDPGM 0
; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v6
; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5
@@ -540,10 +540,10 @@ body: |
; MUBUF-GFX90A-V2A: S_ENDPGM 0
; FLATSCR-GFX90A-LABEL: name: test_spill_v6
; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF
- ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 16 into %stack.0, align 4, addrspace 5)
- ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr4_vgpr5, $sgpr32, 16, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 8 into %stack.0 + 16, align 4, addrspace 5)
- ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 16 from %stack.0, align 4, addrspace 5)
- ; FLATSCR-GFX90A: $vgpr4_vgpr5 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 16, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 8 from %stack.0 + 16, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 16 into %stack.0, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr4_vgpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 8 into %stack.0 + 16, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 16 from %stack.0, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: $vgpr4_vgpr5 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 8 from %stack.0 + 16, align 4, addrspace 5)
; FLATSCR-GFX90A: S_ENDPGM 0
; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v6
; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5
@@ -581,22 +581,22 @@ body: |
bb.0.entry:
; MUBUF-LABEL: name: test_spill_v8
; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 4, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 8, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 12, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 16, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 20, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 24, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 28, addrspace 5)
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0, addrspace 5)
- ; MUBUF: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 4, addrspace 5)
- ; MUBUF: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 8, addrspace 5)
- ; MUBUF: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 12, addrspace 5)
- ; MUBUF: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 16, addrspace 5)
- ; MUBUF: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 20, addrspace 5)
- ; MUBUF: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 24, addrspace 5)
- ; MUBUF: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 28, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 8, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 12, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 16, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 20, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 24, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 28, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0, addrspace 5)
+ ; MUBUF: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 4, addrspace 5)
+ ; MUBUF: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 8, addrspace 5)
+ ; MUBUF: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 12, addrspace 5)
+ ; MUBUF: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 16, addrspace 5)
+ ; MUBUF: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 20, addrspace 5)
+ ; MUBUF: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 24, addrspace 5)
+ ; MUBUF: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 28, addrspace 5)
; MUBUF: S_ENDPGM 0
; MUBUF-V2A-LABEL: name: test_spill_v8
; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7
@@ -620,10 +620,10 @@ body: |
; MUBUF-V2A: S_ENDPGM 0
; FLATSCR-LABEL: name: test_spill_v8
; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
- ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 16 into %stack.0, align 4, addrspace 5)
- ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 16 into %stack.0 + 16, align 4, addrspace 5)
- ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 16 from %stack.0, align 4, addrspace 5)
- ; FLATSCR: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 16 from %stack.0 + 16, align 4, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 16 into %stack.0, align 4, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 16 into %stack.0 + 16, align 4, addrspace 5)
+ ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 16 from %stack.0, align 4, addrspace 5)
+ ; FLATSCR: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 16 from %stack.0 + 16, align 4, addrspace 5)
; FLATSCR: S_ENDPGM 0
; FLATSCR-V2A-LABEL: name: test_spill_v8
; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7
@@ -647,22 +647,22 @@ body: |
; FLATSCR-V2A: S_ENDPGM 0
; MUBUF-GFX90A-LABEL: name: test_spill_v8
; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 4, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 8, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 12, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 16, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 20, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 24, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 28, addrspace 5)
- ; MUBUF-GFX90A: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0, addrspace 5)
- ; MUBUF-GFX90A: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 4, addrspace 5)
- ; MUBUF-GFX90A: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 8, addrspace 5)
- ; MUBUF-GFX90A: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 12, addrspace 5)
- ; MUBUF-GFX90A: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 16, addrspace 5)
- ; MUBUF-GFX90A: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 20, addrspace 5)
- ; MUBUF-GFX90A: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 24, addrspace 5)
- ; MUBUF-GFX90A: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 28, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 8, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 12, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 16, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 20, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 24, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 28, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 4, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 8, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 12, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 16, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 20, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 24, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 28, addrspace 5)
; MUBUF-GFX90A: S_ENDPGM 0
; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v8
; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7
@@ -686,10 +686,10 @@ body: |
; MUBUF-GFX90A-V2A: S_ENDPGM 0
; FLATSCR-GFX90A-LABEL: name: test_spill_v8
; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
- ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 16 into %stack.0, align 4, addrspace 5)
- ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 16 into %stack.0 + 16, align 4, addrspace 5)
- ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 16 from %stack.0, align 4, addrspace 5)
- ; FLATSCR-GFX90A: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 16 from %stack.0 + 16, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 16 into %stack.0, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 16 into %stack.0 + 16, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 16 from %stack.0, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 16 from %stack.0 + 16, align 4, addrspace 5)
; FLATSCR-GFX90A: S_ENDPGM 0
; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v8
; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7
@@ -731,38 +731,38 @@ body: |
bb.0.entry:
; MUBUF-LABEL: name: test_spill_v16
; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 4, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 8, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 12, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 16, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 20, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 24, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 28, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 32, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 36, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 40, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 44, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 48, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 52, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 56, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 60, addrspace 5)
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0, addrspace 5)
- ; MUBUF: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 4, addrspace 5)
- ; MUBUF: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 8, addrspace 5)
- ; MUBUF: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 12, addrspace 5)
- ; MUBUF: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 16, addrspace 5)
- ; MUBUF: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 20, addrspace 5)
- ; MUBUF: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 24, addrspace 5)
- ; MUBUF: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 28, addrspace 5)
- ; MUBUF: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 32, addrspace 5)
- ; MUBUF: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 36, addrspace 5)
- ; MUBUF: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 40, addrspace 5)
- ; MUBUF: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 44, addrspace 5)
- ; MUBUF: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 48, addrspace 5)
- ; MUBUF: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 52, addrspace 5)
- ; MUBUF: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 56, addrspace 5)
- ; MUBUF: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 60, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 8, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 12, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 16, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 20, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 24, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 28, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 32, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 36, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 40, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 44, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 48, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 52, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 56, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 60, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0, addrspace 5)
+ ; MUBUF: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 4, addrspace 5)
+ ; MUBUF: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 8, addrspace 5)
+ ; MUBUF: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 12, addrspace 5)
+ ; MUBUF: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 16, addrspace 5)
+ ; MUBUF: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 20, addrspace 5)
+ ; MUBUF: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 24, addrspace 5)
+ ; MUBUF: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 28, addrspace 5)
+ ; MUBUF: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 32, addrspace 5)
+ ; MUBUF: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 36, addrspace 5)
+ ; MUBUF: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 40, addrspace 5)
+ ; MUBUF: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 44, addrspace 5)
+ ; MUBUF: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 48, addrspace 5)
+ ; MUBUF: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 52, addrspace 5)
+ ; MUBUF: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 56, addrspace 5)
+ ; MUBUF: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 60, addrspace 5)
; MUBUF: S_ENDPGM 0
; MUBUF-V2A-LABEL: name: test_spill_v16
; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15
@@ -802,14 +802,14 @@ body: |
; MUBUF-V2A: S_ENDPGM 0
; FLATSCR-LABEL: name: test_spill_v16
; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF
- ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 16 into %stack.0, align 4, addrspace 5)
- ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 16 into %stack.0 + 16, align 4, addrspace 5)
- ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 16 into %stack.0 + 32, align 4, addrspace 5)
- ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 16 into %stack.0 + 48, align 4, addrspace 5)
- ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 16 from %stack.0, align 4, addrspace 5)
- ; FLATSCR: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 16 from %stack.0 + 16, align 4, addrspace 5)
- ; FLATSCR: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 16 from %stack.0 + 32, align 4, addrspace 5)
- ; FLATSCR: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 16 from %stack.0 + 48, align 4, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 16 into %stack.0, align 4, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 16 into %stack.0 + 16, align 4, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 16 into %stack.0 + 32, align 4, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 16 into %stack.0 + 48, align 4, addrspace 5)
+ ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 16 from %stack.0, align 4, addrspace 5)
+ ; FLATSCR: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 16 from %stack.0 + 16, align 4, addrspace 5)
+ ; FLATSCR: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 16 from %stack.0 + 32, align 4, addrspace 5)
+ ; FLATSCR: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 16 from %stack.0 + 48, align 4, addrspace 5)
; FLATSCR: S_ENDPGM 0
; FLATSCR-V2A-LABEL: name: test_spill_v16
; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15
@@ -849,38 +849,38 @@ body: |
; FLATSCR-V2A: S_ENDPGM 0
; MUBUF-GFX90A-LABEL: name: test_spill_v16
; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 4, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 8, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 12, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 16, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 20, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 24, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 28, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 32, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 36, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 40, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 44, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 48, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 52, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 56, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 60, addrspace 5)
- ; MUBUF-GFX90A: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0, addrspace 5)
- ; MUBUF-GFX90A: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 4, addrspace 5)
- ; MUBUF-GFX90A: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 8, addrspace 5)
- ; MUBUF-GFX90A: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 12, addrspace 5)
- ; MUBUF-GFX90A: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 16, addrspace 5)
- ; MUBUF-GFX90A: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 20, addrspace 5)
- ; MUBUF-GFX90A: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 24, addrspace 5)
- ; MUBUF-GFX90A: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 28, addrspace 5)
- ; MUBUF-GFX90A: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 32, addrspace 5)
- ; MUBUF-GFX90A: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 36, addrspace 5)
- ; MUBUF-GFX90A: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 40, addrspace 5)
- ; MUBUF-GFX90A: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 44, addrspace 5)
- ; MUBUF-GFX90A: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 48, addrspace 5)
- ; MUBUF-GFX90A: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 52, addrspace 5)
- ; MUBUF-GFX90A: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 56, addrspace 5)
- ; MUBUF-GFX90A: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 60, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 8, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 12, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 16, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 20, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 24, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 28, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 32, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 36, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 40, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 44, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 48, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 52, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 56, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 60, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 4, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 8, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 12, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 16, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 20, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 24, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 28, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 32, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 36, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 40, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 44, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 48, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 52, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 56, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 60, addrspace 5)
; MUBUF-GFX90A: S_ENDPGM 0
; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v16
; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15
@@ -920,14 +920,14 @@ body: |
; MUBUF-GFX90A-V2A: S_ENDPGM 0
; FLATSCR-GFX90A-LABEL: name: test_spill_v16
; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF
- ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 16 into %stack.0, align 4, addrspace 5)
- ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 16 into %stack.0 + 16, align 4, addrspace 5)
- ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 16 into %stack.0 + 32, align 4, addrspace 5)
- ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 16 into %stack.0 + 48, align 4, addrspace 5)
- ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 16 from %stack.0, align 4, addrspace 5)
- ; FLATSCR-GFX90A: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 16 from %stack.0 + 16, align 4, addrspace 5)
- ; FLATSCR-GFX90A: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 16 from %stack.0 + 32, align 4, addrspace 5)
- ; FLATSCR-GFX90A: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 16 from %stack.0 + 48, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 16 into %stack.0, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 16 into %stack.0 + 16, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 16 into %stack.0 + 32, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 16 into %stack.0 + 48, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 16 from %stack.0, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 16 from %stack.0 + 16, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 16 from %stack.0 + 32, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 16 from %stack.0 + 48, align 4, addrspace 5)
; FLATSCR-GFX90A: S_ENDPGM 0
; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v16
; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15
@@ -985,70 +985,70 @@ body: |
bb.0.entry:
; MUBUF-LABEL: name: test_spill_v32
; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 4, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 8, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 12, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 16, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 20, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 24, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 28, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 32, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 36, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 40, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 44, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 48, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 52, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 56, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 60, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 64, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr17, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 68, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr18, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 72, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr19, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 76, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 80, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr21, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 84, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr22, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 88, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr23, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 92, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr24, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 96, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr25, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 100, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr26, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 104, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr27, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 108, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr28, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 112, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr29, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 116, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr30, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 120, addrspace 5)
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr31, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 124, addrspace 5)
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0, addrspace 5)
- ; MUBUF: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 4, addrspace 5)
- ; MUBUF: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 8, addrspace 5)
- ; MUBUF: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 12, addrspace 5)
- ; MUBUF: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 16, addrspace 5)
- ; MUBUF: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 20, addrspace 5)
- ; MUBUF: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 24, addrspace 5)
- ; MUBUF: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 28, addrspace 5)
- ; MUBUF: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 32, addrspace 5)
- ; MUBUF: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 36, addrspace 5)
- ; MUBUF: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 40, addrspace 5)
- ; MUBUF: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 44, addrspace 5)
- ; MUBUF: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 48, addrspace 5)
- ; MUBUF: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 52, addrspace 5)
- ; MUBUF: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 56, addrspace 5)
- ; MUBUF: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 60, addrspace 5)
- ; MUBUF: $vgpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 64, addrspace 5)
- ; MUBUF: $vgpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 68, addrspace 5)
- ; MUBUF: $vgpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 72, addrspace 5)
- ; MUBUF: $vgpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 76, addrspace 5)
- ; MUBUF: $vgpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 80, addrspace 5)
- ; MUBUF: $vgpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 84, addrspace 5)
- ; MUBUF: $vgpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 88, addrspace 5)
- ; MUBUF: $vgpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 92, addrspace 5)
- ; MUBUF: $vgpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 96, addrspace 5)
- ; MUBUF: $vgpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 100, addrspace 5)
- ; MUBUF: $vgpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 104, addrspace 5)
- ; MUBUF: $vgpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 108, addrspace 5)
- ; MUBUF: $vgpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 112, addrspace 5)
- ; MUBUF: $vgpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 116, addrspace 5)
- ; MUBUF: $vgpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 120, addrspace 5)
- ; MUBUF: $vgpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 124, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 8, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 12, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 16, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 20, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 24, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 28, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 32, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 36, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 40, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 44, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 48, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 52, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 56, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 60, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 64, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr17, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 68, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr18, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 72, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr19, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 76, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 80, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr21, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 84, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr22, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 88, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr23, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 92, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr24, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 96, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr25, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 100, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr26, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 104, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr27, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 108, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr28, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 112, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr29, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 116, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr30, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 120, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr31, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 124, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0, addrspace 5)
+ ; MUBUF: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 4, addrspace 5)
+ ; MUBUF: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 8, addrspace 5)
+ ; MUBUF: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 12, addrspace 5)
+ ; MUBUF: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 16, addrspace 5)
+ ; MUBUF: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 20, addrspace 5)
+ ; MUBUF: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 24, addrspace 5)
+ ; MUBUF: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 28, addrspace 5)
+ ; MUBUF: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 32, addrspace 5)
+ ; MUBUF: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 36, addrspace 5)
+ ; MUBUF: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 40, addrspace 5)
+ ; MUBUF: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 44, addrspace 5)
+ ; MUBUF: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 48, addrspace 5)
+ ; MUBUF: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 52, addrspace 5)
+ ; MUBUF: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 56, addrspace 5)
+ ; MUBUF: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 60, addrspace 5)
+ ; MUBUF: $vgpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 64, addrspace 5)
+ ; MUBUF: $vgpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 68, addrspace 5)
+ ; MUBUF: $vgpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 72, addrspace 5)
+ ; MUBUF: $vgpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 76, addrspace 5)
+ ; MUBUF: $vgpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 80, addrspace 5)
+ ; MUBUF: $vgpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 84, addrspace 5)
+ ; MUBUF: $vgpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 88, addrspace 5)
+ ; MUBUF: $vgpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 92, addrspace 5)
+ ; MUBUF: $vgpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 96, addrspace 5)
+ ; MUBUF: $vgpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 100, addrspace 5)
+ ; MUBUF: $vgpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 104, addrspace 5)
+ ; MUBUF: $vgpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 108, addrspace 5)
+ ; MUBUF: $vgpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 112, addrspace 5)
+ ; MUBUF: $vgpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 116, addrspace 5)
+ ; MUBUF: $vgpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 120, addrspace 5)
+ ; MUBUF: $vgpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 124, addrspace 5)
; MUBUF: S_ENDPGM 0
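The regenerated MUBUF checks above show why the operand count after the offset shrinks: the separate glc, slc and dlc immediates are folded into one cache-policy bitmask operand. A minimal standalone sketch of that encoding follows, assuming the CPol bit values this patch introduces in SIDefines.h (GLC = 1, SLC = 2, DLC = 4); this is an illustration of the bitmask idea, not a copy of the target code.

// Sketch: one cache_policy bitmask in place of three separate immediates.
// Bit values assumed to match the CPol enum added in SIDefines.h.
#include <cstdio>

namespace CPol {
enum CPol : unsigned {
  GLC = 1,              // globally coherent
  SLC = 2,              // system level coherent
  DLC = 4,              // device level coherent
  ALL = GLC | SLC | DLC // mask of all known policy bits
};
} // namespace CPol

int main() {
  // A policy such as "glc dlc" becomes a single immediate value of 5.
  unsigned Policy = CPol::GLC | CPol::DLC;
  std::printf("glc=%u slc=%u dlc=%u\n",
              (Policy & CPol::GLC) ? 1u : 0u,
              (Policy & CPol::SLC) ? 1u : 0u,
              (Policy & CPol::DLC) ? 1u : 0u);
  return 0;
}

With this representation the printer and parser only have to test individual bits of one operand, which is why each store/load check line above carries one policy immediate where the old lines carried several zeros.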
; MUBUF-V2A-LABEL: name: test_spill_v32
; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $agpr16, $agpr17, $agpr18, $agpr19, $agpr20, $agpr21, $agpr22, $agpr23, $agpr24, $agpr25, $agpr26, $agpr27, $agpr28, $agpr29, $agpr30, $agpr31
@@ -1120,22 +1120,22 @@ body: |
; MUBUF-V2A: S_ENDPGM 0
; FLATSCR-LABEL: name: test_spill_v32
; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF
- ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0, align 4, addrspace 5)
- ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 16, align 4, addrspace 5)
- ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 32, align 4, addrspace 5)
- ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 48, align 4, addrspace 5)
- ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr16_vgpr17_vgpr18_vgpr19, $sgpr32, 64, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 64, align 4, addrspace 5)
- ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr20_vgpr21_vgpr22_vgpr23, $sgpr32, 80, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 80, align 4, addrspace 5)
- ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr24_vgpr25_vgpr26_vgpr27, $sgpr32, 96, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 96, align 4, addrspace 5)
- ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr28_vgpr29_vgpr30_vgpr31, $sgpr32, 112, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 112, align 4, addrspace 5)
- ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0, align 4, addrspace 5)
- ; FLATSCR: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 16, align 4, addrspace 5)
- ; FLATSCR: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 32, align 4, addrspace 5)
- ; FLATSCR: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 48, align 4, addrspace 5)
- ; FLATSCR: $vgpr16_vgpr17_vgpr18_vgpr19 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 64, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 64, align 4, addrspace 5)
- ; FLATSCR: $vgpr20_vgpr21_vgpr22_vgpr23 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 80, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 80, align 4, addrspace 5)
- ; FLATSCR: $vgpr24_vgpr25_vgpr26_vgpr27 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 96, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 96, align 4, addrspace 5)
- ; FLATSCR: $vgpr28_vgpr29_vgpr30_vgpr31 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 112, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 112, align 4, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0, align 4, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 16, align 4, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 32, align 4, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 48, align 4, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr16_vgpr17_vgpr18_vgpr19, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 64, align 4, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr20_vgpr21_vgpr22_vgpr23, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 80, align 4, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr24_vgpr25_vgpr26_vgpr27, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 96, align 4, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr28_vgpr29_vgpr30_vgpr31, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 112, align 4, addrspace 5)
+ ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0, align 4, addrspace 5)
+ ; FLATSCR: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 16, align 4, addrspace 5)
+ ; FLATSCR: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 32, align 4, addrspace 5)
+ ; FLATSCR: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 48, align 4, addrspace 5)
+ ; FLATSCR: $vgpr16_vgpr17_vgpr18_vgpr19 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 64, align 4, addrspace 5)
+ ; FLATSCR: $vgpr20_vgpr21_vgpr22_vgpr23 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 80, align 4, addrspace 5)
+ ; FLATSCR: $vgpr24_vgpr25_vgpr26_vgpr27 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 96, align 4, addrspace 5)
+ ; FLATSCR: $vgpr28_vgpr29_vgpr30_vgpr31 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 112, align 4, addrspace 5)
; FLATSCR: S_ENDPGM 0
; FLATSCR-V2A-LABEL: name: test_spill_v32
; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $agpr16, $agpr17, $agpr18, $agpr19, $agpr20, $agpr21, $agpr22, $agpr23, $agpr24, $agpr25, $agpr26, $agpr27, $agpr28, $agpr29, $agpr30, $agpr31
@@ -1207,70 +1207,70 @@ body: |
; FLATSCR-V2A: S_ENDPGM 0
; MUBUF-GFX90A-LABEL: name: test_spill_v32
; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 4, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 8, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 12, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 16, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 20, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 24, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 28, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 32, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 36, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 40, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 44, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 48, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 52, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 56, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 60, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 64, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr17, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 68, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr18, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 72, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr19, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 76, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 80, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr21, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 84, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr22, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 88, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr23, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 92, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr24, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 96, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr25, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 100, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr26, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 104, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr27, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 108, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr28, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 112, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr29, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 116, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr30, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 120, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr31, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 124, addrspace 5)
- ; MUBUF-GFX90A: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0, addrspace 5)
- ; MUBUF-GFX90A: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 4, addrspace 5)
- ; MUBUF-GFX90A: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 8, addrspace 5)
- ; MUBUF-GFX90A: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 12, addrspace 5)
- ; MUBUF-GFX90A: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 16, addrspace 5)
- ; MUBUF-GFX90A: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 20, addrspace 5)
- ; MUBUF-GFX90A: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 24, addrspace 5)
- ; MUBUF-GFX90A: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 28, addrspace 5)
- ; MUBUF-GFX90A: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 32, addrspace 5)
- ; MUBUF-GFX90A: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 36, addrspace 5)
- ; MUBUF-GFX90A: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 40, addrspace 5)
- ; MUBUF-GFX90A: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 44, addrspace 5)
- ; MUBUF-GFX90A: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 48, addrspace 5)
- ; MUBUF-GFX90A: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 52, addrspace 5)
- ; MUBUF-GFX90A: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 56, addrspace 5)
- ; MUBUF-GFX90A: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 60, addrspace 5)
- ; MUBUF-GFX90A: $vgpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 64, addrspace 5)
- ; MUBUF-GFX90A: $vgpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 68, addrspace 5)
- ; MUBUF-GFX90A: $vgpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 72, addrspace 5)
- ; MUBUF-GFX90A: $vgpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 76, addrspace 5)
- ; MUBUF-GFX90A: $vgpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 80, addrspace 5)
- ; MUBUF-GFX90A: $vgpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 84, addrspace 5)
- ; MUBUF-GFX90A: $vgpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 88, addrspace 5)
- ; MUBUF-GFX90A: $vgpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 92, addrspace 5)
- ; MUBUF-GFX90A: $vgpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 96, addrspace 5)
- ; MUBUF-GFX90A: $vgpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 100, addrspace 5)
- ; MUBUF-GFX90A: $vgpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 104, addrspace 5)
- ; MUBUF-GFX90A: $vgpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 108, addrspace 5)
- ; MUBUF-GFX90A: $vgpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 112, addrspace 5)
- ; MUBUF-GFX90A: $vgpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 116, addrspace 5)
- ; MUBUF-GFX90A: $vgpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 120, addrspace 5)
- ; MUBUF-GFX90A: $vgpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 124, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 8, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 12, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 16, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 20, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 24, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 28, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 32, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 36, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 40, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 44, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 48, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 52, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 56, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 60, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 64, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr17, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 68, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr18, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 72, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr19, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 76, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 80, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr21, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 84, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr22, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 88, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr23, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 92, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr24, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 96, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr25, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 100, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr26, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 104, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr27, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 108, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr28, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 112, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr29, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 116, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr30, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 120, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr31, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 124, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 4, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 8, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 12, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 16, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 20, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 24, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 28, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 32, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 36, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 40, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 44, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 48, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 52, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 56, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 60, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 64, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 68, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 72, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 76, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 80, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 84, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 88, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 92, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 96, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 100, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 104, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 108, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 112, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 116, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 120, addrspace 5)
+ ; MUBUF-GFX90A: $vgpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 124, addrspace 5)
; MUBUF-GFX90A: S_ENDPGM 0
; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v32
; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $agpr16, $agpr17, $agpr18, $agpr19, $agpr20, $agpr21, $agpr22, $agpr23, $agpr24, $agpr25, $agpr26, $agpr27, $agpr28, $agpr29, $agpr30, $agpr31
@@ -1342,22 +1342,22 @@ body: |
; MUBUF-GFX90A-V2A: S_ENDPGM 0
; FLATSCR-GFX90A-LABEL: name: test_spill_v32
; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF
- ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0, align 4, addrspace 5)
- ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 16, align 4, addrspace 5)
- ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 32, align 4, addrspace 5)
- ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 48, align 4, addrspace 5)
- ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr16_vgpr17_vgpr18_vgpr19, $sgpr32, 64, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 64, align 4, addrspace 5)
- ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr20_vgpr21_vgpr22_vgpr23, $sgpr32, 80, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 80, align 4, addrspace 5)
- ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr24_vgpr25_vgpr26_vgpr27, $sgpr32, 96, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 96, align 4, addrspace 5)
- ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr28_vgpr29_vgpr30_vgpr31, $sgpr32, 112, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 112, align 4, addrspace 5)
- ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0, align 4, addrspace 5)
- ; FLATSCR-GFX90A: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 16, align 4, addrspace 5)
- ; FLATSCR-GFX90A: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 32, align 4, addrspace 5)
- ; FLATSCR-GFX90A: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 48, align 4, addrspace 5)
- ; FLATSCR-GFX90A: $vgpr16_vgpr17_vgpr18_vgpr19 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 64, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 64, align 4, addrspace 5)
- ; FLATSCR-GFX90A: $vgpr20_vgpr21_vgpr22_vgpr23 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 80, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 80, align 4, addrspace 5)
- ; FLATSCR-GFX90A: $vgpr24_vgpr25_vgpr26_vgpr27 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 96, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 96, align 4, addrspace 5)
- ; FLATSCR-GFX90A: $vgpr28_vgpr29_vgpr30_vgpr31 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 112, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 112, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 16, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 32, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 48, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr16_vgpr17_vgpr18_vgpr19, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 64, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr20_vgpr21_vgpr22_vgpr23, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 80, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr24_vgpr25_vgpr26_vgpr27, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 96, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr28_vgpr29_vgpr30_vgpr31, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 112, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 16, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 32, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 48, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: $vgpr16_vgpr17_vgpr18_vgpr19 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 64, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: $vgpr20_vgpr21_vgpr22_vgpr23 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 80, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: $vgpr24_vgpr25_vgpr26_vgpr27 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 96, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: $vgpr28_vgpr29_vgpr30_vgpr31 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 112, align 4, addrspace 5)
; FLATSCR-GFX90A: S_ENDPGM 0
; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v32
; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $agpr16, $agpr17, $agpr18, $agpr19, $agpr20, $agpr21, $agpr22, $agpr23, $agpr24, $agpr25, $agpr26, $agpr27, $agpr28, $agpr29, $agpr30, $agpr31
@@ -1448,8 +1448,8 @@ body: |
; MUBUF-LABEL: name: test_spill_a1
; MUBUF: $agpr0 = IMPLICIT_DEF
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
; MUBUF: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
; MUBUF: S_ENDPGM 0
; MUBUF-V2A-LABEL: name: test_spill_a1
@@ -1461,8 +1461,8 @@ body: |
; FLATSCR-LABEL: name: test_spill_a1
; FLATSCR: $agpr0 = IMPLICIT_DEF
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5)
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
; FLATSCR: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
; FLATSCR: S_ENDPGM 0
; FLATSCR-V2A-LABEL: name: test_spill_a1
@@ -1473,8 +1473,8 @@ body: |
; FLATSCR-V2A: S_ENDPGM 0
; MUBUF-GFX90A-LABEL: name: test_spill_a1
; MUBUF-GFX90A: $agpr0 = IMPLICIT_DEF
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
- ; MUBUF-GFX90A: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
+ ; MUBUF-GFX90A: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
; MUBUF-GFX90A: S_ENDPGM 0
; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a1
; MUBUF-GFX90A-V2A: liveins: $vgpr0
@@ -1484,8 +1484,8 @@ body: |
; MUBUF-GFX90A-V2A: S_ENDPGM 0
; FLATSCR-GFX90A-LABEL: name: test_spill_a1
; FLATSCR-GFX90A: $agpr0 = IMPLICIT_DEF
- ; FLATSCR-GFX90A: SCRATCH_STORE_DWORD_SADDR killed $agpr0, $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5)
- ; FLATSCR-GFX90A: $agpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
+ ; FLATSCR-GFX90A: SCRATCH_STORE_DWORD_SADDR killed $agpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5)
+ ; FLATSCR-GFX90A: $agpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
; FLATSCR-GFX90A: S_ENDPGM 0
; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a1
; FLATSCR-GFX90A-V2A: liveins: $vgpr0
@@ -1514,12 +1514,12 @@ body: |
; MUBUF-LABEL: name: test_spill_a2
; MUBUF: $agpr0_agpr1 = IMPLICIT_DEF
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store 4 into %stack.0, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store 4 into %stack.0, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store 4 into %stack.0 + 4, addrspace 5)
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
; MUBUF: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 4, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 4, addrspace 5)
; MUBUF: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1
; MUBUF: S_ENDPGM 0
; MUBUF-V2A-LABEL: name: test_spill_a2
@@ -1533,12 +1533,12 @@ body: |
; FLATSCR-LABEL: name: test_spill_a2
; FLATSCR: $agpr0_agpr1 = IMPLICIT_DEF
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1 :: (store 4 into %stack.0, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1 :: (store 4 into %stack.0, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1 :: (store 4 into %stack.0 + 4, addrspace 5)
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
; FLATSCR: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 4, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 4, addrspace 5)
; FLATSCR: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1
; FLATSCR: S_ENDPGM 0
; FLATSCR-V2A-LABEL: name: test_spill_a2
@@ -1551,10 +1551,10 @@ body: |
; FLATSCR-V2A: S_ENDPGM 0
; MUBUF-GFX90A-LABEL: name: test_spill_a2
; MUBUF-GFX90A: $agpr0_agpr1 = IMPLICIT_DEF
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 :: (store 4 into %stack.0, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store 4 into %stack.0 + 4, addrspace 5)
- ; MUBUF-GFX90A: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: (load 4 from %stack.0, addrspace 5)
- ; MUBUF-GFX90A: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: (load 4 from %stack.0 + 4, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 :: (store 4 into %stack.0, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; MUBUF-GFX90A: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: (load 4 from %stack.0, addrspace 5)
+ ; MUBUF-GFX90A: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: (load 4 from %stack.0 + 4, addrspace 5)
; MUBUF-GFX90A: S_ENDPGM 0
; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a2
; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1
@@ -1566,8 +1566,8 @@ body: |
; MUBUF-GFX90A-V2A: S_ENDPGM 0
; FLATSCR-GFX90A-LABEL: name: test_spill_a2
; FLATSCR-GFX90A: $agpr0_agpr1 = IMPLICIT_DEF
- ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX2_SADDR killed $agpr0_agpr1, $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8 into %stack.0, align 4, addrspace 5)
- ; FLATSCR-GFX90A: $agpr0_agpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8 from %stack.0, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX2_SADDR killed $agpr0_agpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store 8 into %stack.0, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: $agpr0_agpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load 8 from %stack.0, align 4, addrspace 5)
; FLATSCR-GFX90A: S_ENDPGM 0
; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a2
; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1
@@ -1598,16 +1598,16 @@ body: |
; MUBUF-LABEL: name: test_spill_a3
; MUBUF: $agpr0_agpr1_agpr2 = IMPLICIT_DEF
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store 4 into %stack.0, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store 4 into %stack.0, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store 4 into %stack.0 + 4, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2 :: (store 4 into %stack.0 + 8, addrspace 5)
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2 :: (store 4 into %stack.0 + 8, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
; MUBUF: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 4, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 4, addrspace 5)
; MUBUF: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 8, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 8, addrspace 5)
; MUBUF: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
; MUBUF: S_ENDPGM 0
; MUBUF-V2A-LABEL: name: test_spill_a3
@@ -1623,16 +1623,16 @@ body: |
; FLATSCR-LABEL: name: test_spill_a3
; FLATSCR: $agpr0_agpr1_agpr2 = IMPLICIT_DEF
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2 :: (store 4 into %stack.0, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2 :: (store 4 into %stack.0, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2 :: (store 4 into %stack.0 + 4, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2 :: (store 4 into %stack.0 + 8, addrspace 5)
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2 :: (store 4 into %stack.0 + 8, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
; FLATSCR: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 4, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 4, addrspace 5)
; FLATSCR: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 8, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 8, addrspace 5)
; FLATSCR: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
; FLATSCR: S_ENDPGM 0
; FLATSCR-V2A-LABEL: name: test_spill_a3
@@ -1647,12 +1647,12 @@ body: |
; FLATSCR-V2A: S_ENDPGM 0
; MUBUF-GFX90A-LABEL: name: test_spill_a3
; MUBUF-GFX90A: $agpr0_agpr1_agpr2 = IMPLICIT_DEF
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 :: (store 4 into %stack.0, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store 4 into %stack.0 + 4, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2 :: (store 4 into %stack.0 + 8, addrspace 5)
- ; MUBUF-GFX90A: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load 4 from %stack.0, addrspace 5)
- ; MUBUF-GFX90A: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load 4 from %stack.0 + 4, addrspace 5)
- ; MUBUF-GFX90A: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load 4 from %stack.0 + 8, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 :: (store 4 into %stack.0, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2 :: (store 4 into %stack.0 + 8, addrspace 5)
+ ; MUBUF-GFX90A: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load 4 from %stack.0, addrspace 5)
+ ; MUBUF-GFX90A: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load 4 from %stack.0 + 4, addrspace 5)
+ ; MUBUF-GFX90A: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load 4 from %stack.0 + 8, addrspace 5)
; MUBUF-GFX90A: S_ENDPGM 0
; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a3
; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2
@@ -1666,8 +1666,8 @@ body: |
; MUBUF-GFX90A-V2A: S_ENDPGM 0
; FLATSCR-GFX90A-LABEL: name: test_spill_a3
; FLATSCR-GFX90A: $agpr0_agpr1_agpr2 = IMPLICIT_DEF
- ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX3_SADDR killed $agpr0_agpr1_agpr2, $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 12 into %stack.0, align 4, addrspace 5)
- ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 12 from %stack.0, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX3_SADDR killed $agpr0_agpr1_agpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store 12 into %stack.0, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load 12 from %stack.0, align 4, addrspace 5)
; FLATSCR-GFX90A: S_ENDPGM 0
; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a3
; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2
@@ -1700,20 +1700,20 @@ body: |
; MUBUF-LABEL: name: test_spill_a4
; MUBUF: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 :: (store 4 into %stack.0, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 :: (store 4 into %stack.0, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 :: (store 4 into %stack.0 + 4, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 :: (store 4 into %stack.0 + 8, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 :: (store 4 into %stack.0 + 8, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store 4 into %stack.0 + 12, addrspace 5)
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store 4 into %stack.0 + 12, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
; MUBUF: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 4, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 4, addrspace 5)
; MUBUF: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 8, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 8, addrspace 5)
; MUBUF: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 12, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 12, addrspace 5)
; MUBUF: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3
; MUBUF: S_ENDPGM 0
; MUBUF-V2A-LABEL: name: test_spill_a4
@@ -1731,20 +1731,20 @@ body: |
; FLATSCR-LABEL: name: test_spill_a4
; FLATSCR: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3 :: (store 4 into %stack.0, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3 :: (store 4 into %stack.0, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3 :: (store 4 into %stack.0 + 4, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3 :: (store 4 into %stack.0 + 8, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3 :: (store 4 into %stack.0 + 8, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store 4 into %stack.0 + 12, addrspace 5)
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store 4 into %stack.0 + 12, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
; FLATSCR: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 4, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 4, addrspace 5)
; FLATSCR: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 8, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 8, addrspace 5)
; FLATSCR: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 12, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 12, addrspace 5)
; FLATSCR: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3
; FLATSCR: S_ENDPGM 0
; FLATSCR-V2A-LABEL: name: test_spill_a4
@@ -1761,14 +1761,14 @@ body: |
; FLATSCR-V2A: S_ENDPGM 0
; MUBUF-GFX90A-LABEL: name: test_spill_a4
; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 :: (store 4 into %stack.0, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 :: (store 4 into %stack.0 + 4, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 :: (store 4 into %stack.0 + 8, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store 4 into %stack.0 + 12, addrspace 5)
- ; MUBUF-GFX90A: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 :: (load 4 from %stack.0, addrspace 5)
- ; MUBUF-GFX90A: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 :: (load 4 from %stack.0 + 4, addrspace 5)
- ; MUBUF-GFX90A: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 :: (load 4 from %stack.0 + 8, addrspace 5)
- ; MUBUF-GFX90A: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 :: (load 4 from %stack.0 + 12, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 :: (store 4 into %stack.0, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 :: (store 4 into %stack.0 + 8, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store 4 into %stack.0 + 12, addrspace 5)
+ ; MUBUF-GFX90A: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 :: (load 4 from %stack.0, addrspace 5)
+ ; MUBUF-GFX90A: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 :: (load 4 from %stack.0 + 4, addrspace 5)
+ ; MUBUF-GFX90A: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 :: (load 4 from %stack.0 + 8, addrspace 5)
+ ; MUBUF-GFX90A: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 :: (load 4 from %stack.0 + 12, addrspace 5)
; MUBUF-GFX90A: S_ENDPGM 0
; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a4
; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -1784,8 +1784,8 @@ body: |
; MUBUF-GFX90A-V2A: S_ENDPGM 0
; FLATSCR-GFX90A-LABEL: name: test_spill_a4
; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF
- ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16 into %stack.0, align 4, addrspace 5)
- ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %stack.0, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store 16 into %stack.0, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %stack.0, align 4, addrspace 5)
; FLATSCR-GFX90A: S_ENDPGM 0
; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a4
; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -1820,24 +1820,24 @@ body: |
; MUBUF-LABEL: name: test_spill_a5
; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0 + 4, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0 + 8, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0 + 8, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0 + 12, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0 + 12, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0 + 16, addrspace 5)
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0 + 16, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
; MUBUF: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 4, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 4, addrspace 5)
; MUBUF: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 8, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 8, addrspace 5)
; MUBUF: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 12, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 12, addrspace 5)
; MUBUF: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 16, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 16, addrspace 5)
; MUBUF: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4
; MUBUF: S_ENDPGM 0
; MUBUF-V2A-LABEL: name: test_spill_a5
@@ -1857,24 +1857,24 @@ body: |
; FLATSCR-LABEL: name: test_spill_a5
; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0 + 4, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0 + 8, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0 + 8, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0 + 12, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0 + 12, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0 + 16, addrspace 5)
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0 + 16, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
; FLATSCR: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 4, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 4, addrspace 5)
; FLATSCR: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 8, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 8, addrspace 5)
; FLATSCR: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 12, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 12, addrspace 5)
; FLATSCR: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 16, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 16, addrspace 5)
; FLATSCR: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4
; FLATSCR: S_ENDPGM 0
; FLATSCR-V2A-LABEL: name: test_spill_a5
@@ -1893,16 +1893,16 @@ body: |
; FLATSCR-V2A: S_ENDPGM 0
; MUBUF-GFX90A-LABEL: name: test_spill_a5
; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0 + 4, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0 + 8, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0 + 12, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0 + 16, addrspace 5)
- ; MUBUF-GFX90A: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: (load 4 from %stack.0, addrspace 5)
- ; MUBUF-GFX90A: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: (load 4 from %stack.0 + 4, addrspace 5)
- ; MUBUF-GFX90A: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: (load 4 from %stack.0 + 8, addrspace 5)
- ; MUBUF-GFX90A: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: (load 4 from %stack.0 + 12, addrspace 5)
- ; MUBUF-GFX90A: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: (load 4 from %stack.0 + 16, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0 + 8, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0 + 12, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0 + 16, addrspace 5)
+ ; MUBUF-GFX90A: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: (load 4 from %stack.0, addrspace 5)
+ ; MUBUF-GFX90A: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: (load 4 from %stack.0 + 4, addrspace 5)
+ ; MUBUF-GFX90A: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: (load 4 from %stack.0 + 8, addrspace 5)
+ ; MUBUF-GFX90A: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: (load 4 from %stack.0 + 12, addrspace 5)
+ ; MUBUF-GFX90A: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: (load 4 from %stack.0 + 16, addrspace 5)
; MUBUF-GFX90A: S_ENDPGM 0
; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a5
; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
@@ -1920,10 +1920,10 @@ body: |
; MUBUF-GFX90A-V2A: S_ENDPGM 0
; FLATSCR-GFX90A-LABEL: name: test_spill_a5
; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF
- ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 16 into %stack.0, align 4, addrspace 5)
- ; FLATSCR-GFX90A: SCRATCH_STORE_DWORD_SADDR killed $agpr4, $sgpr32, 16, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0 + 16, addrspace 5)
- ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: (load 16 from %stack.0, align 4, addrspace 5)
- ; FLATSCR-GFX90A: $agpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: (load 4 from %stack.0 + 16, addrspace 5)
+ ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 16 into %stack.0, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: SCRATCH_STORE_DWORD_SADDR killed $agpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0 + 16, addrspace 5)
+ ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: (load 16 from %stack.0, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: $agpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: (load 4 from %stack.0 + 16, addrspace 5)
; FLATSCR-GFX90A: S_ENDPGM 0
; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a5
; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
@@ -1960,28 +1960,28 @@ body: |
; MUBUF-LABEL: name: test_spill_a6
; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 4, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 8, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 8, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 12, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 12, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 16, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 16, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 20, addrspace 5)
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 20, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
; MUBUF: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 4, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 4, addrspace 5)
; MUBUF: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 8, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 8, addrspace 5)
; MUBUF: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 12, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 12, addrspace 5)
; MUBUF: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 16, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 16, addrspace 5)
; MUBUF: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 20, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 20, addrspace 5)
; MUBUF: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
; MUBUF: S_ENDPGM 0
; MUBUF-V2A-LABEL: name: test_spill_a6
@@ -2003,28 +2003,28 @@ body: |
; FLATSCR-LABEL: name: test_spill_a6
; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 4, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 8, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 8, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 12, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 12, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 16, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 16, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 20, addrspace 5)
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 20, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
; FLATSCR: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 4, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 4, addrspace 5)
; FLATSCR: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 8, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 8, addrspace 5)
; FLATSCR: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 12, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 12, addrspace 5)
; FLATSCR: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 16, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 16, addrspace 5)
; FLATSCR: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 20, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 20, addrspace 5)
; FLATSCR: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
; FLATSCR: S_ENDPGM 0
; FLATSCR-V2A-LABEL: name: test_spill_a6
@@ -2045,18 +2045,18 @@ body: |
; FLATSCR-V2A: S_ENDPGM 0
; MUBUF-GFX90A-LABEL: name: test_spill_a6
; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 4, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 8, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 12, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 16, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 20, addrspace 5)
- ; MUBUF-GFX90A: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (load 4 from %stack.0, addrspace 5)
- ; MUBUF-GFX90A: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (load 4 from %stack.0 + 4, addrspace 5)
- ; MUBUF-GFX90A: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (load 4 from %stack.0 + 8, addrspace 5)
- ; MUBUF-GFX90A: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (load 4 from %stack.0 + 12, addrspace 5)
- ; MUBUF-GFX90A: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (load 4 from %stack.0 + 16, addrspace 5)
- ; MUBUF-GFX90A: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (load 4 from %stack.0 + 20, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 8, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 12, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 16, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 20, addrspace 5)
+ ; MUBUF-GFX90A: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (load 4 from %stack.0, addrspace 5)
+ ; MUBUF-GFX90A: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (load 4 from %stack.0 + 4, addrspace 5)
+ ; MUBUF-GFX90A: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (load 4 from %stack.0 + 8, addrspace 5)
+ ; MUBUF-GFX90A: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (load 4 from %stack.0 + 12, addrspace 5)
+ ; MUBUF-GFX90A: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (load 4 from %stack.0 + 16, addrspace 5)
+ ; MUBUF-GFX90A: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (load 4 from %stack.0 + 20, addrspace 5)
; MUBUF-GFX90A: S_ENDPGM 0
; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a6
; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
@@ -2076,10 +2076,10 @@ body: |
; MUBUF-GFX90A-V2A: S_ENDPGM 0
; FLATSCR-GFX90A-LABEL: name: test_spill_a6
; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF
- ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 16 into %stack.0, align 4, addrspace 5)
- ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX2_SADDR killed $agpr4_agpr5, $sgpr32, 16, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 8 into %stack.0 + 16, align 4, addrspace 5)
- ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (load 16 from %stack.0, align 4, addrspace 5)
- ; FLATSCR-GFX90A: $agpr4_agpr5 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 16, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (load 8 from %stack.0 + 16, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 16 into %stack.0, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX2_SADDR killed $agpr4_agpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 8 into %stack.0 + 16, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (load 16 from %stack.0, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: $agpr4_agpr5 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (load 8 from %stack.0 + 16, align 4, addrspace 5)
; FLATSCR-GFX90A: S_ENDPGM 0
; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a6
; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
@@ -2118,36 +2118,36 @@ body: |
; MUBUF-LABEL: name: test_spill_a8
; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 4, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 8, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 8, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 12, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 12, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 16, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 16, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 20, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 20, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 24, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 24, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 28, addrspace 5)
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 28, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
; MUBUF: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 4, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 4, addrspace 5)
; MUBUF: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 8, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 8, addrspace 5)
; MUBUF: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 12, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 12, addrspace 5)
; MUBUF: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 16, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 16, addrspace 5)
; MUBUF: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 20, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 20, addrspace 5)
; MUBUF: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 24, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 24, addrspace 5)
; MUBUF: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 28, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 28, addrspace 5)
; MUBUF: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
; MUBUF: S_ENDPGM 0
; MUBUF-V2A-LABEL: name: test_spill_a8
@@ -2173,36 +2173,36 @@ body: |
; FLATSCR-LABEL: name: test_spill_a8
; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 4, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 8, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 8, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 12, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 12, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 16, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 16, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 20, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 20, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 24, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 24, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 28, addrspace 5)
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 28, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
; FLATSCR: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 4, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 4, addrspace 5)
; FLATSCR: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 8, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 8, addrspace 5)
; FLATSCR: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 12, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 12, addrspace 5)
; FLATSCR: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 16, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 16, addrspace 5)
; FLATSCR: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 20, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 20, addrspace 5)
; FLATSCR: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 24, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 24, addrspace 5)
; FLATSCR: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 28, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 28, addrspace 5)
; FLATSCR: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
; FLATSCR: S_ENDPGM 0
; FLATSCR-V2A-LABEL: name: test_spill_a8
@@ -2227,22 +2227,22 @@ body: |
; FLATSCR-V2A: S_ENDPGM 0
; MUBUF-GFX90A-LABEL: name: test_spill_a8
; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 4, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 8, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 12, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 16, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 20, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 24, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 28, addrspace 5)
- ; MUBUF-GFX90A: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load 4 from %stack.0, addrspace 5)
- ; MUBUF-GFX90A: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load 4 from %stack.0 + 4, addrspace 5)
- ; MUBUF-GFX90A: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load 4 from %stack.0 + 8, addrspace 5)
- ; MUBUF-GFX90A: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load 4 from %stack.0 + 12, addrspace 5)
- ; MUBUF-GFX90A: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load 4 from %stack.0 + 16, addrspace 5)
- ; MUBUF-GFX90A: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load 4 from %stack.0 + 20, addrspace 5)
- ; MUBUF-GFX90A: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load 4 from %stack.0 + 24, addrspace 5)
- ; MUBUF-GFX90A: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load 4 from %stack.0 + 28, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 8, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 12, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 16, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 20, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 24, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 28, addrspace 5)
+ ; MUBUF-GFX90A: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load 4 from %stack.0, addrspace 5)
+ ; MUBUF-GFX90A: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load 4 from %stack.0 + 4, addrspace 5)
+ ; MUBUF-GFX90A: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load 4 from %stack.0 + 8, addrspace 5)
+ ; MUBUF-GFX90A: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load 4 from %stack.0 + 12, addrspace 5)
+ ; MUBUF-GFX90A: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load 4 from %stack.0 + 16, addrspace 5)
+ ; MUBUF-GFX90A: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load 4 from %stack.0 + 20, addrspace 5)
+ ; MUBUF-GFX90A: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load 4 from %stack.0 + 24, addrspace 5)
+ ; MUBUF-GFX90A: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load 4 from %stack.0 + 28, addrspace 5)
; MUBUF-GFX90A: S_ENDPGM 0
; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a8
; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
@@ -2266,10 +2266,10 @@ body: |
; MUBUF-GFX90A-V2A: S_ENDPGM 0
; FLATSCR-GFX90A-LABEL: name: test_spill_a8
; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF
- ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 16 into %stack.0, align 4, addrspace 5)
- ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 16 into %stack.0 + 16, align 4, addrspace 5)
- ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load 16 from %stack.0, align 4, addrspace 5)
- ; FLATSCR-GFX90A: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load 16 from %stack.0 + 16, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 16 into %stack.0, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 16 into %stack.0 + 16, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load 16 from %stack.0, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load 16 from %stack.0 + 16, align 4, addrspace 5)
; FLATSCR-GFX90A: S_ENDPGM 0
; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a8
; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
@@ -2312,68 +2312,68 @@ body: |
; MUBUF-LABEL: name: test_spill_a16
; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 4, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 8, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 8, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 12, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 12, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 16, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 16, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 20, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 20, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 24, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 24, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 28, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 28, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 32, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 32, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 36, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 36, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 40, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 40, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 44, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 44, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 48, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 48, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 52, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 52, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 56, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 56, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 60, addrspace 5)
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 60, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
; MUBUF: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 4, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 4, addrspace 5)
; MUBUF: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 8, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 8, addrspace 5)
; MUBUF: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 12, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 12, addrspace 5)
; MUBUF: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 16, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 16, addrspace 5)
; MUBUF: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 20, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 20, addrspace 5)
; MUBUF: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 24, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 24, addrspace 5)
; MUBUF: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 28, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 28, addrspace 5)
; MUBUF: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 32, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 32, addrspace 5)
; MUBUF: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 36, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 36, addrspace 5)
; MUBUF: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 40, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 40, addrspace 5)
; MUBUF: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 44, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 44, addrspace 5)
; MUBUF: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 48, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 48, addrspace 5)
; MUBUF: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 52, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 52, addrspace 5)
; MUBUF: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 56, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 56, addrspace 5)
; MUBUF: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 60, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 60, addrspace 5)
; MUBUF: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
; MUBUF: S_ENDPGM 0
; MUBUF-V2A-LABEL: name: test_spill_a16
@@ -2415,68 +2415,68 @@ body: |
; FLATSCR-LABEL: name: test_spill_a16
; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 4, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 8, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 8, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 12, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 12, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 16, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 16, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 20, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 20, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 24, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 24, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 28, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 28, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 32, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 32, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 36, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 36, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 36, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 40, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 40, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 40, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 44, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 44, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 44, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 48, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 48, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 48, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 52, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 52, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 52, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 56, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 56, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 56, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 60, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 60, addrspace 5)
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 60, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
; FLATSCR: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 4, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 4, addrspace 5)
; FLATSCR: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 8, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 8, addrspace 5)
; FLATSCR: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 12, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 12, addrspace 5)
; FLATSCR: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 16, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 16, addrspace 5)
; FLATSCR: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 20, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 20, addrspace 5)
; FLATSCR: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 24, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 24, addrspace 5)
; FLATSCR: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 28, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 28, addrspace 5)
; FLATSCR: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 32, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 32, addrspace 5)
; FLATSCR: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 36, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 36, addrspace 5)
; FLATSCR: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 40, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 40, addrspace 5)
; FLATSCR: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 44, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 44, addrspace 5)
; FLATSCR: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 48, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 48, addrspace 5)
; FLATSCR: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 52, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 52, addrspace 5)
; FLATSCR: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 56, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 56, addrspace 5)
; FLATSCR: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 60, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 60, addrspace 5)
; FLATSCR: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
; FLATSCR: S_ENDPGM 0
; FLATSCR-V2A-LABEL: name: test_spill_a16
@@ -2517,38 +2517,38 @@ body: |
; FLATSCR-V2A: S_ENDPGM 0
; MUBUF-GFX90A-LABEL: name: test_spill_a16
; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 4, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 8, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 12, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 16, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 20, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 24, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 28, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 32, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 36, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 40, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 44, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 48, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 52, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 56, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 60, addrspace 5)
- ; MUBUF-GFX90A: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0, addrspace 5)
- ; MUBUF-GFX90A: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 4, addrspace 5)
- ; MUBUF-GFX90A: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 8, addrspace 5)
- ; MUBUF-GFX90A: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 12, addrspace 5)
- ; MUBUF-GFX90A: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 16, addrspace 5)
- ; MUBUF-GFX90A: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 20, addrspace 5)
- ; MUBUF-GFX90A: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 24, addrspace 5)
- ; MUBUF-GFX90A: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 28, addrspace 5)
- ; MUBUF-GFX90A: $agpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 32, addrspace 5)
- ; MUBUF-GFX90A: $agpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 36, addrspace 5)
- ; MUBUF-GFX90A: $agpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 40, addrspace 5)
- ; MUBUF-GFX90A: $agpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 44, addrspace 5)
- ; MUBUF-GFX90A: $agpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 48, addrspace 5)
- ; MUBUF-GFX90A: $agpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 52, addrspace 5)
- ; MUBUF-GFX90A: $agpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 56, addrspace 5)
- ; MUBUF-GFX90A: $agpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 60, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 8, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 12, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 16, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 20, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 24, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 28, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 32, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 36, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 40, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 44, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 48, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 52, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 56, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 60, addrspace 5)
+ ; MUBUF-GFX90A: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0, addrspace 5)
+ ; MUBUF-GFX90A: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 4, addrspace 5)
+ ; MUBUF-GFX90A: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 8, addrspace 5)
+ ; MUBUF-GFX90A: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 12, addrspace 5)
+ ; MUBUF-GFX90A: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 16, addrspace 5)
+ ; MUBUF-GFX90A: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 20, addrspace 5)
+ ; MUBUF-GFX90A: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 24, addrspace 5)
+ ; MUBUF-GFX90A: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 28, addrspace 5)
+ ; MUBUF-GFX90A: $agpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 32, addrspace 5)
+ ; MUBUF-GFX90A: $agpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 36, addrspace 5)
+ ; MUBUF-GFX90A: $agpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 40, addrspace 5)
+ ; MUBUF-GFX90A: $agpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 44, addrspace 5)
+ ; MUBUF-GFX90A: $agpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 48, addrspace 5)
+ ; MUBUF-GFX90A: $agpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 52, addrspace 5)
+ ; MUBUF-GFX90A: $agpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 56, addrspace 5)
+ ; MUBUF-GFX90A: $agpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 60, addrspace 5)
; MUBUF-GFX90A: S_ENDPGM 0
; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a16
; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15
@@ -2588,14 +2588,14 @@ body: |
; MUBUF-GFX90A-V2A: S_ENDPGM 0
; FLATSCR-GFX90A-LABEL: name: test_spill_a16
; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF
- ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 16 into %stack.0, align 4, addrspace 5)
- ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 16 into %stack.0 + 16, align 4, addrspace 5)
- ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr8_agpr9_agpr10_agpr11, $sgpr32, 32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 16 into %stack.0 + 32, align 4, addrspace 5)
- ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr12_agpr13_agpr14_agpr15, $sgpr32, 48, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 16 into %stack.0 + 48, align 4, addrspace 5)
- ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 16 from %stack.0, align 4, addrspace 5)
- ; FLATSCR-GFX90A: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 16 from %stack.0 + 16, align 4, addrspace 5)
- ; FLATSCR-GFX90A: $agpr8_agpr9_agpr10_agpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 16 from %stack.0 + 32, align 4, addrspace 5)
- ; FLATSCR-GFX90A: $agpr12_agpr13_agpr14_agpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 16 from %stack.0 + 48, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 16 into %stack.0, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 16 into %stack.0 + 16, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr8_agpr9_agpr10_agpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 16 into %stack.0 + 32, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr12_agpr13_agpr14_agpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 16 into %stack.0 + 48, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 16 from %stack.0, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 16 from %stack.0 + 16, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: $agpr8_agpr9_agpr10_agpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 16 from %stack.0 + 32, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: $agpr12_agpr13_agpr14_agpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 16 from %stack.0 + 48, align 4, addrspace 5)
; FLATSCR-GFX90A: S_ENDPGM 0
; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a16
; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15
@@ -2654,132 +2654,132 @@ body: |
; MUBUF-LABEL: name: test_spill_a32
; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 4, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 8, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 8, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 12, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 12, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 16, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 16, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 20, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 20, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 24, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 24, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 28, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 28, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 32, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 32, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 36, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 36, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 40, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 40, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 44, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 44, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 48, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 48, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 52, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 52, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 56, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 56, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 60, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 60, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr16, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 64, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 64, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr17, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 68, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 68, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr18, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 72, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 72, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr19, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 76, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 76, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr20, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 80, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 80, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr21, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 84, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 84, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr22, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 88, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 88, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr23, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 92, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 92, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr24, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 96, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 96, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr25, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 100, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 100, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr26, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 104, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 104, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr27, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 108, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 108, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr28, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 112, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 112, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr29, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 116, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 116, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr30, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 120, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 120, addrspace 5)
; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr31, implicit $exec
- ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 124, addrspace 5)
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
+ ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 124, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
; MUBUF: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 4, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 4, addrspace 5)
; MUBUF: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 8, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 8, addrspace 5)
; MUBUF: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 12, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 12, addrspace 5)
; MUBUF: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 16, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 16, addrspace 5)
; MUBUF: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 20, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 20, addrspace 5)
; MUBUF: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 24, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 24, addrspace 5)
; MUBUF: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 28, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 28, addrspace 5)
; MUBUF: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 32, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 32, addrspace 5)
; MUBUF: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 36, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 36, addrspace 5)
; MUBUF: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 40, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 40, addrspace 5)
; MUBUF: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 44, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 44, addrspace 5)
; MUBUF: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 48, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 48, addrspace 5)
; MUBUF: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 52, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 52, addrspace 5)
; MUBUF: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 56, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 56, addrspace 5)
; MUBUF: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 60, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 60, addrspace 5)
; MUBUF: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 64, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 64, addrspace 5)
; MUBUF: $agpr16 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 68, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 68, addrspace 5)
; MUBUF: $agpr17 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 72, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 72, addrspace 5)
; MUBUF: $agpr18 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 76, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 76, addrspace 5)
; MUBUF: $agpr19 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 80, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 80, addrspace 5)
; MUBUF: $agpr20 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 84, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 84, addrspace 5)
; MUBUF: $agpr21 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 88, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 88, addrspace 5)
; MUBUF: $agpr22 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 92, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 92, addrspace 5)
; MUBUF: $agpr23 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 96, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 96, addrspace 5)
; MUBUF: $agpr24 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 100, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 100, addrspace 5)
; MUBUF: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 104, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 104, addrspace 5)
; MUBUF: $agpr26 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 108, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 108, addrspace 5)
; MUBUF: $agpr27 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 112, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 112, addrspace 5)
; MUBUF: $agpr28 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 116, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 116, addrspace 5)
; MUBUF: $agpr29 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 120, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 120, addrspace 5)
; MUBUF: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 124, addrspace 5)
+ ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 124, addrspace 5)
; MUBUF: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
; MUBUF: S_ENDPGM 0
; MUBUF-V2A-LABEL: name: test_spill_a32
@@ -2853,132 +2853,132 @@ body: |
; FLATSCR-LABEL: name: test_spill_a32
; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 4, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 8, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 8, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 12, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 12, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 16, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 16, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 20, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 20, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 24, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 24, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 28, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 28, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 32, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 32, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 36, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 36, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 36, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 40, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 40, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 40, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 44, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 44, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 44, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 48, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 48, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 48, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 52, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 52, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 52, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 56, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 56, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 56, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 60, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 60, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 60, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr16, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 64, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 64, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 64, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr17, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 68, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 68, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 68, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr18, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 72, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 72, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 72, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr19, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 76, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 76, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 76, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr20, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 80, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 80, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 80, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr21, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 84, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 84, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 84, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr22, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 88, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 88, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 88, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr23, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 92, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 92, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 92, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr24, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 96, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 96, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 96, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr25, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 100, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 100, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 100, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr26, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 104, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 104, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 104, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr27, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 108, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 108, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 108, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr28, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 112, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 112, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 112, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr29, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 116, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 116, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 116, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr30, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 120, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 120, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 120, addrspace 5)
; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr31, implicit $exec
- ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 124, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 124, addrspace 5)
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 124, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
; FLATSCR: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 4, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 4, addrspace 5)
; FLATSCR: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 8, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 8, addrspace 5)
; FLATSCR: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 12, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 12, addrspace 5)
; FLATSCR: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 16, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 16, addrspace 5)
; FLATSCR: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 20, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 20, addrspace 5)
; FLATSCR: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 24, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 24, addrspace 5)
; FLATSCR: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 28, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 28, addrspace 5)
; FLATSCR: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 32, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 32, addrspace 5)
; FLATSCR: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 36, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 36, addrspace 5)
; FLATSCR: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 40, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 40, addrspace 5)
; FLATSCR: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 44, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 44, addrspace 5)
; FLATSCR: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 48, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 48, addrspace 5)
; FLATSCR: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 52, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 52, addrspace 5)
; FLATSCR: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 56, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 56, addrspace 5)
; FLATSCR: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 60, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 60, addrspace 5)
; FLATSCR: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 64, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 64, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 64, addrspace 5)
; FLATSCR: $agpr16 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 68, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 68, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 68, addrspace 5)
; FLATSCR: $agpr17 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 72, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 72, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 72, addrspace 5)
; FLATSCR: $agpr18 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 76, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 76, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 76, addrspace 5)
; FLATSCR: $agpr19 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 80, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 80, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 80, addrspace 5)
; FLATSCR: $agpr20 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 84, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 84, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 84, addrspace 5)
; FLATSCR: $agpr21 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 88, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 88, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 88, addrspace 5)
; FLATSCR: $agpr22 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 92, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 92, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 92, addrspace 5)
; FLATSCR: $agpr23 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 96, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 96, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 96, addrspace 5)
; FLATSCR: $agpr24 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 100, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 100, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 100, addrspace 5)
; FLATSCR: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 104, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 104, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 104, addrspace 5)
; FLATSCR: $agpr26 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 108, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 108, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 108, addrspace 5)
; FLATSCR: $agpr27 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 112, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 112, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 112, addrspace 5)
; FLATSCR: $agpr28 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 116, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 116, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 116, addrspace 5)
; FLATSCR: $agpr29 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 120, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 120, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 120, addrspace 5)
; FLATSCR: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
- ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 124, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 124, addrspace 5)
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 124, addrspace 5)
; FLATSCR: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
; FLATSCR: S_ENDPGM 0
; FLATSCR-V2A-LABEL: name: test_spill_a32
@@ -3051,70 +3051,70 @@ body: |
; FLATSCR-V2A: S_ENDPGM 0
; MUBUF-GFX90A-LABEL: name: test_spill_a32
; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 4, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 8, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 12, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 16, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 20, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 24, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 28, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 32, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 36, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 40, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 44, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 48, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 52, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 56, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 60, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 64, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr17, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 68, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr18, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 72, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr19, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 76, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 80, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr21, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 84, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr22, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 88, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr23, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 92, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr24, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 96, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr25, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 100, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr26, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 104, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr27, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 108, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr28, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 112, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr29, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 116, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr30, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 120, addrspace 5)
- ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr31, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 124, addrspace 5)
- ; MUBUF-GFX90A: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0, addrspace 5)
- ; MUBUF-GFX90A: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 4, addrspace 5)
- ; MUBUF-GFX90A: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 8, addrspace 5)
- ; MUBUF-GFX90A: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 12, addrspace 5)
- ; MUBUF-GFX90A: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 16, addrspace 5)
- ; MUBUF-GFX90A: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 20, addrspace 5)
- ; MUBUF-GFX90A: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 24, addrspace 5)
- ; MUBUF-GFX90A: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 28, addrspace 5)
- ; MUBUF-GFX90A: $agpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 32, addrspace 5)
- ; MUBUF-GFX90A: $agpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 36, addrspace 5)
- ; MUBUF-GFX90A: $agpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 40, addrspace 5)
- ; MUBUF-GFX90A: $agpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 44, addrspace 5)
- ; MUBUF-GFX90A: $agpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 48, addrspace 5)
- ; MUBUF-GFX90A: $agpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 52, addrspace 5)
- ; MUBUF-GFX90A: $agpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 56, addrspace 5)
- ; MUBUF-GFX90A: $agpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 60, addrspace 5)
- ; MUBUF-GFX90A: $agpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 64, addrspace 5)
- ; MUBUF-GFX90A: $agpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 68, addrspace 5)
- ; MUBUF-GFX90A: $agpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 72, addrspace 5)
- ; MUBUF-GFX90A: $agpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 76, addrspace 5)
- ; MUBUF-GFX90A: $agpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 80, addrspace 5)
- ; MUBUF-GFX90A: $agpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 84, addrspace 5)
- ; MUBUF-GFX90A: $agpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 88, addrspace 5)
- ; MUBUF-GFX90A: $agpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 92, addrspace 5)
- ; MUBUF-GFX90A: $agpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 96, addrspace 5)
- ; MUBUF-GFX90A: $agpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 100, addrspace 5)
- ; MUBUF-GFX90A: $agpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 104, addrspace 5)
- ; MUBUF-GFX90A: $agpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 108, addrspace 5)
- ; MUBUF-GFX90A: $agpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 112, addrspace 5)
- ; MUBUF-GFX90A: $agpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 116, addrspace 5)
- ; MUBUF-GFX90A: $agpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 120, addrspace 5)
- ; MUBUF-GFX90A: $agpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 124, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 8, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 12, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 16, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 20, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 24, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 28, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 32, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 36, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 40, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 44, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 48, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 52, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 56, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 60, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 64, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr17, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 68, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr18, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 72, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr19, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 76, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 80, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr21, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 84, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr22, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 88, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr23, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 92, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr24, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 96, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr25, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 100, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr26, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 104, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr27, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 108, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr28, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 112, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr29, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 116, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr30, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 120, addrspace 5)
+ ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr31, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 124, addrspace 5)
+ ; MUBUF-GFX90A: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0, addrspace 5)
+ ; MUBUF-GFX90A: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 4, addrspace 5)
+ ; MUBUF-GFX90A: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 8, addrspace 5)
+ ; MUBUF-GFX90A: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 12, addrspace 5)
+ ; MUBUF-GFX90A: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 16, addrspace 5)
+ ; MUBUF-GFX90A: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 20, addrspace 5)
+ ; MUBUF-GFX90A: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 24, addrspace 5)
+ ; MUBUF-GFX90A: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 28, addrspace 5)
+ ; MUBUF-GFX90A: $agpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 32, addrspace 5)
+ ; MUBUF-GFX90A: $agpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 36, addrspace 5)
+ ; MUBUF-GFX90A: $agpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 40, addrspace 5)
+ ; MUBUF-GFX90A: $agpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 44, addrspace 5)
+ ; MUBUF-GFX90A: $agpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 48, addrspace 5)
+ ; MUBUF-GFX90A: $agpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 52, addrspace 5)
+ ; MUBUF-GFX90A: $agpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 56, addrspace 5)
+ ; MUBUF-GFX90A: $agpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 60, addrspace 5)
+ ; MUBUF-GFX90A: $agpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 64, addrspace 5)
+ ; MUBUF-GFX90A: $agpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 68, addrspace 5)
+ ; MUBUF-GFX90A: $agpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 72, addrspace 5)
+ ; MUBUF-GFX90A: $agpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 76, addrspace 5)
+ ; MUBUF-GFX90A: $agpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 80, addrspace 5)
+ ; MUBUF-GFX90A: $agpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 84, addrspace 5)
+ ; MUBUF-GFX90A: $agpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 88, addrspace 5)
+ ; MUBUF-GFX90A: $agpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 92, addrspace 5)
+ ; MUBUF-GFX90A: $agpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 96, addrspace 5)
+ ; MUBUF-GFX90A: $agpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 100, addrspace 5)
+ ; MUBUF-GFX90A: $agpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 104, addrspace 5)
+ ; MUBUF-GFX90A: $agpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 108, addrspace 5)
+ ; MUBUF-GFX90A: $agpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 112, addrspace 5)
+ ; MUBUF-GFX90A: $agpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 116, addrspace 5)
+ ; MUBUF-GFX90A: $agpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 120, addrspace 5)
+ ; MUBUF-GFX90A: $agpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 124, addrspace 5)
; MUBUF-GFX90A: S_ENDPGM 0
; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a32
; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31
@@ -3186,22 +3186,22 @@ body: |
; MUBUF-GFX90A-V2A: S_ENDPGM 0
; FLATSCR-GFX90A-LABEL: name: test_spill_a32
; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF
- ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 16 into %stack.0, align 4, addrspace 5)
- ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 16 into %stack.0 + 16, align 4, addrspace 5)
- ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr8_agpr9_agpr10_agpr11, $sgpr32, 32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 16 into %stack.0 + 32, align 4, addrspace 5)
- ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr12_agpr13_agpr14_agpr15, $sgpr32, 48, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 16 into %stack.0 + 48, align 4, addrspace 5)
- ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr16_agpr17_agpr18_agpr19, $sgpr32, 64, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 16 into %stack.0 + 64, align 4, addrspace 5)
- ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr20_agpr21_agpr22_agpr23, $sgpr32, 80, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 16 into %stack.0 + 80, align 4, addrspace 5)
- ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr24_agpr25_agpr26_agpr27, $sgpr32, 96, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 16 into %stack.0 + 96, align 4, addrspace 5)
- ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr28_agpr29_agpr30_agpr31, $sgpr32, 112, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 16 into %stack.0 + 112, align 4, addrspace 5)
- ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 16 from %stack.0, align 4, addrspace 5)
- ; FLATSCR-GFX90A: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 16 from %stack.0 + 16, align 4, addrspace 5)
- ; FLATSCR-GFX90A: $agpr8_agpr9_agpr10_agpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 16 from %stack.0 + 32, align 4, addrspace 5)
- ; FLATSCR-GFX90A: $agpr12_agpr13_agpr14_agpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 16 from %stack.0 + 48, align 4, addrspace 5)
- ; FLATSCR-GFX90A: $agpr16_agpr17_agpr18_agpr19 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 64, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 16 from %stack.0 + 64, align 4, addrspace 5)
- ; FLATSCR-GFX90A: $agpr20_agpr21_agpr22_agpr23 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 80, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 16 from %stack.0 + 80, align 4, addrspace 5)
- ; FLATSCR-GFX90A: $agpr24_agpr25_agpr26_agpr27 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 96, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 16 from %stack.0 + 96, align 4, addrspace 5)
- ; FLATSCR-GFX90A: $agpr28_agpr29_agpr30_agpr31 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 112, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 16 from %stack.0 + 112, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 16 into %stack.0, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 16 into %stack.0 + 16, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr8_agpr9_agpr10_agpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 16 into %stack.0 + 32, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr12_agpr13_agpr14_agpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 16 into %stack.0 + 48, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr16_agpr17_agpr18_agpr19, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 16 into %stack.0 + 64, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr20_agpr21_agpr22_agpr23, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 16 into %stack.0 + 80, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr24_agpr25_agpr26_agpr27, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 16 into %stack.0 + 96, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr28_agpr29_agpr30_agpr31, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 16 into %stack.0 + 112, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 16 from %stack.0, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 16 from %stack.0 + 16, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: $agpr8_agpr9_agpr10_agpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 16 from %stack.0 + 32, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: $agpr12_agpr13_agpr14_agpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 16 from %stack.0 + 48, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: $agpr16_agpr17_agpr18_agpr19 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 16 from %stack.0 + 64, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: $agpr20_agpr21_agpr22_agpr23 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 16 from %stack.0 + 80, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: $agpr24_agpr25_agpr26_agpr27 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 16 from %stack.0 + 96, align 4, addrspace 5)
+ ; FLATSCR-GFX90A: $agpr28_agpr29_agpr30_agpr31 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 16 from %stack.0 + 112, align 4, addrspace 5)
; FLATSCR-GFX90A: S_ENDPGM 0
; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a32
; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31
diff --git a/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir b/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir
index 1fedbdc2cd93..8d4dbb7149d2 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir
@@ -29,12 +29,12 @@ body: |
; CHECK: $sgpr0 = S_ADD_U32 $sgpr0, $sgpr4, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK: $sgpr5 = S_MOV_B32 524288
- ; CHECK: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, align 8192, addrspace 5)
+ ; CHECK: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, align 8192, addrspace 5)
; CHECK: S_BRANCH %bb.1
; CHECK: bb.1:
; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK: $sgpr4 = S_MOV_B32 524288
- ; CHECK: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, align 8192, addrspace 5)
+ ; CHECK: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, align 8192, addrspace 5)
; CHECK: S_ENDPGM 0, implicit $vgpr0
bb.0:
$vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir
index f824c532d0b1..ed9dab94bf40 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir
@@ -27,7 +27,7 @@ body: |
; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2
; GFX8: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX8: $sgpr32 = S_ADD_U32 $sgpr32, 8196, implicit-def $scc
- ; GFX8: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5)
+ ; GFX8: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5)
; GFX8: $sgpr32 = S_SUB_U32 $sgpr32, 8196, implicit-def $scc
; GFX8: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; GFX8: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
@@ -35,7 +35,7 @@ body: |
; GFX8: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc
; GFX8: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
; GFX8: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
- ; GFX8: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 20, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5)
+ ; GFX8: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 20, 0, 0, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5)
; GFX8: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
; GFX8: $vcc_lo = S_MOV_B32 8192
; GFX8: $vgpr3, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr3, 0, implicit $exec
@@ -44,15 +44,15 @@ body: |
; GFX8: $sgpr33 = V_READLANE_B32 $vgpr2, 0
; GFX8: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX8: $vgpr0 = V_MOV_B32_e32 8196, implicit $exec
- ; GFX8: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.3, addrspace 5)
+ ; GFX8: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.3, addrspace 5)
; GFX8: $exec = S_MOV_B64 killed $sgpr4_sgpr5
- ; GFX8: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 20, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.4, addrspace 5)
+ ; GFX8: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 20, 0, 0, 0, implicit $exec :: (load 4 from %stack.4, addrspace 5)
; GFX8: S_ENDPGM 0, csr_amdgpu_allvgprs
; GFX9-LABEL: name: pei_scavenge_vgpr_spill
; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2
; GFX9: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX9: $sgpr32 = S_ADD_U32 $sgpr32, 8196, implicit-def $scc
- ; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5)
+ ; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5)
; GFX9: $sgpr32 = S_SUB_U32 $sgpr32, 8196, implicit-def $scc
; GFX9: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; GFX9: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
@@ -60,7 +60,7 @@ body: |
; GFX9: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc
; GFX9: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
; GFX9: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
- ; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 20, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5)
+ ; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 20, 0, 0, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5)
; GFX9: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
; GFX9: $vgpr3 = V_ADD_U32_e32 8192, killed $vgpr3, implicit $exec
; GFX9: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec
@@ -68,15 +68,15 @@ body: |
; GFX9: $sgpr33 = V_READLANE_B32 $vgpr2, 0
; GFX9: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX9: $vgpr0 = V_MOV_B32_e32 8196, implicit $exec
- ; GFX9: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.3, addrspace 5)
+ ; GFX9: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.3, addrspace 5)
; GFX9: $exec = S_MOV_B64 killed $sgpr4_sgpr5
- ; GFX9: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 20, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.4, addrspace 5)
+ ; GFX9: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 20, 0, 0, 0, implicit $exec :: (load 4 from %stack.4, addrspace 5)
; GFX9: S_ENDPGM 0, csr_amdgpu_allvgprs
; GFX9-FLATSCR-LABEL: name: pei_scavenge_vgpr_spill
; GFX9-FLATSCR: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2
; GFX9-FLATSCR: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX9-FLATSCR: $sgpr4 = S_ADD_U32 $sgpr32, 8196, implicit-def $scc
- ; GFX9-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, killed $sgpr4, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.3, addrspace 5)
+ ; GFX9-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, killed $sgpr4, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.3, addrspace 5)
; GFX9-FLATSCR: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; GFX9-FLATSCR: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
; GFX9-FLATSCR: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 8191, implicit-def $scc
@@ -89,7 +89,7 @@ body: |
; GFX9-FLATSCR: $sgpr33 = V_READLANE_B32 $vgpr2, 0
; GFX9-FLATSCR: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX9-FLATSCR: $sgpr4 = S_ADD_U32 $sgpr32, 8196, implicit-def $scc
- ; GFX9-FLATSCR: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr4, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.3, addrspace 5)
+ ; GFX9-FLATSCR: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr4, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.3, addrspace 5)
; GFX9-FLATSCR: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; GFX9-FLATSCR: S_ENDPGM 0, csr_amdgpu_allvgprs
$vgpr0 = V_MOV_B32_e32 %stack.0, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/phi-elimination-end-cf.mir b/llvm/test/CodeGen/AMDGPU/phi-elimination-end-cf.mir
index b72b94ecf85c..509af0a08b7f 100644
--- a/llvm/test/CodeGen/AMDGPU/phi-elimination-end-cf.mir
+++ b/llvm/test/CodeGen/AMDGPU/phi-elimination-end-cf.mir
@@ -46,7 +46,7 @@ body: |
%15:sreg_32_xm0 = S_MOV_B32 61440
%16:sreg_32_xm0 = S_MOV_B32 -1
%17:sgpr_128 = REG_SEQUENCE undef %14:sreg_32_xm0, %subreg.sub0, undef %12:sreg_32_xm0, %subreg.sub1, %16, %subreg.sub2, %15, %subreg.sub3
- BUFFER_STORE_DWORD_OFFSET %4, %17, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ BUFFER_STORE_DWORD_OFFSET %4, %17, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
%19:vgpr_32 = COPY %4
%20:sreg_64 = SI_IF %0, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
S_BRANCH %bb.3
diff --git a/llvm/test/CodeGen/AMDGPU/post-ra-sched-kill-bundle-use-inst.mir b/llvm/test/CodeGen/AMDGPU/post-ra-sched-kill-bundle-use-inst.mir
index 9a5b9f80abee..901e7dee5f2a 100644
--- a/llvm/test/CodeGen/AMDGPU/post-ra-sched-kill-bundle-use-inst.mir
+++ b/llvm/test/CodeGen/AMDGPU/post-ra-sched-kill-bundle-use-inst.mir
@@ -16,7 +16,7 @@ body: |
; CHECK-LABEL: name: kill_flag_use_first_bundle_inst
; CHECK: liveins: $sgpr4_sgpr5, $sgpr7
- ; CHECK: renamable $sgpr0 = S_LOAD_DWORD_IMM killed renamable $sgpr4_sgpr5, 0, 0, 0
+ ; CHECK: renamable $sgpr0 = S_LOAD_DWORD_IMM killed renamable $sgpr4_sgpr5, 0, 0
; CHECK: $m0 = S_MOV_B32 -1
; CHECK: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
; CHECK: BUNDLE implicit $vgpr0, implicit $m0, implicit $exec {
@@ -27,7 +27,7 @@ body: |
; CHECK: DS_GWS_BARRIER killed $vgpr0, 8, implicit $m0, implicit $exec
; CHECK: S_WAITCNT 0
; CHECK: }
- renamable $sgpr0 = S_LOAD_DWORD_IMM killed renamable $sgpr4_sgpr5, 0, 0, 0
+ renamable $sgpr0 = S_LOAD_DWORD_IMM killed renamable $sgpr4_sgpr5, 0, 0
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit $exec
BUNDLE implicit $vgpr0, implicit $m0, implicit $exec {
diff --git a/llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir b/llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir
index bf3946a2ffab..3d34eb01699a 100644
--- a/llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir
+++ b/llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir
@@ -7,33 +7,33 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: bundle_memops
- ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, implicit $exec
; GCN: S_NOP 0
; GCN: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit undef $vgpr3_vgpr4, implicit $exec {
- ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec
- ; GCN: $vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, implicit $exec
+ ; GCN: $vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, implicit $exec
; GCN: }
; GCN: S_NOP 0
- ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, implicit $exec
; GCN: BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr5, implicit-def $vgpr5_lo16, implicit-def $vgpr5_hi16, implicit undef $vgpr0_vgpr1, implicit $exec, implicit undef $vgpr3_vgpr4 {
- ; GCN: $vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 4, 0, 0, 0, 0, implicit $exec
- ; GCN: $vgpr2 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, 0, 0, 0, implicit $exec
- ; GCN: $vgpr5 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 4, 0, implicit $exec
+ ; GCN: $vgpr2 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, implicit $exec
+ ; GCN: $vgpr5 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, implicit $exec
; GCN: }
; GCN: BUNDLE implicit undef $vgpr3_vgpr4, implicit $vgpr1, implicit $exec, implicit $vgpr0 {
- ; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr1, 0, 0, 0, 0, 0, implicit $exec
- ; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr0, 4, 0, 0, 0, 0, implicit $exec
+ ; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr1, 0, 0, implicit $exec
+ ; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr0, 4, 0, implicit $exec
; GCN: }
; GCN: S_NOP 0
; GCN: BUNDLE implicit undef $vgpr3_vgpr4, implicit $vgpr1, implicit $exec, implicit $vgpr0 {
- ; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr1, 0, 0, 0, 0, 0, implicit $exec
- ; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr0, 4, 0, 0, 0, 0, implicit $exec
- ; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr1, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr1, 0, 0, implicit $exec
+ ; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr0, 4, 0, implicit $exec
+ ; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr1, 0, 0, implicit $exec
; GCN: }
; GCN: S_NOP 0
- ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, implicit $exec
; GCN: S_NOP 0
- ; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr0, 4, 0, 0, 0, 0, implicit $exec
+ ; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr0, 4, 0, implicit $exec
; GCN: BUNDLE implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit $vgpr0, implicit $exec, implicit $vgpr1 {
; GCN: $vgpr2 = DS_READ_B32_gfx9 $vgpr0, 0, 0, implicit $exec
; GCN: $vgpr3 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $exec
@@ -44,70 +44,70 @@ body: |
; GCN: }
; GCN: S_NOP 0
; GCN: BUNDLE implicit-def $sgpr2, implicit-def $sgpr2_lo16, implicit-def $sgpr2_hi16, implicit-def $sgpr3, implicit-def $sgpr3_lo16, implicit-def $sgpr3_hi16, implicit undef $sgpr0_sgpr1, implicit undef $sgpr10 {
- ; GCN: $sgpr2 = S_LOAD_DWORD_IMM undef $sgpr0_sgpr1, 0, 0, 0
- ; GCN: $sgpr3 = S_LOAD_DWORD_SGPR undef $sgpr0_sgpr1, undef $sgpr10, 0, 0
+ ; GCN: $sgpr2 = S_LOAD_DWORD_IMM undef $sgpr0_sgpr1, 0, 0
+ ; GCN: $sgpr3 = S_LOAD_DWORD_SGPR undef $sgpr0_sgpr1, undef $sgpr10, 0
; GCN: }
; GCN: BUNDLE implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit $vgpr0, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr2, implicit $exec, implicit $vgpr1 {
- ; GCN: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr2, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN: $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr2, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr2, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr2, 0, 0, 0, 0, implicit $exec
; GCN: }
; GCN: BUNDLE implicit $vgpr0, implicit $vgpr2_vgpr3, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec {
- ; GCN: BUFFER_STORE_DWORD_ADDR64 $vgpr0, $vgpr2_vgpr3, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN: BUFFER_STORE_DWORD_ADDR64 $vgpr0, $vgpr2_vgpr3, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: BUFFER_STORE_DWORD_ADDR64 $vgpr0, $vgpr2_vgpr3, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: BUFFER_STORE_DWORD_ADDR64 $vgpr0, $vgpr2_vgpr3, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
; GCN: }
; GCN: BUNDLE implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit undef $vgpr4_vgpr5_vgpr6_vgpr7, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $exec {
- ; GCN: $vgpr2 = IMAGE_LOAD_V1_V4 undef $vgpr4_vgpr5_vgpr6_vgpr7, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4)
- ; GCN: $vgpr3 = IMAGE_LOAD_V1_V4 undef $vgpr4_vgpr5_vgpr6_vgpr7, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4)
+ ; GCN: $vgpr2 = IMAGE_LOAD_V1_V4 undef $vgpr4_vgpr5_vgpr6_vgpr7, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4)
+ ; GCN: $vgpr3 = IMAGE_LOAD_V1_V4 undef $vgpr4_vgpr5_vgpr6_vgpr7, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4)
; GCN: }
; GCN: BUNDLE implicit undef $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $exec {
- ; GCN: IMAGE_STORE_V4_V2 undef $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr0_vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, -1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16)
- ; GCN: IMAGE_STORE_V4_V2 undef $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr0_vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, -1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16)
+ ; GCN: IMAGE_STORE_V4_V2 undef $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr0_vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (store 16)
+ ; GCN: IMAGE_STORE_V4_V2 undef $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr0_vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (store 16)
; GCN: }
; GCN: S_NOP 0
- ; GCN: $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71 = S_LOAD_DWORDX8_IMM undef $sgpr10_sgpr11, 464, 0, 0
- ; GCN: $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75 = S_LOAD_DWORDX8_IMM undef $sgpr10_sgpr11, 128, 0, 0
+ ; GCN: $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71 = S_LOAD_DWORDX8_IMM undef $sgpr10_sgpr11, 464, 0
+ ; GCN: $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75 = S_LOAD_DWORDX8_IMM undef $sgpr10_sgpr11, 128, 0
; GCN: S_NOP 0
; GCN: BUNDLE implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit $vgpr0, implicit $exec, implicit $vgpr1 {
; GCN: $vgpr2 = DS_READ_B32_gfx9 $vgpr0, 0, 0, implicit $exec
; GCN: $vgpr3 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $exec
; GCN: }
- $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, implicit $exec
S_NOP 0
- $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec
- $vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, implicit $exec
+ $vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, implicit $exec
S_NOP 0
- $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec
- $vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 4, 0, 0, 0, 0, implicit $exec
- $vgpr2 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, 0, 0, 0, implicit $exec
- $vgpr5 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr1, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr0, 4, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, implicit $exec
+ $vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 4, 0, implicit $exec
+ $vgpr2 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, implicit $exec
+ $vgpr5 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr1, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr0, 4, 0, implicit $exec
S_NOP 0
- GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr1, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr0, 4, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr1, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr1, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr0, 4, 0, implicit $exec
+ GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr1, 0, 0, implicit $exec
S_NOP 0
- $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, implicit $exec
S_NOP 0
- GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr0, 4, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr0, 4, 0, implicit $exec
$vgpr2 = DS_READ_B32_gfx9 $vgpr0, 0, 0, implicit $exec
$vgpr3 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $exec
DS_WRITE_B32_gfx9 $vgpr0, $vgpr2, 0, 0, implicit killed $m0, implicit $exec
DS_WRITE_B32_gfx9 $vgpr0, $vgpr3, 4, 0, implicit killed $m0, implicit $exec
S_NOP 0
- $sgpr2 = S_LOAD_DWORD_IMM undef $sgpr0_sgpr1, 0, 0, 0
- $sgpr3 = S_LOAD_DWORD_SGPR undef $sgpr0_sgpr1, undef $sgpr10, 0, 0
- $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr2, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr2, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- BUFFER_STORE_DWORD_ADDR64 $vgpr0, $vgpr2_vgpr3, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- BUFFER_STORE_DWORD_ADDR64 $vgpr0, $vgpr2_vgpr3, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr2 = IMAGE_LOAD_V1_V4 undef $vgpr4_vgpr5_vgpr6_vgpr7, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4)
- $vgpr3 = IMAGE_LOAD_V1_V4 undef $vgpr4_vgpr5_vgpr6_vgpr7, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4)
- IMAGE_STORE_V4_V2 undef $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr0_vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, -1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16)
- IMAGE_STORE_V4_V2 undef $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr0_vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, -1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16)
+ $sgpr2 = S_LOAD_DWORD_IMM undef $sgpr0_sgpr1, 0, 0
+ $sgpr3 = S_LOAD_DWORD_SGPR undef $sgpr0_sgpr1, undef $sgpr10, 0
+ $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr2, 0, 0, 0, 0, implicit $exec
+ $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr2, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_ADDR64 $vgpr0, $vgpr2_vgpr3, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_ADDR64 $vgpr0, $vgpr2_vgpr3, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr2 = IMAGE_LOAD_V1_V4 undef $vgpr4_vgpr5_vgpr6_vgpr7, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4)
+ $vgpr3 = IMAGE_LOAD_V1_V4 undef $vgpr4_vgpr5_vgpr6_vgpr7, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4)
+ IMAGE_STORE_V4_V2 undef $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr0_vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (store 16)
+ IMAGE_STORE_V4_V2 undef $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr0_vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (store 16)
S_NOP 0
- $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71 = S_LOAD_DWORDX8_IMM undef $sgpr10_sgpr11, 464, 0, 0
- $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75 = S_LOAD_DWORDX8_IMM undef $sgpr10_sgpr11, 128, 0, 0
+ $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71 = S_LOAD_DWORDX8_IMM undef $sgpr10_sgpr11, 464, 0
+ $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75 = S_LOAD_DWORDX8_IMM undef $sgpr10_sgpr11, 128, 0
S_NOP 0
$vgpr2 = DS_READ_B32_gfx9 $vgpr0, 0, 0, implicit $exec
$vgpr3 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $exec
@@ -123,13 +123,13 @@ body: |
; GCN-LABEL: name: bundle_dbg_value_0
; GCN: liveins: $vgpr3_vgpr4, $vgpr5_vgpr6
; GCN: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
- ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec
; GCN: DBG_VALUE internal $vgpr0, 0, 0
- ; GCN: $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, implicit $exec
; GCN: }
- $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec
DBG_VALUE $vgpr0, 0, 0
- $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, implicit $exec
...
@@ -143,16 +143,16 @@ body: |
; GCN-LABEL: name: bundle_dbg_value_1
; GCN: liveins: $vgpr3_vgpr4, $vgpr5_vgpr6, $vgpr1
; GCN: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr1, implicit $vgpr5_vgpr6 {
- ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec
; GCN: DBG_VALUE internal $vgpr0, 0, 0
; GCN: DBG_VALUE $vgpr1, 0, 0
- ; GCN: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, implicit $exec
; GCN: }
; GCN: DBG_VALUE $vgpr2, 0, 0
- $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec
DBG_VALUE $vgpr0, 0, 0
DBG_VALUE $vgpr1, 0, 0
- $vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, implicit $exec
DBG_VALUE $vgpr2, 0, 0
...
@@ -167,15 +167,15 @@ body: |
; GCN: liveins: $vgpr3_vgpr4, $vgpr5_vgpr6, $vgpr1
; GCN: DBG_VALUE $vgpr1, 0, 0
; GCN: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
- ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec
; GCN: DBG_VALUE internal $vgpr0, 0, 0
- ; GCN: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, implicit $exec
; GCN: }
; GCN: DBG_VALUE $vgpr2, 0, 0
DBG_VALUE $vgpr1, 0, 0
- $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec
DBG_VALUE $vgpr0, 0, 0
- $vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, implicit $exec
DBG_VALUE $vgpr2, 0, 0
...
@@ -189,14 +189,14 @@ body: |
; GCN: liveins: $vgpr3_vgpr4, $vgpr5_vgpr6
; GCN: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
; GCN: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr1, implicit $vgpr5_vgpr6 {
- ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec
; GCN: KILL $vgpr1
- ; GCN: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, implicit $exec
; GCN: }
$vgpr1 = V_MOV_B32_e32 0, implicit $exec
- $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec
KILL $vgpr1
- $vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, implicit $exec
...
@@ -210,14 +210,14 @@ body: |
; GCN: liveins: $vgpr3_vgpr4, $vgpr5_vgpr6
; GCN: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
; GCN: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
- ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec
; GCN: KILL internal $vgpr0
- ; GCN: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, implicit $exec
; GCN: }
$vgpr1 = V_MOV_B32_e32 0, implicit $exec
- $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec
KILL $vgpr0
- $vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, implicit $exec
...
@@ -232,11 +232,11 @@ body: |
; GCN-LABEL: name: post_bundle_kill
; GCN: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
- ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec
- ; GCN: $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec
+ ; GCN: $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, implicit $exec
; GCN: }
- $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec
- $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec
+ $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, implicit $exec
KILL killed $vgpr3_vgpr4, killed $vgpr5_vgpr6
...
@@ -249,13 +249,13 @@ body: |
; GCN-LABEL: name: post_bundle_kill_other
; GCN: $vgpr7 = V_MOV_B32_e32 0, implicit $exec
; GCN: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
- ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec
- ; GCN: $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec
+ ; GCN: $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, implicit $exec
; GCN: }
; GCN: KILL killed $vgpr7
$vgpr7 = V_MOV_B32_e32 0, implicit $exec
- $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec
- $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec
+ $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, implicit $exec
KILL killed $vgpr7
...
@@ -269,13 +269,13 @@ body: |
; GCN-LABEL: name: post_bundle_kill_plus_other
; GCN: $vgpr7 = V_MOV_B32_e32 0, implicit $exec
; GCN: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
- ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec
- ; GCN: $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec
+ ; GCN: $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, implicit $exec
; GCN: }
; GCN: KILL killed $vgpr7, killed $vgpr3
$vgpr7 = V_MOV_B32_e32 0, implicit $exec
- $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec
- $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec
+ $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, implicit $exec
KILL killed $vgpr7, killed $vgpr3
...
@@ -287,11 +287,11 @@ body: |
; GCN-LABEL: name: post_bundle_multi_kill_0
; GCN: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
- ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec
- ; GCN: $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec
+ ; GCN: $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, implicit $exec
; GCN: }
- $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec
- $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec
+ $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, implicit $exec
KILL killed $vgpr3_vgpr4
KILL killed $vgpr5_vgpr6
...
@@ -305,11 +305,11 @@ body: |
; GCN-LABEL: name: post_bundle_multi_kill_1
; GCN: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
- ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec
- ; GCN: $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec
+ ; GCN: $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, implicit $exec
; GCN: }
- $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec
- $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec
+ $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, implicit $exec
KILL killed $vgpr3
KILL $vgpr4
KILL killed $vgpr5
diff --git a/llvm/test/CodeGen/AMDGPU/power-sched-no-instr-sunit.mir b/llvm/test/CodeGen/AMDGPU/power-sched-no-instr-sunit.mir
index d43d8a96601d..d39f407f1cf2 100644
--- a/llvm/test/CodeGen/AMDGPU/power-sched-no-instr-sunit.mir
+++ b/llvm/test/CodeGen/AMDGPU/power-sched-no-instr-sunit.mir
@@ -16,6 +16,6 @@ body: |
S_BARRIER
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = V_MFMA_F32_32X32X1F32_e64 undef $vgpr0, undef $vgpr0, 0, 0, 0, 2, implicit $mode, implicit $exec
$vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr31, implicit $exec
- BUFFER_STORE_DWORD_OFFEN killed $vgpr0, undef $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr6, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFEN killed $vgpr0, undef $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr6, 0, 0, 0, 0, implicit $exec
...
diff --git a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm-gfx10.mir b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm-gfx10.mir
index 850435312b74..a0f970879da0 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm-gfx10.mir
+++ b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm-gfx10.mir
@@ -1,14 +1,14 @@
# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefix=GFX10 %s
# GFX10-LABEL: name: diffoporder_add
-# GFX10: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, -2048, 0, 0
-# GFX10: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0, 0
+# GFX10: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, -2048, 0
+# GFX10: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
name: diffoporder_add
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
%4:sreg_32_xm0 = COPY $sgpr101
%5:sreg_32_xm0 = S_MOV_B32 0
@@ -34,12 +34,12 @@ body: |
%26:vgpr_32, %27:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %25, %21, 0, implicit $exec
%28:vgpr_32, dead %29:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
%30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
- %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, 0, 0, implicit $exec
+ %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, implicit $exec
%32:sgpr_32 = S_MOV_B32 6144
%33:vgpr_32, %34:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %21, %32, 0, implicit $exec
%35:vgpr_32, dead %36:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
%37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
- %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, 0, 0, 0, implicit $exec
+ %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, implicit $exec
...
---
@@ -48,20 +48,20 @@ body: |
# GFX10: [[BASE_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]]
# GFX10: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_32_xm0_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_5]]
# GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE_LO]], %subreg.sub0, [[BASE_HI]], %subreg.sub1
-# GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 1600, 0, 0
-# GFX10: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0, 0,
+# GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 1600, 0
+# GFX10: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0,
#
# GFX10: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 11200
# GFX10: [[BASE1_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_7:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_2]]
# GFX10: [[BASE1_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_32_xm0_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_7]]
# GFX10: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE1_LO]], %subreg.sub0, [[BASE1_HI]], %subreg.sub1
-# GFX10: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE3]], 0, 0, 0,
+# GFX10: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE3]], 0, 0,
name: LowestInMiddle
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
%4:sreg_32_xm0 = COPY $sgpr101
%5:sreg_32_xm0 = S_MOV_B32 0
@@ -87,17 +87,17 @@ body: |
%26:vgpr_32, %27:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %21, %25, 0, implicit $exec
%28:vgpr_32, dead %29:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
%30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
- %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, 0, 0, implicit $exec
+ %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, implicit $exec
%32:sgpr_32 = S_MOV_B32 6400
%33:vgpr_32, %34:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %21, %32, 0, implicit $exec
%35:vgpr_32, dead %36:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
%37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
- %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, 0, 0, 0, implicit $exec
+ %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, implicit $exec
%39:sgpr_32 = S_MOV_B32 11200
%40:vgpr_32, %41:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %21, %39, 0, implicit $exec
%42:vgpr_32, dead %43:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %41, 0, implicit $exec
%44:vreg_64 = REG_SEQUENCE %40, %subreg.sub0, %42, %subreg.sub1
- %45:vreg_64 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, 0, 0, 0, implicit $exec
+ %45:vreg_64 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, implicit $exec
...
---
@@ -106,19 +106,19 @@ body: |
# GFX10: [[BASE_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]]
# GFX10: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_32_xm0_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_5]]
# GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE_LO]], %subreg.sub0, [[BASE_HI]], %subreg.sub1
-# GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -2048, 0, 0
-# GFX10: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0, 0
+# GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -2048, 0
+# GFX10: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0
# GFX10: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 10240
# GFX10: [[BASE1_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_7:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_2]]
# GFX10: [[BASE1_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_32_xm0_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_7]]
# GFX10: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE1_LO]], %subreg.sub0, [[BASE1_HI]], %subreg.sub1
-# GFX10: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE3]], 0, 0, 0
+# GFX10: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE3]], 0, 0
name: NegativeDistance
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
%4:sreg_32_xm0 = COPY $sgpr101
%5:sreg_32_xm0 = S_MOV_B32 0
@@ -144,17 +144,17 @@ body: |
%26:vgpr_32, %27:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %21, %25, 0, implicit $exec
%28:vgpr_32, dead %29:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
%30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
- %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, 0, 0, implicit $exec
+ %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, implicit $exec
%32:sgpr_32 = S_MOV_B32 8192
%33:vgpr_32, %34:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %21, %32, 0, implicit $exec
%35:vgpr_32, dead %36:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
%37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
- %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, 0, 0, 0, implicit $exec
+ %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, implicit $exec
%39:sgpr_32 = S_MOV_B32 10240
%40:vgpr_32, %41:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %21, %39, 0, implicit $exec
%42:vgpr_32, dead %43:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %41, 0, implicit $exec
%44:vreg_64 = REG_SEQUENCE %40, %subreg.sub0, %42, %subreg.sub1
- %45:vreg_64 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, 0, 0, 0, implicit $exec
+ %45:vreg_64 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, implicit $exec
...
---
@@ -163,7 +163,7 @@ name: assert_hit
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
%4:sreg_32_xm0 = COPY $sgpr101
%5:sreg_32_xm0 = S_MOV_B32 0
@@ -190,13 +190,13 @@ body: |
%26:vgpr_32, %27:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %21, %25, 0, implicit $exec
%28:vgpr_32, dead %29:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 4294967295, killed %27, 0, implicit $exec
%30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
- %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, 0, 0, implicit $exec
+ %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, implicit $exec
...
---
# GFX10-LABEL: name: diffoporder_add_store
-# GFX10: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub0, 1000, 0, 0, 0
-# GFX10: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub1, 0, 0, 0, 0
+# GFX10: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub0, 1000, 0
+# GFX10: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub1, 0, 0
name: diffoporder_add_store
body: |
@@ -208,11 +208,11 @@ body: |
%2:vgpr_32, %3:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
%4:vgpr_32, dead %5:sreg_32_xm0_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
%6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
- GLOBAL_STORE_DWORD %6, %0.sub0, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %6, %0.sub0, 0, 0, implicit $exec
%8:sgpr_32 = S_MOV_B32 3000
%9:vgpr_32, %10:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
%11:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
%13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
- GLOBAL_STORE_DWORD %13, %0.sub1, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %13, %0.sub1, 0, 0, implicit $exec
...
diff --git a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir
index c16ca8edb371..91b312dfdf95 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir
+++ b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir
@@ -1,14 +1,14 @@
# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefix=GFX9 %s
# GFX9-LABEL: name: diffoporder_add
-# GFX9: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, -2048, 0, 0
-# GFX9: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0, 0
+# GFX9: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, -2048, 0
+# GFX9: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
name: diffoporder_add
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
%4:sreg_32_xm0 = COPY $sgpr101
%5:sreg_32_xm0 = S_MOV_B32 0
@@ -34,12 +34,12 @@ body: |
%26:vgpr_32, %27:sreg_64_xexec = V_ADD_CO_U32_e64 %25, %21, 0, implicit $exec
%28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
%30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
- %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, 0, 0, implicit $exec
+ %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, implicit $exec
%32:sgpr_32 = S_MOV_B32 6144
%33:vgpr_32, %34:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %32, 0, implicit $exec
%35:vgpr_32, dead %36:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
%37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
- %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, 0, 0, 0, implicit $exec
+ %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, implicit $exec
...
---
@@ -48,20 +48,20 @@ body: |
# GFX9: [[BASE_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]]
# GFX9: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_64_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_5]]
# GFX9: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE_LO]], %subreg.sub0, [[BASE_HI]], %subreg.sub1
-# GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -3200, 0, 0
+# GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -3200, 0
#
# GFX9: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 6400
# GFX9: [[BASE1_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_7:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_2]]
# GFX9: [[BASE1_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_64_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_7]]
# GFX9: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE1_LO]], %subreg.sub0, [[BASE1_HI]], %subreg.sub1
-# GFX9: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE3]], 0, 0, 0,
-# GFX9: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0, 0,
+# GFX9: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE3]], 0, 0,
+# GFX9: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0,
name: LowestInMiddle
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
%4:sreg_32_xm0 = COPY $sgpr101
%5:sreg_32_xm0 = S_MOV_B32 0
@@ -87,17 +87,17 @@ body: |
%26:vgpr_32, %27:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %25, 0, implicit $exec
%28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
%30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
- %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, 0, 0, implicit $exec
+ %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, implicit $exec
%32:sgpr_32 = S_MOV_B32 6400
%33:vgpr_32, %34:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %32, 0, implicit $exec
%35:vgpr_32, dead %36:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
%37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
- %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, 0, 0, 0, implicit $exec
+ %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, implicit $exec
%39:sgpr_32 = S_MOV_B32 11200
%40:vgpr_32, %41:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %39, 0, implicit $exec
%42:vgpr_32, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %41, 0, implicit $exec
%44:vreg_64 = REG_SEQUENCE %40, %subreg.sub0, %42, %subreg.sub1
- %45:vreg_64 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, 0, 0, 0, implicit $exec
+ %45:vreg_64 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, implicit $exec
...
---
@@ -106,15 +106,15 @@ body: |
# GFX9: [[V_ADD_CO_U32_e64_4:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]]
# GFX9: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_64_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_5]]
# GFX9: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_4]], %subreg.sub0, [[BASE_HI]], %subreg.sub1
-# GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -4096, 0, 0
-# GFX9: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -2048, 0, 0
-# GFX9: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0, 0
+# GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -4096, 0
+# GFX9: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -2048, 0
+# GFX9: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0
name: NegativeDistance
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
%4:sreg_32_xm0 = COPY $sgpr101
%5:sreg_32_xm0 = S_MOV_B32 0
@@ -140,17 +140,17 @@ body: |
%26:vgpr_32, %27:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %25, 0, implicit $exec
%28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
%30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
- %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, 0, 0, implicit $exec
+ %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, implicit $exec
%32:sgpr_32 = S_MOV_B32 8192
%33:vgpr_32, %34:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %32, 0, implicit $exec
%35:vgpr_32, dead %36:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
%37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
- %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, 0, 0, 0, implicit $exec
+ %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, implicit $exec
%39:sgpr_32 = S_MOV_B32 10240
%40:vgpr_32, %41:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %39, 0, implicit $exec
%42:vgpr_32, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %41, 0, implicit $exec
%44:vreg_64 = REG_SEQUENCE %40, %subreg.sub0, %42, %subreg.sub1
- %45:vreg_64 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, 0, 0, 0, implicit $exec
+ %45:vreg_64 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, implicit $exec
...
---
@@ -159,7 +159,7 @@ name: assert_hit
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
%4:sreg_32_xm0 = COPY $sgpr101
%5:sreg_32_xm0 = S_MOV_B32 0
@@ -186,13 +186,13 @@ body: |
%26:vgpr_32, %27:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %25, 0, implicit $exec
%28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 4294967295, killed %27, 0, implicit $exec
%30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
- %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, 0, 0, implicit $exec
+ %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, implicit $exec
...
---
# GFX9-LABEL: name: diffoporder_add_store
-# GFX9: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub0, 1000, 0, 0, 0, 0
-# GFX9: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub1, 0, 0, 0, 0, 0
+# GFX9: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub0, 1000, 0,
+# GFX9: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub1, 0, 0,
name: diffoporder_add_store
body: |
@@ -204,11 +204,11 @@ body: |
%2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
%4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
%6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
- GLOBAL_STORE_DWORD %6, %0.sub0, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %6, %0.sub0, 0, 0, implicit $exec
%8:sgpr_32 = S_MOV_B32 3000
%9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
%11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
%13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
- GLOBAL_STORE_DWORD %13, %0.sub1, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %13, %0.sub1, 0, 0, implicit $exec
...
diff --git a/llvm/test/CodeGen/AMDGPU/readlane_exec0.mir b/llvm/test/CodeGen/AMDGPU/readlane_exec0.mir
index 4ec62f8a306a..e7660a14de91 100644
--- a/llvm/test/CodeGen/AMDGPU/readlane_exec0.mir
+++ b/llvm/test/CodeGen/AMDGPU/readlane_exec0.mir
@@ -22,7 +22,7 @@ body: |
$sgpr10 = V_READFIRSTLANE_B32 $vgpr2, implicit $exec
$sgpr11 = V_READFIRSTLANE_B32 $vgpr3, implicit $exec
- $sgpr10 = S_LOAD_DWORD_IMM killed $sgpr10_sgpr11, 0, 0, 0
+ $sgpr10 = S_LOAD_DWORD_IMM killed $sgpr10_sgpr11, 0, 0
S_WAITCNT 127
$vgpr0 = V_XOR_B32_e32 killed $sgpr10, killed $vgpr0, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/regbank-reassign.mir b/llvm/test/CodeGen/AMDGPU/regbank-reassign.mir
index 09366f814361..f904763e0a2f 100644
--- a/llvm/test/CodeGen/AMDGPU/regbank-reassign.mir
+++ b/llvm/test/CodeGen/AMDGPU/regbank-reassign.mir
@@ -29,7 +29,7 @@ body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
- GLOBAL_STORE_DWORD %1, %0, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %1, %0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -45,7 +45,7 @@ body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
- GLOBAL_STORE_DWORDX2 %1, %0, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX2 %1, %0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -241,15 +241,15 @@ body: |
%14 = IMPLICIT_DEF
%15 = IMPLICIT_DEF
%2 = V_AND_B32_e32 %1, %0, implicit $exec
- GLOBAL_STORE_DWORD %3, %0, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORD %3, %1, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORD %3, %2, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORD %3, %4, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX2 %3, %5, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %3, %6, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %3, %7, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %3, %8, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %3, %9, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %3, %0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %3, %1, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %3, %2, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %3, %4, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX2 %3, %5, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %3, %6, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %3, %7, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %3, %8, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %3, %9, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -287,18 +287,18 @@ body: |
%11 = IMPLICIT_DEF
%12 = IMPLICIT_DEF
%2 = V_AND_B32_e32 %1, %0, implicit $exec
- GLOBAL_STORE_DWORD %3, %0, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORD %3, %1, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORD %3, %2, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORD %3, %4, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX2 %3, %5, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %3, %6, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %3, %7, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %3, %8, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %3, %9, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %3, %10, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %3, %11, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORD %3, %12, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %3, %0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %3, %1, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %3, %2, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %3, %4, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX2 %3, %5, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %3, %6, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %3, %7, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %3, %8, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %3, %9, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %3, %10, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %3, %11, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %3, %12, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -319,8 +319,8 @@ body: |
...
# GCN-LABEL: smem_bundle{{$}}
-# GCN: S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr0_sgpr1_sgpr2_sgpr3, renamable $sgpr14, 0, 0
-# GCN: S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr0_sgpr1_sgpr2_sgpr3, renamable $sgpr15, 0, 0
+# GCN: S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr0_sgpr1_sgpr2_sgpr3, renamable $sgpr14, 0
+# GCN: S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr0_sgpr1_sgpr2_sgpr3, renamable $sgpr15, 0
---
name: smem_bundle
tracksRegLiveness: true
@@ -336,8 +336,8 @@ body: |
%1 = IMPLICIT_DEF
%2 = IMPLICIT_DEF
early-clobber %3, early-clobber %4 = BUNDLE %0, %1, %2 {
- %3 = S_BUFFER_LOAD_DWORD_SGPR %0, %1, 0, 0
- %4 = S_BUFFER_LOAD_DWORD_SGPR %0, %2, 0, 0
+ %3 = S_BUFFER_LOAD_DWORD_SGPR %0, %1, 0
+ %4 = S_BUFFER_LOAD_DWORD_SGPR %0, %2, 0
}
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/regcoal-subrange-join-seg.mir b/llvm/test/CodeGen/AMDGPU/regcoal-subrange-join-seg.mir
index 746581529402..33bb91e7b2c2 100644
--- a/llvm/test/CodeGen/AMDGPU/regcoal-subrange-join-seg.mir
+++ b/llvm/test/CodeGen/AMDGPU/regcoal-subrange-join-seg.mir
@@ -185,9 +185,9 @@ body: |
bb.28:
%9 = S_FF1_I32_B32 undef %10
%13 = V_MAD_U32_U24_e64 killed %9, 48, 32, 0, implicit $exec
- %45 = BUFFER_LOAD_DWORD_OFFEN killed %13, undef %15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ %45 = BUFFER_LOAD_DWORD_OFFEN killed %13, undef %15, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
%46 = V_AND_B32_e32 1, killed %45, implicit $exec
- %21 = S_BUFFER_LOAD_DWORD_SGPR undef %22, undef %23, 0, 0 :: (dereferenceable invariant load 4)
+ %21 = S_BUFFER_LOAD_DWORD_SGPR undef %22, undef %23, 0 :: (dereferenceable invariant load 4)
%25 = nofpexcept V_CMP_GE_F32_e64 0, 0, 0, killed %21, 0, implicit $mode, implicit $exec
%26 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed %25, implicit $exec
%62 = IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/regcoal-subrange-join.mir b/llvm/test/CodeGen/AMDGPU/regcoal-subrange-join.mir
index 6d1df163ec82..2e0a96027068 100644
--- a/llvm/test/CodeGen/AMDGPU/regcoal-subrange-join.mir
+++ b/llvm/test/CodeGen/AMDGPU/regcoal-subrange-join.mir
@@ -96,14 +96,14 @@ body: |
%0.sub0 = COPY killed %12
%21 = COPY killed %18
%21.sub0 = COPY killed %15
- %22 = S_LOAD_DWORD_IMM killed %21, 2, 0, 0
+ %22 = S_LOAD_DWORD_IMM killed %21, 2, 0
%23 = S_MOV_B32 491436
undef %24.sub0 = COPY killed %22
%24.sub1 = COPY killed %23
- %25 = S_LOAD_DWORDX4_IMM killed %24, 0, 0, 0
+ %25 = S_LOAD_DWORDX4_IMM killed %24, 0, 0
%1 = COPY killed %25
- %26 = S_LOAD_DWORDX2_IMM %0, 2, 0, 0
- dead %27 = S_LOAD_DWORD_IMM killed %26, 0, 0, 0
+ %26 = S_LOAD_DWORDX2_IMM %0, 2, 0
+ dead %27 = S_LOAD_DWORD_IMM killed %26, 0, 0
S_CBRANCH_SCC0 %bb.1, implicit undef $scc
bb.5:
@@ -129,8 +129,8 @@ body: |
bb.2:
%4 = COPY killed %59
%3 = COPY killed %58
- %39 = S_LOAD_DWORDX2_IMM killed %0, 6, 0, 0
- %40 = S_LOAD_DWORD_IMM killed %39, 0, 0, 0
+ %39 = S_LOAD_DWORDX2_IMM killed %0, 6, 0
+ %40 = S_LOAD_DWORD_IMM killed %39, 0, 0
%43 = V_MOV_B32_e32 -1102263091, implicit $exec
%60 = COPY killed %4
%61 = COPY killed %3
diff --git a/llvm/test/CodeGen/AMDGPU/regcoalesce-dbg.mir b/llvm/test/CodeGen/AMDGPU/regcoalesce-dbg.mir
index 6f114a1d881a..069a21509c07 100644
--- a/llvm/test/CodeGen/AMDGPU/regcoalesce-dbg.mir
+++ b/llvm/test/CodeGen/AMDGPU/regcoalesce-dbg.mir
@@ -56,8 +56,8 @@ body: |
%3 = COPY killed $vgpr0
%0 = COPY killed $sgpr0_sgpr1
- %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
- %5 = S_LOAD_DWORD_IMM killed %0, 13, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+ %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %5 = S_LOAD_DWORD_IMM killed %0, 13, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
%18 = V_ASHRREV_I32_e32 31, %3, implicit $exec
undef %19.sub0 = COPY killed %3
%19.sub1 = COPY killed %18
@@ -70,7 +70,7 @@ body: |
%13.sub2_sub3 = COPY killed %12
%20 = V_LSHL_B64_e64 killed %19, 2, implicit $exec
%16 = COPY killed %5
- BUFFER_STORE_DWORD_ADDR64 killed %16, killed %20, killed %13, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out)
+ BUFFER_STORE_DWORD_ADDR64 killed %16, killed %20, killed %13, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out)
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/regcoalescing-remove-partial-redundancy-assert.mir b/llvm/test/CodeGen/AMDGPU/regcoalescing-remove-partial-redundancy-assert.mir
index bff9fa74fc1e..dd5b4a9ba438 100644
--- a/llvm/test/CodeGen/AMDGPU/regcoalescing-remove-partial-redundancy-assert.mir
+++ b/llvm/test/CodeGen/AMDGPU/regcoalescing-remove-partial-redundancy-assert.mir
@@ -16,7 +16,7 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 killed %21, implicit $mode, implicit $exec
%108:vgpr_32 = V_LSHRREV_B32_e32 4, killed %23, implicit $exec
undef %109.sub1:vreg_128 = COPY %108
- %28:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %29:sgpr_128, 3044, 0, 0 :: (dereferenceable invariant load 4)
+ %28:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %29:sgpr_128, 3044, 0 :: (dereferenceable invariant load 4)
S_CMP_EQ_U32 killed %28, 0, implicit-def $scc
S_CBRANCH_SCC0 %bb.2, implicit killed $scc
@@ -47,7 +47,7 @@ body: |
S_BRANCH %bb.6
bb.6:
- %36:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %37:sgpr_128, 2708, 0, 0 :: (dereferenceable invariant load 4)
+ %36:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %37:sgpr_128, 2708, 0 :: (dereferenceable invariant load 4)
%39:vgpr_32 = nnan arcp contract reassoc nofpexcept V_MAD_F32_e64 0, killed %110.sub1, 0, target-flags(amdgpu-gotprel32-lo) 0, 0, 0, 0, 0, implicit $mode, implicit $exec
%40:vgpr_32 = nofpexcept V_MAD_F32_e64 0, %111.sub1, 0, target-flags(amdgpu-gotprel32-lo) 0, 0, 0, 0, 0, implicit $mode, implicit $exec
%41:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 0, 0, killed %40, 1, 0, implicit $mode, implicit $exec
@@ -83,7 +83,7 @@ body: |
S_BRANCH %bb.8
bb.8:
- dead %66:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %67:sgpr_128, 2704, 0, 0 :: (dereferenceable invariant load 4)
+ dead %66:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %67:sgpr_128, 2704, 0 :: (dereferenceable invariant load 4)
%138:vreg_128 = COPY killed %111
bb.9:
diff --git a/llvm/test/CodeGen/AMDGPU/rename-independent-subregs-mac-operands.mir b/llvm/test/CodeGen/AMDGPU/rename-independent-subregs-mac-operands.mir
index 97d6a0c89370..7aff14aa0a04 100644
--- a/llvm/test/CodeGen/AMDGPU/rename-independent-subregs-mac-operands.mir
+++ b/llvm/test/CodeGen/AMDGPU/rename-independent-subregs-mac-operands.mir
@@ -62,12 +62,12 @@ body: |
bb.3:
%1 = COPY killed %17
- FLAT_STORE_DWORD undef %10, %1.sub2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD undef %10, %1.sub2, 0, 0, implicit $exec, implicit $flat_scr
%14 = COPY %1.sub1
%16 = COPY killed %1.sub0
undef %15.sub0 = COPY killed %16
%15.sub1 = COPY killed %14
- FLAT_STORE_DWORDX2 undef %11, killed %15, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORDX2 undef %11, killed %15, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
@@ -132,10 +132,10 @@ body: |
%6.sub2 = COPY %6.sub0
bb.2:
- BUFFER_STORE_DWORD_OFFEN %6.sub3, %0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 12, 0, 0, 0, 0, 0, 0, implicit $exec
- BUFFER_STORE_DWORD_OFFEN %6.sub2, %0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, 0, 0, 0, 0, implicit $exec
- BUFFER_STORE_DWORD_OFFEN %6.sub1, %0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, 0, 0, 0, 0, implicit $exec
- BUFFER_STORE_DWORD_OFFEN %6.sub0, %0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFEN %6.sub3, %0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 12, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFEN %6.sub2, %0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFEN %6.sub1, %0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFEN %6.sub0, %0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
$sgpr30_sgpr31 = COPY %5
S_SETPC_B64_return $sgpr30_sgpr31
diff --git a/llvm/test/CodeGen/AMDGPU/reserved-reg-in-clause.mir b/llvm/test/CodeGen/AMDGPU/reserved-reg-in-clause.mir
index 42e2b154c3f8..71d5ca33be37 100644
--- a/llvm/test/CodeGen/AMDGPU/reserved-reg-in-clause.mir
+++ b/llvm/test/CodeGen/AMDGPU/reserved-reg-in-clause.mir
@@ -11,19 +11,19 @@ body: |
; GCN-NOT: early-clobber
; GCN-NOT: KILL
%0:vreg_64_align2 = IMPLICIT_DEF
- undef %1.sub12_sub13_sub14_sub15:areg_512_align2 = GLOBAL_LOAD_DWORDX4 %0, -208, 0, 0, 0, 0, implicit $exec
- %1.sub8_sub9_sub10_sub11:areg_512_align2 = GLOBAL_LOAD_DWORDX4 %0, -224, 0, 0, 0, 0, implicit $exec
- %1.sub4_sub5_sub6_sub7:areg_512_align2 = GLOBAL_LOAD_DWORDX4 %0, -240, 0, 0, 0, 0, implicit $exec
- dead %1.sub0_sub1_sub2_sub3:areg_512_align2 = GLOBAL_LOAD_DWORDX4 %0, -256, 0, 0, 0, 0, implicit $exec
- undef %2.sub12_sub13_sub14_sub15:areg_512_align2 = GLOBAL_LOAD_DWORDX4 %0, -80, 0, 0, 0, 0, implicit $exec
- %2.sub8_sub9_sub10_sub11:areg_512_align2 = GLOBAL_LOAD_DWORDX4 %0, -96, 0, 0, 0, 0, implicit $exec
- %2.sub4_sub5_sub6_sub7:areg_512_align2 = GLOBAL_LOAD_DWORDX4 %0, -112, 0, 0, 0, 0, implicit $exec
- dead %2.sub0_sub1_sub2_sub3:areg_512_align2 = GLOBAL_LOAD_DWORDX4 %0, -128, 0, 0, 0, 0, implicit $exec
- undef %3.sub12_sub13_sub14_sub15:areg_512_align2 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, 0, 0, implicit $exec
- %3.sub8_sub9_sub10_sub11:areg_512_align2 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, 0, implicit $exec
- %3.sub4_sub5_sub6_sub7:areg_512_align2 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, 0, implicit $exec
- dead %3.sub0_sub1_sub2_sub3:areg_512_align2 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, 0, implicit $exec
- undef %4.sub12_sub13_sub14_sub15:areg_512_align2 = GLOBAL_LOAD_DWORDX4 %0, 176, 0, 0, 0, 0, implicit $exec
- %4.sub8_sub9_sub10_sub11:areg_512_align2 = GLOBAL_LOAD_DWORDX4 %0, 160, 0, 0, 0, 0, implicit $exec
- %4.sub4_sub5_sub6_sub7:areg_512_align2 = GLOBAL_LOAD_DWORDX4 %0, 144, 0, 0, 0, 0, implicit $exec
+ undef %1.sub12_sub13_sub14_sub15:areg_512_align2 = GLOBAL_LOAD_DWORDX4 %0, -208, 0, implicit $exec
+ %1.sub8_sub9_sub10_sub11:areg_512_align2 = GLOBAL_LOAD_DWORDX4 %0, -224, 0, implicit $exec
+ %1.sub4_sub5_sub6_sub7:areg_512_align2 = GLOBAL_LOAD_DWORDX4 %0, -240, 0, implicit $exec
+ dead %1.sub0_sub1_sub2_sub3:areg_512_align2 = GLOBAL_LOAD_DWORDX4 %0, -256, 0, implicit $exec
+ undef %2.sub12_sub13_sub14_sub15:areg_512_align2 = GLOBAL_LOAD_DWORDX4 %0, -80, 0, implicit $exec
+ %2.sub8_sub9_sub10_sub11:areg_512_align2 = GLOBAL_LOAD_DWORDX4 %0, -96, 0, implicit $exec
+ %2.sub4_sub5_sub6_sub7:areg_512_align2 = GLOBAL_LOAD_DWORDX4 %0, -112, 0, implicit $exec
+ dead %2.sub0_sub1_sub2_sub3:areg_512_align2 = GLOBAL_LOAD_DWORDX4 %0, -128, 0, implicit $exec
+ undef %3.sub12_sub13_sub14_sub15:areg_512_align2 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, implicit $exec
+ %3.sub8_sub9_sub10_sub11:areg_512_align2 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, implicit $exec
+ %3.sub4_sub5_sub6_sub7:areg_512_align2 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, implicit $exec
+ dead %3.sub0_sub1_sub2_sub3:areg_512_align2 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
+ undef %4.sub12_sub13_sub14_sub15:areg_512_align2 = GLOBAL_LOAD_DWORDX4 %0, 176, 0, implicit $exec
+ %4.sub8_sub9_sub10_sub11:areg_512_align2 = GLOBAL_LOAD_DWORDX4 %0, 160, 0, implicit $exec
+ %4.sub4_sub5_sub6_sub7:areg_512_align2 = GLOBAL_LOAD_DWORDX4 %0, 144, 0, implicit $exec
...
diff --git a/llvm/test/CodeGen/AMDGPU/scalar-store-cache-flush.mir b/llvm/test/CodeGen/AMDGPU/scalar-store-cache-flush.mir
index 59678723c274..c2a725640261 100644
--- a/llvm/test/CodeGen/AMDGPU/scalar-store-cache-flush.mir
+++ b/llvm/test/CodeGen/AMDGPU/scalar-store-cache-flush.mir
@@ -58,7 +58,7 @@ machineFunctionInfo:
body: |
bb.0:
- S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0, 0
+ S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0
S_ENDPGM 0
...
---
@@ -76,7 +76,7 @@ machineFunctionInfo:
body: |
bb.0:
- S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0, 0
+ S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0
S_DCACHE_WB
S_ENDPGM 0
...
@@ -97,7 +97,7 @@ machineFunctionInfo:
body: |
bb.0:
S_DCACHE_WB
- S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0, 0
+ S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0
S_ENDPGM 0
...
---
@@ -132,11 +132,11 @@ machineFunctionInfo:
body: |
bb.0:
- S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0, 0
+ S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0
S_ENDPGM 0
bb.1:
- S_STORE_DWORD_SGPR undef $sgpr4, undef $sgpr6_sgpr7, undef $m0, 0, 0
+ S_STORE_DWORD_SGPR undef $sgpr4, undef $sgpr6_sgpr7, undef $m0, 0
S_ENDPGM 0
...
...
@@ -164,7 +164,7 @@ body: |
S_ENDPGM 0
bb.1:
- S_STORE_DWORD_SGPR undef $sgpr4, undef $sgpr6_sgpr7, undef $m0, 0, 0
+ S_STORE_DWORD_SGPR undef $sgpr4, undef $sgpr6_sgpr7, undef $m0, 0
S_ENDPGM 0
...
---
@@ -182,6 +182,6 @@ machineFunctionInfo:
body: |
bb.0:
- S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0, 0
+ S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0
SI_RETURN_TO_EPILOG undef $vgpr0
...
diff --git a/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir b/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir
index f5fdfbf18113..7a78608de113 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir
@@ -27,7 +27,7 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:vreg_512 = COPY %0
; CHECK: bb.1:
; CHECK: successors: %bb.1(0x80000000)
- ; CHECK: BUFFER_STORE_DWORD_OFFEN %0.sub3, undef %5:vgpr_32, $sgpr24_sgpr25_sgpr26_sgpr27, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, align 8, addrspace 5)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN %0.sub3, undef %5:vgpr_32, $sgpr24_sgpr25_sgpr26_sgpr27, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store 4, align 8, addrspace 5)
; CHECK: dead %6:vgpr_32 = DS_READ_B32_gfx9 undef %7:vgpr_32, 0, 0, implicit $exec
; CHECK: dead %8:vreg_64 = DS_READ_B64_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec
; CHECK: dead %9:vreg_128 = DS_READ_B128_gfx9 [[V_ADD_U32_e32_]], 0, 0, implicit $exec
@@ -51,7 +51,7 @@ body: |
%4:vreg_512 = COPY %0
bb.1:
- BUFFER_STORE_DWORD_OFFEN %0.sub3, undef %5:vgpr_32, $sgpr24_sgpr25_sgpr26_sgpr27, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, align 8, addrspace 5)
+ BUFFER_STORE_DWORD_OFFEN %0.sub3, undef %5:vgpr_32, $sgpr24_sgpr25_sgpr26_sgpr27, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store 4, align 8, addrspace 5)
%6:vgpr_32 = DS_READ_B32_gfx9 undef %7:vgpr_32, 0, 0, implicit $exec
%8:vreg_64 = DS_READ_B64_gfx9 %1, 0, 0, implicit $exec
%9:vreg_128 = DS_READ_B128_gfx9 %2, 0, 0, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir b/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir
index dffe920aacef..48bae9beb1d1 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir
@@ -23,12 +23,12 @@ body: |
; CHECK: liveins: $vgpr0
; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[DEF]], 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 8, 0, 0, 0, 0, implicit $exec
+ ; CHECK: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[DEF]], 0, 0, implicit $exec
+ ; CHECK: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 8, 0, implicit $exec
; CHECK: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_]]
; CHECK: undef %6.sub0:vreg_64 = V_ADD_F32_e32 [[DEF]].sub0, [[COPY1]].sub0, implicit $mode, implicit $exec
; CHECK: dead undef %6.sub1:vreg_64 = V_ADD_F32_e32 [[DEF]].sub1, [[COPY1]].sub0, implicit $mode, implicit $exec
- ; CHECK: [[GLOBAL_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY1]], 0, 0, 0, 0, 0, implicit $exec
+ ; CHECK: [[GLOBAL_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY1]], 0, 0, implicit $exec
; CHECK: undef %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec
; CHECK: [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK: [[DEF2:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
@@ -42,17 +42,17 @@ body: |
; CHECK: %19.sub1:vreg_64 = V_ADD_F32_e32 [[GLOBAL_LOAD_DWORD]], [[GLOBAL_LOAD_DWORD]], implicit $mode, implicit $exec
; CHECK: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK: %4.sub1:vreg_64 = V_ADD_U32_e32 [[COPY]], [[COPY]], implicit $exec
- ; CHECK: GLOBAL_STORE_DWORDX2 %19, %4, 32, 0, 0, 0, 0, implicit $exec
+ ; CHECK: GLOBAL_STORE_DWORDX2 %19, %4, 32, 0, implicit $exec
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; CHECK: %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD [[DEF2]], 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: [[DEF1]].sub0:vreg_64 = GLOBAL_LOAD_DWORD [[DEF3]], 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: dead %20:vgpr_32 = GLOBAL_LOAD_DWORD %11, 0, 0, 0, 0, 0, implicit $exec
- ; CHECK: dead %21:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF6]], 0, 0, 0, 0, 0, implicit $exec
+ ; CHECK: %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD [[DEF2]], 0, 0, implicit $exec
+ ; CHECK: [[DEF1]].sub0:vreg_64 = GLOBAL_LOAD_DWORD [[DEF3]], 0, 0, implicit $exec
+ ; CHECK: dead %20:vgpr_32 = GLOBAL_LOAD_DWORD %11, 0, 0, implicit $exec
+ ; CHECK: dead %21:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF6]], 0, 0, implicit $exec
; CHECK: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 2, [[DEF1]], implicit $exec
- ; CHECK: dead %22:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF7]], 0, 0, 0, 0, 0, implicit $exec
+ ; CHECK: dead %22:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF7]], 0, 0, implicit $exec
; CHECK: S_NOP 0, implicit [[DEF5]], implicit [[V_LSHLREV_B64_e64_]].sub0, implicit [[DEF4]], implicit [[V_MOV_B32_e32_]]
- ; CHECK: GLOBAL_STORE_DWORD [[DEF7]], [[V_MOV_B32_e32_1]], 0, 0, 0, 0, 0, implicit $exec
+ ; CHECK: GLOBAL_STORE_DWORD [[DEF7]], [[V_MOV_B32_e32_1]], 0, 0, implicit $exec
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
@@ -69,14 +69,14 @@ body: |
%0:vgpr_32 = COPY $vgpr0
%1:vreg_64 = IMPLICIT_DEF
- %2:vreg_64 = GLOBAL_LOAD_DWORDX2 %1, 0, 0, 0, 0, 0, implicit $exec
- %3:vgpr_32 = GLOBAL_LOAD_DWORD %1, 8, 0, 0, 0, 0, implicit $exec
+ %2:vreg_64 = GLOBAL_LOAD_DWORDX2 %1, 0, 0, implicit $exec
+ %3:vgpr_32 = GLOBAL_LOAD_DWORD %1, 8, 0, implicit $exec
undef %4.sub1:vreg_64 = V_ADD_U32_e32 %0, %0, implicit $exec
%4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec
%5:vreg_64 = COPY %2
undef %6.sub0:vreg_64 = V_ADD_F32_e32 %1.sub0, %5.sub0, implicit $mode, implicit $exec
%6.sub1:vreg_64 = V_ADD_F32_e32 %1.sub1, %5.sub0, implicit $mode, implicit $exec
- %7:vgpr_32 = GLOBAL_LOAD_DWORD %5, 0, 0, 0, 0, 0, implicit $exec
+ %7:vgpr_32 = GLOBAL_LOAD_DWORD %5, 0, 0, implicit $exec
%8:vreg_64 = IMPLICIT_DEF
%9:vreg_64 = IMPLICIT_DEF
%10:vreg_64 = IMPLICIT_DEF
@@ -90,15 +90,15 @@ body: |
%18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
undef %19.sub0:vreg_64 = V_ADD_F32_e32 %7, %2.sub0, implicit $mode, implicit $exec
%19.sub1:vreg_64 = V_ADD_F32_e32 %3, %3, implicit $mode, implicit $exec
- GLOBAL_STORE_DWORDX2 %19, %4, 32, 0, 0, 0, 0, implicit $exec
- %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD %9, 0, 0, 0, 0, 0, implicit $exec
- %8.sub0:vreg_64 = GLOBAL_LOAD_DWORD %10, 0, 0, 0, 0, 0, implicit $exec
- %20:vgpr_32 = GLOBAL_LOAD_DWORD %11, 0, 0, 0, 0, 0, implicit $exec
- %21:vgpr_32 = GLOBAL_LOAD_DWORD %14, 0, 0, 0, 0, 0, implicit $exec
- %22:vgpr_32 = GLOBAL_LOAD_DWORD %15, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX2 %19, %4, 32, 0, implicit $exec
+ %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD %9, 0, 0, implicit $exec
+ %8.sub0:vreg_64 = GLOBAL_LOAD_DWORD %10, 0, 0, implicit $exec
+ %20:vgpr_32 = GLOBAL_LOAD_DWORD %11, 0, 0, implicit $exec
+ %21:vgpr_32 = GLOBAL_LOAD_DWORD %14, 0, 0, implicit $exec
+ %22:vgpr_32 = GLOBAL_LOAD_DWORD %15, 0, 0, implicit $exec
%23:vreg_64 = V_LSHLREV_B64_e64 2, %8, implicit $exec
S_NOP 0, implicit %13, implicit %23.sub0, implicit %12, implicit %17
- GLOBAL_STORE_DWORD %15, %18, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %15, %18, 0, 0, implicit $exec
bb.1:
S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
diff --git a/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
index 06bbc9450fa7..142fe4e3d4d6 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
@@ -200,12 +200,12 @@ body: |
%2:vgpr_32 = COPY $vgpr2
%1:vgpr_32 = COPY $vgpr1
%0:vgpr_32 = COPY $vgpr0
- %5:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 0, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
- %6:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 8, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
- %7:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 16, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
- %8:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 24, 0, 0
- %9:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 32, 0, 0
- %10:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3, 4, 0, 0
+ %5:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %6:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 8, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %7:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 16, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %8:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 24, 0
+ %9:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 32, 0
+ %10:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3, 4, 0
%11:sreg_32_xm0 = S_LSHR_B32 %10.sub0, 16, implicit-def dead $scc
%12:sreg_32_xm0 = S_MUL_I32 %11, %10.sub1
%13:vgpr_32 = V_MUL_LO_I32_e64 0, %0, implicit $exec
@@ -217,29 +217,29 @@ body: |
%19:sreg_32_xm0_xexec = IMPLICIT_DEF
%20:vgpr_32 = V_ADD_CO_U32_e32 %19, %0, implicit-def dead $vcc, implicit $exec
%21:vreg_64, dead %22:sreg_64 = V_MAD_I64_I32_e64 %20, 12, %7, 0, implicit $exec
- %23:vgpr_32 = GLOBAL_LOAD_DWORD %21, 4, 0, 0, 0, 0, implicit $exec
+ %23:vgpr_32 = GLOBAL_LOAD_DWORD %21, 4, 0, implicit $exec
%24:vreg_64, dead %25:sreg_64 = V_MAD_I64_I32_e64 %20, 48, %8, 0, implicit $exec
%26:vreg_128 = IMPLICIT_DEF
- undef %27.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %6, 0, 0, 0
+ undef %27.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %6, 0, 0
%27.sub1:sreg_64_xexec = S_MOV_B32 0
%28:sreg_64 = S_LSHL_B64 %27, 2, implicit-def dead $scc
undef %29.sub0:sreg_64 = S_ADD_U32 %5.sub0, %28.sub0, implicit-def $scc
%29.sub1:sreg_64 = S_ADDC_U32 %5.sub1, %28.sub1, implicit-def dead $scc, implicit killed $scc
- undef %30.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %6, 4, 0, 0
+ undef %30.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %6, 4, 0
%27.sub0:sreg_64_xexec = IMPLICIT_DEF
%31:sreg_64 = S_LSHL_B64 %27, 2, implicit-def dead $scc
%32:sreg_32_xm0 = S_ADD_U32 0, %31.sub0, implicit-def $scc
%33:sgpr_32 = S_ADDC_U32 %5.sub1, %31.sub1, implicit-def dead $scc, implicit killed $scc
%34:vgpr_32 = IMPLICIT_DEF
%35:vreg_64, dead %36:sreg_64 = V_MAD_I64_I32_e64 %23, %34, 0, 0, implicit $exec
- %37:vreg_64 = GLOBAL_LOAD_DWORDX2 %35, 32, 0, 0, 0, 0, implicit $exec
+ %37:vreg_64 = GLOBAL_LOAD_DWORDX2 %35, 32, 0, implicit $exec
undef %38.sub1:vreg_64 = V_ASHRREV_I32_e32 31, %37.sub0, implicit $exec
%38.sub0:vreg_64 = COPY %37.sub0
%39:vreg_64 = V_LSHLREV_B64_e64 3, %38, implicit $exec
undef %40.sub0:vreg_64, %41:sreg_64_xexec = V_ADD_CO_U32_e64 0, %39.sub0, 0, implicit $exec
%42:vgpr_32 = COPY %33
%40.sub1:vreg_64, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %42, %39.sub1, %41, 0, implicit $exec
- %44:vreg_64 = GLOBAL_LOAD_DWORDX2 %40, 0, 0, 0, 0, 0, implicit $exec :: (load 8 from %ir.tmp34)
+ %44:vreg_64 = GLOBAL_LOAD_DWORDX2 %40, 0, 0, implicit $exec :: (load 8 from %ir.tmp34)
undef %45.sub1:vreg_64 = IMPLICIT_DEF
%45.sub0:vreg_64 = COPY %37.sub1
%46:vreg_64 = V_LSHLREV_B64_e64 3, %45, implicit $exec
@@ -247,7 +247,7 @@ body: |
%49:vgpr_32 = COPY %33
%47.sub1:vreg_64, dead %50:sreg_64_xexec = V_ADDC_U32_e64 %49, %46.sub1, %48, 0, implicit $exec
%51:vreg_64 = IMPLICIT_DEF
- undef %52.sub0:vreg_64 = GLOBAL_LOAD_DWORD %35, 40, 0, 0, 0, 0, implicit $exec :: (load 4 from %ir.18 + 8)
+ undef %52.sub0:vreg_64 = GLOBAL_LOAD_DWORD %35, 40, 0, implicit $exec :: (load 4 from %ir.18 + 8)
%52.sub1:vreg_64 = IMPLICIT_DEF
%53:vreg_64 = V_LSHLREV_B64_e64 3, %52, implicit $exec
undef %54.sub0:vreg_64, %55:sreg_64_xexec = V_ADD_CO_U32_e64 0, %53.sub0, 0, implicit $exec
@@ -258,31 +258,31 @@ body: |
%59:sreg_64 = IMPLICIT_DEF
%60:sreg_32_xm0 = S_ADD_U32 %5.sub0, %59.sub0, implicit-def $scc
%61:sgpr_32 = S_ADDC_U32 %5.sub1, %59.sub1, implicit-def dead $scc, implicit killed $scc
- %62:vreg_64 = GLOBAL_LOAD_DWORDX2 %35, 0, 0, 0, 0, 0, implicit $exec :: (load 8 from %ir.20, align 4)
+ %62:vreg_64 = GLOBAL_LOAD_DWORDX2 %35, 0, 0, implicit $exec :: (load 8 from %ir.20, align 4)
undef %63.sub1:vreg_64 = V_ASHRREV_I32_e32 31, %62.sub0, implicit $exec
%63.sub0:vreg_64 = COPY %62.sub0
%64:vreg_64 = IMPLICIT_DEF
undef %65.sub0:vreg_64, %66:sreg_64_xexec = V_ADD_CO_U32_e64 %60, %64.sub0, 0, implicit $exec
%67:vgpr_32 = COPY %61
%65.sub1:vreg_64, dead %68:sreg_64_xexec = V_ADDC_U32_e64 %67, %64.sub1, %66, 0, implicit $exec
- %69:vreg_128 = GLOBAL_LOAD_DWORDX4 %65, 0, 0, 0, 0, 0, implicit $exec :: (load 16 from %ir.tmp58)
+ %69:vreg_128 = GLOBAL_LOAD_DWORDX4 %65, 0, 0, implicit $exec :: (load 16 from %ir.tmp58)
undef %70.sub1:vreg_64 = IMPLICIT_DEF
%70.sub0:vreg_64 = IMPLICIT_DEF
%71:vreg_64 = IMPLICIT_DEF
undef %72.sub0:vreg_64, %73:sreg_64_xexec = V_ADD_CO_U32_e64 %60, %71.sub0, 0, implicit $exec
%74:vgpr_32 = COPY %61
%72.sub1:vreg_64, dead %75:sreg_64_xexec = V_ADDC_U32_e64 0, %71.sub1, %73, 0, implicit $exec
- %76:vreg_128 = GLOBAL_LOAD_DWORDX4 %72, 0, 0, 0, 0, 0, implicit $exec
+ %76:vreg_128 = GLOBAL_LOAD_DWORDX4 %72, 0, 0, implicit $exec
%77:vgpr_32 = IMPLICIT_DEF
%78:vgpr_32 = IMPLICIT_DEF
%79:vgpr_32 = nofpexcept V_MUL_F32_e32 0, %77, implicit $mode, implicit $exec
%80:vgpr_32 = IMPLICIT_DEF
%81:vgpr_32 = IMPLICIT_DEF
%84:vgpr_32 = IMPLICIT_DEF
- BUFFER_STORE_DWORD_OFFEN %84, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 108, 0, 0, 0, 0, 0, 0, implicit $exec
- BUFFER_STORE_DWORD_OFFEN %81, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 104, 0, 0, 0, 0, 0, 0, implicit $exec
- BUFFER_STORE_DWORD_OFFEN %80, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 100, 0, 0, 0, 0, 0, 0, implicit $exec
- BUFFER_STORE_DWORD_OFFEN %78, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 96, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFEN %84, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 108, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFEN %81, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 104, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFEN %80, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 100, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFEN %78, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 96, 0, 0, 0, implicit $exec
%85:vgpr_32 = IMPLICIT_DEF
%86:vgpr_32 = IMPLICIT_DEF
%87:vgpr_32 = IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir b/llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir
index e5b48d903aa5..27e341296713 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir
@@ -25,8 +25,8 @@ body: |
; CHECK: successors: %bb.1(0x80000000)
; CHECK: liveins: $sgpr4_sgpr5
; CHECK: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5
- ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %2:vgpr_32, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr101, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
- ; CHECK: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sgpr_64 = S_LOAD_DWORDX2_IMM [[COPY]](p4), 0, 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %2:vgpr_32, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr101, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
+ ; CHECK: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sgpr_64 = S_LOAD_DWORDX2_IMM [[COPY]](p4), 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4)
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 5329
; CHECK: undef %5.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
@@ -37,7 +37,7 @@ body: |
; CHECK: bb.1:
; CHECK: successors: %bb.1(0x80000000)
; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:VGPR_LO16 */, def dead %11
- ; CHECK: GLOBAL_STORE_DWORD undef %12:vreg_64, [[BUFFER_LOAD_DWORD_OFFEN]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ ; CHECK: GLOBAL_STORE_DWORD undef %12:vreg_64, [[BUFFER_LOAD_DWORD_OFFEN]], 0, 0, implicit $exec :: (store 4, addrspace 1)
; CHECK: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 undef %14:vgpr_32, 0, 0, implicit $exec :: (load 8, addrspace 3)
@@ -52,7 +52,7 @@ body: |
; CHECK: DS_WRITE_B32_gfx9 undef %28:vgpr_32, %21, 0, 0, implicit $exec :: (store 4, addrspace 3)
; CHECK: DS_WRITE_B32_gfx9 undef %29:vgpr_32, %22, 0, 0, implicit $exec :: (store 4, addrspace 3)
; CHECK: DS_WRITE_B64_gfx9 undef %30:vgpr_32, %5, 0, 0, implicit $exec :: (store 8, addrspace 3)
- ; CHECK: undef %31.sub1:vreg_64 = FLAT_LOAD_DWORD undef %32:vreg_64, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ ; CHECK: undef %31.sub1:vreg_64 = FLAT_LOAD_DWORD undef %32:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; CHECK: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 1, [[DEF2]], implicit $exec
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
; CHECK: [[DEF]].sub1:vreg_64 = COPY [[V_MOV_B32_e32_]]
@@ -68,10 +68,10 @@ body: |
; CHECK: undef %40.sub1:vreg_64, dead %41:sreg_64_xexec = V_ADDC_U32_e64 [[COPY1]], [[DEF]].sub1, %39, 0, implicit $exec
; CHECK: undef %42.sub0:sgpr_64 = V_READFIRSTLANE_B32 %38.sub0, implicit $exec
; CHECK: %42.sub1:sgpr_64 = V_READFIRSTLANE_B32 %40.sub1, implicit $exec
- ; CHECK: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %42, 0, 0, 0 :: (load 4, addrspace 1)
+ ; CHECK: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %42, 0, 0 :: (load 4, addrspace 1)
; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */
; CHECK: [[DS_READ_B32_gfx9_4:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 undef %45:vgpr_32, 0, 0, implicit $exec :: (load 4, addrspace 3)
- ; CHECK: GLOBAL_STORE_DWORD undef %46:vreg_64, [[DS_READ_B32_gfx9_4]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ ; CHECK: GLOBAL_STORE_DWORD undef %46:vreg_64, [[DS_READ_B32_gfx9_4]], 0, 0, implicit $exec :: (store 4, addrspace 1)
; CHECK: %31.sub0:vreg_64 = COPY [[S_LOAD_DWORD_IMM]], implicit $exec
; CHECK: DS_WRITE_B64_gfx9 undef %47:vgpr_32, %31, 0, 0, implicit $exec :: (store 8, addrspace 3)
; CHECK: S_BRANCH %bb.1
@@ -79,8 +79,8 @@ body: |
liveins: $sgpr4_sgpr5
%0:sgpr_64(p4) = COPY $sgpr4_sgpr5
- %1:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %2:vgpr_32, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr101, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
- %3:sgpr_64 = S_LOAD_DWORDX2_IMM %0(p4), 0, 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4)
+ %1:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %2:vgpr_32, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr101, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
+ %3:sgpr_64 = S_LOAD_DWORDX2_IMM %0(p4), 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4)
%4:sreg_32_xm0 = S_MOV_B32 5329
undef %5.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
%6:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
@@ -91,7 +91,7 @@ body: |
bb.1:
INLINEASM &"", 1, 851978, def %11:vgpr_32
- GLOBAL_STORE_DWORD undef %12:vreg_64, %1, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ GLOBAL_STORE_DWORD undef %12:vreg_64, %1, 0, 0, implicit $exec :: (store 4, addrspace 1)
%13:vreg_64 = DS_READ_B64_gfx9 undef %14:vgpr_32, 0, 0, implicit $exec :: (load 8, addrspace 3)
INLINEASM &"def $0 $1", 1, 851978, def %15:vgpr_32, 851978, def %16:vgpr_32
%17:vgpr_32 = DS_READ_B32_gfx9 %6, 0, 0, implicit $exec
@@ -108,7 +108,7 @@ body: |
DS_WRITE_B32_gfx9 undef %28:vgpr_32, %21, 0, 0, implicit $exec :: (store 4, addrspace 3)
DS_WRITE_B32_gfx9 undef %29:vgpr_32, %22, 0, 0, implicit $exec :: (store 4, addrspace 3)
DS_WRITE_B64_gfx9 undef %30:vgpr_32, %5, 0, 0, implicit $exec :: (store 8, addrspace 3)
- undef %31.sub1:vreg_64 = FLAT_LOAD_DWORD undef %32:vreg_64, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ undef %31.sub1:vreg_64 = FLAT_LOAD_DWORD undef %32:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
%33:vgpr_32 = V_MUL_LO_U32_e64 %25, %4, implicit $exec
%10:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, %25, %26, implicit $exec
%34:vgpr_32 = V_SUB_U32_e32 %33, %9, implicit $exec
@@ -122,10 +122,10 @@ body: |
undef %40.sub1:vreg_64, dead %41:sreg_64_xexec = V_ADDC_U32_e64 %37, %8.sub1, %39, 0, implicit $exec
undef %42.sub0:sgpr_64 = V_READFIRSTLANE_B32 %38.sub0, implicit $exec
%42.sub1:sgpr_64 = V_READFIRSTLANE_B32 %40.sub1, implicit $exec
- %43:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %42, 0, 0, 0 :: (load 4, addrspace 1)
+ %43:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %42, 0, 0 :: (load 4, addrspace 1)
INLINEASM &"", 1
%44:vgpr_32 = DS_READ_B32_gfx9 undef %45:vgpr_32, 0, 0, implicit $exec :: (load 4, addrspace 3)
- GLOBAL_STORE_DWORD undef %46:vreg_64, %44, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ GLOBAL_STORE_DWORD undef %46:vreg_64, %44, 0, 0, implicit $exec :: (store 4, addrspace 1)
%31.sub0:vreg_64 = COPY %43, implicit $exec
DS_WRITE_B64_gfx9 undef %47:vgpr_32, %31, 0, 0, implicit $exec :: (store 8, addrspace 3)
S_BRANCH %bb.1
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-barrier-fpmode.mir b/llvm/test/CodeGen/AMDGPU/schedule-barrier-fpmode.mir
index e404ce3ceaad..b5163239c102 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-barrier-fpmode.mir
+++ b/llvm/test/CodeGen/AMDGPU/schedule-barrier-fpmode.mir
@@ -12,15 +12,15 @@ body: |
; CHECK-LABEL: name: denorm_mode_not_barrier
; CHECK: liveins: $vgpr0_vgpr1
; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; CHECK: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 4)
- ; CHECK: [[GLOBAL_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 4, 0, 0, 0, 0, implicit $exec :: (load 4)
+ ; CHECK: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load 4)
+ ; CHECK: [[GLOBAL_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 4, 0, implicit $exec :: (load 4)
; CHECK: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[GLOBAL_LOAD_DWORD]], [[GLOBAL_LOAD_DWORD1]], implicit $exec
; CHECK: S_DENORM_MODE 0, implicit-def $mode, implicit $mode
; CHECK: S_ENDPGM 0, implicit [[V_ADD_U32_e32_]]
%0:vreg_64 = COPY $vgpr0_vgpr1
- %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, 0, 0, implicit $exec :: (load 4)
+ %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, implicit $exec :: (load 4)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
- %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, 0, 0, 0, implicit $exec :: (load 4)
+ %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, implicit $exec :: (load 4)
%3:vgpr_32 = V_ADD_U32_e32 %1, %2, implicit $exec
S_ENDPGM 0, implicit %3
...
@@ -35,15 +35,15 @@ body: |
; CHECK-LABEL: name: round_mode_not_barrier
; CHECK: liveins: $vgpr0_vgpr1
; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; CHECK: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 4)
- ; CHECK: [[GLOBAL_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 4, 0, 0, 0, 0, implicit $exec :: (load 4)
+ ; CHECK: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load 4)
+ ; CHECK: [[GLOBAL_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 4, 0, implicit $exec :: (load 4)
; CHECK: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[GLOBAL_LOAD_DWORD]], [[GLOBAL_LOAD_DWORD1]], implicit $exec
; CHECK: S_ROUND_MODE 0, implicit-def $mode, implicit $mode
; CHECK: S_ENDPGM 0, implicit [[V_ADD_U32_e32_]]
%0:vreg_64 = COPY $vgpr0_vgpr1
- %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, 0, 0, implicit $exec :: (load 4)
+ %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, implicit $exec :: (load 4)
S_ROUND_MODE 0, implicit-def $mode, implicit $mode
- %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, 0, 0, 0, implicit $exec :: (load 4)
+ %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, implicit $exec :: (load 4)
%3:vgpr_32 = V_ADD_U32_e32 %1, %2, implicit $exec
S_ENDPGM 0, implicit %3
...
@@ -58,17 +58,17 @@ body: |
; CHECK-LABEL: name: denorm_mode_mode_def_use
; CHECK: liveins: $vgpr0_vgpr1
; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; CHECK: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 4)
- ; CHECK: dead %3:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 4, 0, 0, 0, 0, implicit $exec :: (load 4)
+ ; CHECK: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load 4)
+ ; CHECK: dead %3:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 4, 0, implicit $exec :: (load 4)
; CHECK: S_DENORM_MODE 0, implicit-def $mode, implicit $mode
; CHECK: [[V_ADD_F32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_F32_e32 0, [[GLOBAL_LOAD_DWORD]], implicit $mode, implicit $exec
; CHECK: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[GLOBAL_LOAD_DWORD]], [[V_ADD_F32_e32_]], implicit $exec
; CHECK: S_ENDPGM 0, implicit [[V_ADD_F32_e32_]], implicit [[V_ADD_U32_e32_]]
%0:vreg_64 = COPY $vgpr0_vgpr1
- %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, 0, 0, implicit $exec :: (load 4)
+ %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, implicit $exec :: (load 4)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
%2:vgpr_32 = V_ADD_F32_e32 0, %1, implicit $mode, implicit $exec
- %3:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, 0, 0, 0, implicit $exec :: (load 4)
+ %3:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, implicit $exec :: (load 4)
%4:vgpr_32 = V_ADD_U32_e32 %1, %2, implicit $exec
S_ENDPGM 0, implicit %2, implicit %4
...
@@ -83,17 +83,17 @@ body: |
; CHECK-LABEL: name: round_mode_mode_def_use
; CHECK: liveins: $vgpr0_vgpr1
; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; CHECK: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 4)
- ; CHECK: dead %3:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 4, 0, 0, 0, 0, implicit $exec :: (load 4)
+ ; CHECK: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load 4)
+ ; CHECK: dead %3:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 4, 0, implicit $exec :: (load 4)
; CHECK: S_ROUND_MODE 0, implicit-def $mode, implicit $mode
; CHECK: [[V_ADD_F32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_F32_e32 0, [[GLOBAL_LOAD_DWORD]], implicit $mode, implicit $exec
; CHECK: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[GLOBAL_LOAD_DWORD]], [[V_ADD_F32_e32_]], implicit $exec
; CHECK: S_ENDPGM 0, implicit [[V_ADD_F32_e32_]], implicit [[V_ADD_U32_e32_]]
%0:vreg_64 = COPY $vgpr0_vgpr1
- %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, 0, 0, implicit $exec :: (load 4)
+ %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, implicit $exec :: (load 4)
S_ROUND_MODE 0, implicit-def $mode, implicit $mode
%2:vgpr_32 = V_ADD_F32_e32 0, %1, implicit $mode, implicit $exec
- %3:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, 0, 0, 0, implicit $exec :: (load 4)
+ %3:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, implicit $exec :: (load 4)
%4:vgpr_32 = V_ADD_U32_e32 %1, %2, implicit $exec
S_ENDPGM 0, implicit %2, implicit %4
...
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-barrier.mir b/llvm/test/CodeGen/AMDGPU/schedule-barrier.mir
index 0211f4294f75..d08cfd28d0ab 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-barrier.mir
+++ b/llvm/test/CodeGen/AMDGPU/schedule-barrier.mir
@@ -27,16 +27,16 @@ body: |
; CHECK: %9.sub2:sgpr_128 = V_READFIRSTLANE_B32 %5.sub2, implicit $exec
; CHECK: %9.sub3:sgpr_128 = V_READFIRSTLANE_B32 %4.sub3, implicit $exec
; CHECK: S_BARRIER
- ; CHECK: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %9, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ ; CHECK: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %9, 0, 0, 0, 0, 0, implicit $exec
; CHECK: undef %12.sub0:sgpr_128 = V_READFIRSTLANE_B32 %3.sub0, implicit $exec
; CHECK: %12.sub1:sgpr_128 = V_READFIRSTLANE_B32 %2.sub1, implicit $exec
; CHECK: %12.sub2:sgpr_128 = V_READFIRSTLANE_B32 %1.sub2, implicit $exec
; CHECK: %12.sub3:sgpr_128 = V_READFIRSTLANE_B32 %0.sub3, implicit $exec
- ; CHECK: [[BUFFER_LOAD_DWORD_OFFSET1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %12, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ ; CHECK: [[BUFFER_LOAD_DWORD_OFFSET1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %12, 0, 0, 0, 0, 0, implicit $exec
; CHECK: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[BUFFER_LOAD_DWORD_OFFSET]], [[BUFFER_LOAD_DWORD_OFFSET]], implicit $exec
; CHECK: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[BUFFER_LOAD_DWORD_OFFSET1]], [[BUFFER_LOAD_DWORD_OFFSET1]], implicit $exec
; CHECK: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_MUL_LO_U32_e64_]], [[V_MUL_LO_U32_e64_1]], implicit $exec
- ; CHECK: GLOBAL_STORE_DWORD %8, [[V_ADD_U32_e32_]], 0, 0, 0, 0, 0, implicit $exec
+ ; CHECK: GLOBAL_STORE_DWORD %8, [[V_ADD_U32_e32_]], 0, 0, implicit $exec
; CHECK: S_ENDPGM 0
undef %43.sub3:vreg_128 = COPY $vgpr9
undef %42.sub2:vreg_128 = COPY $vgpr8
@@ -55,17 +55,17 @@ body: |
%33.sub1:sgpr_128 = V_READFIRSTLANE_B32 %44.sub1, implicit $exec
%33.sub2:sgpr_128 = V_READFIRSTLANE_B32 %45.sub2, implicit $exec
%33.sub3:sgpr_128 = V_READFIRSTLANE_B32 %46.sub3, implicit $exec
- %15:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %33, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ %15:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %33, 0, 0, 0, 0, 0, implicit $exec
%39:vgpr_32 = V_MUL_LO_U32_e64 %15, %15, implicit $exec
undef %27.sub0:sgpr_128 = V_READFIRSTLANE_B32 %26.sub0, implicit $exec
%27.sub1:sgpr_128 = V_READFIRSTLANE_B32 %41.sub1, implicit $exec
%27.sub2:sgpr_128 = V_READFIRSTLANE_B32 %42.sub2, implicit $exec
%27.sub3:sgpr_128 = V_READFIRSTLANE_B32 %43.sub3, implicit $exec
- %19:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %27, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ %19:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %27, 0, 0, 0, 0, 0, implicit $exec
%40:vgpr_32 = V_MUL_LO_U32_e64 %19, %19, implicit $exec
%23:vgpr_32 = V_ADD_U32_e32 %39, %40, implicit $exec
- GLOBAL_STORE_DWORD %38, %23, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %38, %23, 0, 0, implicit $exec
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-regpressure.mir b/llvm/test/CodeGen/AMDGPU/schedule-regpressure.mir
index 02db08899236..b4a2ce11d2fd 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-regpressure.mir
+++ b/llvm/test/CodeGen/AMDGPU/schedule-regpressure.mir
@@ -47,7 +47,7 @@ body: |
liveins: $sgpr4_sgpr5
%1 = COPY $sgpr4_sgpr5
- %5 = S_LOAD_DWORD_IMM %1, 0, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+ %5 = S_LOAD_DWORD_IMM %1, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
$m0 = S_MOV_B32 -1
%7 = COPY %5
%6 = DS_READ_B32 %7, 0, 0, implicit $m0, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/scheduler-handle-move-bundle.mir b/llvm/test/CodeGen/AMDGPU/scheduler-handle-move-bundle.mir
index 4e56625cf921..3d299ad5157b 100644
--- a/llvm/test/CodeGen/AMDGPU/scheduler-handle-move-bundle.mir
+++ b/llvm/test/CodeGen/AMDGPU/scheduler-handle-move-bundle.mir
@@ -19,7 +19,7 @@ body: |
; GCN-LABEL: name: handleMove_bundle
; GCN: liveins: $sgpr4_sgpr5
; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (dereferenceable invariant load 4, align 16, addrspace 4)
+ ; GCN: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load 4, align 16, addrspace 4)
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
; GCN: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GCN: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
@@ -33,7 +33,7 @@ body: |
; GCN: DS_WRITE_B32_gfx9 [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_2]], 0, 0, implicit $exec :: (store 4, addrspace 3)
; GCN: S_ENDPGM 0
%2:sgpr_64 = COPY $sgpr4_sgpr5
- %5:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %2, 0, 0, 0 :: (dereferenceable invariant load 4, align 16, addrspace 4)
+ %5:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %2, 0, 0 :: (dereferenceable invariant load 4, align 16, addrspace 4)
%6:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
%7:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
DS_WRITE_B32_gfx9 %7, %6, 0, 0, implicit $exec :: (store 4, addrspace 3)
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-gfx9.mir b/llvm/test/CodeGen/AMDGPU/sdwa-gfx9.mir
index e2f3677e51a8..37d78afc88a1 100644
--- a/llvm/test/CodeGen/AMDGPU/sdwa-gfx9.mir
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-gfx9.mir
@@ -37,11 +37,11 @@ body: |
%2 = COPY $sgpr30_sgpr31
%1 = COPY $vgpr2_vgpr3
%0 = COPY $vgpr0_vgpr1
- %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ %3 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
%12 = S_MOV_B32 123
%10 = V_LSHRREV_B32_e64 16, %3, implicit $exec
%11 = V_ADD_CO_U32_e32 %12, killed %10, implicit-def $vcc, implicit $exec
- FLAT_STORE_DWORD %0, %11, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+ FLAT_STORE_DWORD %0, %11, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
$sgpr30_sgpr31 = COPY %2
S_SETPC_B64_return $sgpr30_sgpr31
@@ -80,9 +80,9 @@ body: |
%2 = COPY $sgpr30_sgpr31
%1 = COPY $vgpr2_vgpr3
%0 = COPY $vgpr0_vgpr1
- %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ %3 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
%10 = V_LSHRREV_B32_e64 16, %3, implicit $exec
%11 = V_TRUNC_F32_e64 0, killed %10, 1, 2, implicit $mode, implicit $exec, implicit-def $vcc
- FLAT_STORE_DWORD %0, %11, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+ FLAT_STORE_DWORD %0, %11, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
$sgpr30_sgpr31 = COPY %2
S_SETPC_B64_return $sgpr30_sgpr31
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-ops.mir b/llvm/test/CodeGen/AMDGPU/sdwa-ops.mir
index da9486a2ae04..c642f5729ce3 100644
--- a/llvm/test/CodeGen/AMDGPU/sdwa-ops.mir
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-ops.mir
@@ -29,19 +29,19 @@ body: |
%63:vgpr_32, %65:sreg_64_xexec = nsw V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec
%64:vgpr_32, dead %66:sreg_64_xexec = nuw V_ADDC_U32_e64 %30.sub1, %0, killed %65, 0, implicit $exec
%62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
- GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+ GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store 8)
%161:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
%163:vgpr_32, %165:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %161, 0, implicit $exec
%164:vgpr_32, dead %166:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %165, 0, implicit $exec
%162:vreg_64 = REG_SEQUENCE %163, %subreg.sub0, %164, %subreg.sub1
- GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %162, %1, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+ GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %162, %1, 0, 0, implicit $exec, implicit $exec :: (store 8)
%171:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
%173:vgpr_32, %175:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %171, 0, implicit $exec
%174:vgpr_32, dead %176:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %175, 0, implicit $exec
%172:vreg_64 = REG_SEQUENCE %173, %subreg.sub0, %174, %subreg.sub1
- GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %172, %1, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+ GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %172, %1, 0, 0, implicit $exec, implicit $exec :: (store 8)
...
@@ -77,13 +77,13 @@ body: |
%64:vgpr_32, dead %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %65, 0, implicit $exec
%62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
- GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+ GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store 8)
%161:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
%163:vgpr_32, %165:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %161, 0, implicit $exec
%164:vgpr_32, dead %166:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %165, 0, implicit $exec
%162:vreg_64 = REG_SEQUENCE %163, %subreg.sub0, %164, %subreg.sub1
- GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %162, %1, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+ GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %162, %1, 0, 0, implicit $exec, implicit $exec :: (store 8)
...
@@ -113,7 +113,7 @@ body: |
%63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec
%64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %65, 0, implicit $exec
%62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %66, %subreg.sub1
- GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+ GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store 8)
...
@@ -143,7 +143,7 @@ body: |
%63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec
%64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
%62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %65, %subreg.sub1
- GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+ GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store 8)
...
@@ -172,7 +172,7 @@ body: |
%63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec
%64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
%62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
- GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+ GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store 8)
...
@@ -201,7 +201,7 @@ body: |
%30:vreg_64 = COPY $sgpr0_sgpr1
%63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec
%62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %23, %subreg.sub1
- GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+ GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store 8)
...
@@ -232,7 +232,7 @@ body: |
%30:vreg_64 = COPY $sgpr0_sgpr1
%64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %24, 0, implicit $exec
%62:vreg_64 = REG_SEQUENCE %23, %subreg.sub0, %23, %subreg.sub1
- GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+ GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store 8)
...
@@ -263,7 +263,7 @@ body: |
%64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
%31:vreg_64 = COPY $vcc
%62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
- GLOBAL_STORE_DWORDX2_SADDR %31.sub0, %62, %1, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+ GLOBAL_STORE_DWORDX2_SADDR %31.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store 8)
...
@@ -294,7 +294,7 @@ body: |
%64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
%31:vreg_64 = COPY $vcc
%62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
- GLOBAL_STORE_DWORDX2_SADDR %31.sub0, %62, %1, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+ GLOBAL_STORE_DWORDX2_SADDR %31.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store 8)
...
@@ -326,7 +326,7 @@ body: |
%32:vreg_64 = REG_SEQUENCE %31, %subreg.sub0, %23, %subreg.sub1
%64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
%62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
- GLOBAL_STORE_DWORDX2_SADDR %32.sub0, %62, %1, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+ GLOBAL_STORE_DWORDX2_SADDR %32.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store 8)
...
@@ -357,7 +357,7 @@ body: |
%32:vreg_64 = REG_SEQUENCE %31, %subreg.sub0, %23, %subreg.sub1
%64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
%62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
- GLOBAL_STORE_DWORDX2_SADDR %32.sub0, %62, %1, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+ GLOBAL_STORE_DWORDX2_SADDR %32.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store 8)
...
@@ -387,5 +387,5 @@ body: |
%31:vreg_64 = COPY killed $vcc
%64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
%62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
- GLOBAL_STORE_DWORDX2_SADDR %31.sub0, %62, %1, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
+ GLOBAL_STORE_DWORDX2_SADDR %31.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store 8)
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-gfx10.mir b/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-gfx10.mir
index e6181a9cd49a..56f254196f37 100644
--- a/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-gfx10.mir
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-gfx10.mir
@@ -80,7 +80,7 @@ body: |
%2 = COPY $sgpr30_sgpr31
%1 = COPY $vgpr2_vgpr3
%0 = COPY $vgpr0_vgpr1
- %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ %3 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
%5 = S_MOV_B32 65535
%6 = S_MOV_B32 65535
@@ -130,7 +130,7 @@ body: |
%100 = V_MOV_B32_e32 %48, implicit $exec
- FLAT_STORE_DWORD %0, %100, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+ FLAT_STORE_DWORD %0, %100, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
$sgpr30_sgpr31 = COPY %2
S_SETPC_B64_return $sgpr30_sgpr31
@@ -227,7 +227,7 @@ body: |
%2 = COPY $sgpr30_sgpr31
%1 = COPY $vgpr2_vgpr3
%0 = COPY $vgpr0_vgpr1
- %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ %3 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
%5 = S_MOV_B32 65535
%6 = S_MOV_B32 65535
@@ -286,7 +286,7 @@ body: |
%100 = V_MOV_B32_e32 %60, implicit $exec
- FLAT_STORE_DWORD %0, %100, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+ FLAT_STORE_DWORD %0, %100, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
$sgpr30_sgpr31 = COPY %2
S_SETPC_B64_return $sgpr30_sgpr31
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir b/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir
index 3b4688010aea..61c9afbc612e 100644
--- a/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir
@@ -89,7 +89,7 @@ body: |
%2 = COPY $sgpr30_sgpr31
%1 = COPY $vgpr2_vgpr3
%0 = COPY $vgpr0_vgpr1
- %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ %3 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
%5 = S_MOV_B32 65535
%6 = S_MOV_B32 65535
@@ -139,7 +139,7 @@ body: |
%100 = V_MOV_B32_e32 %48, implicit $exec
- FLAT_STORE_DWORD %0, %100, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+ FLAT_STORE_DWORD %0, %100, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
$sgpr30_sgpr31 = COPY %2
S_SETPC_B64_return $sgpr30_sgpr31
@@ -256,7 +256,7 @@ body: |
%2 = COPY $sgpr30_sgpr31
%1 = COPY $vgpr2_vgpr3
%0 = COPY $vgpr0_vgpr1
- %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ %3 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
%5 = S_MOV_B32 65535
%6 = S_MOV_B32 65535
@@ -315,7 +315,7 @@ body: |
%100 = V_MOV_B32_e32 %60, implicit $exec
- FLAT_STORE_DWORD %0, %100, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+ FLAT_STORE_DWORD %0, %100, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
$sgpr30_sgpr31 = COPY %2
S_SETPC_B64_return $sgpr30_sgpr31
@@ -400,7 +400,7 @@ body: |
%2 = COPY $sgpr30_sgpr31
%1 = COPY $vgpr2_vgpr3
%0 = COPY $vgpr0_vgpr1
- %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ %3 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
%5 = S_MOV_B32 65535
%6 = S_MOV_B32 65535
@@ -441,7 +441,7 @@ body: |
%100 = V_MOV_B32_e32 $vcc_lo, implicit $exec
- FLAT_STORE_DWORD %0, %100, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+ FLAT_STORE_DWORD %0, %100, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
$sgpr30_sgpr31 = COPY %2
S_SETPC_B64_return $sgpr30_sgpr31
...
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir b/llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir
index 7229b8a118b6..604764333986 100644
--- a/llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir
@@ -36,8 +36,8 @@ body: |
%2 = COPY $sgpr30_sgpr31
%1 = COPY $vgpr2_vgpr3
%0 = COPY $vgpr0_vgpr1
- %3 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
- %4 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ %3 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ %4 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
%5 = V_AND_B32_e32 65535, %3, implicit $exec
%6 = V_LSHRREV_B32_e64 16, %4, implicit $exec
@@ -51,7 +51,7 @@ body: |
%13 = V_OR_B32_e64 %10, %12, implicit $exec
- FLAT_STORE_DWORD %0, %13, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+ FLAT_STORE_DWORD %0, %13, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
$sgpr30_sgpr31 = COPY %2
S_SETPC_B64_return $sgpr30_sgpr31
@@ -88,14 +88,14 @@ body: |
%2 = COPY $sgpr30_sgpr31
%1 = COPY $vgpr2_vgpr3
%0 = COPY $vgpr0_vgpr1
- %3 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
- %4 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ %3 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ %4 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
%9:vgpr_32 = V_LSHRREV_B16_e64 8, %3, implicit $exec
%10:sreg_32_xm0 = S_MOV_B32 255
%11:vgpr_32 = V_AND_B32_e64 %3, killed %10, implicit $exec
%17:vgpr_32 = V_MOV_B32_sdwa 0, %4, 0, 5, 2, 4, implicit $exec, implicit %11(tied-def 0)
- FLAT_STORE_DWORD %0, %17, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+ FLAT_STORE_DWORD %0, %17, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
S_ENDPGM 0
...
@@ -131,14 +131,14 @@ body: |
%2 = COPY $sgpr30_sgpr31
%1 = COPY $vgpr2_vgpr3
%0 = COPY $vgpr0_vgpr1
- %3 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
- %4 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ %3 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ %4 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
%9:vgpr_32 = V_LSHRREV_B16_e64 8, %3, implicit $exec
%10:sreg_32_xm0 = S_MOV_B32 65535
%11:vgpr_32 = V_AND_B32_e64 %3, killed %10, implicit $exec
%17:vgpr_32 = V_MOV_B32_sdwa 0, %4, 0, 5, 2, 4, implicit $exec, implicit %11(tied-def 0)
- FLAT_STORE_DWORD %0, %17, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+ FLAT_STORE_DWORD %0, %17, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir b/llvm/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir
index a1d8717e24ae..45f1f7334668 100644
--- a/llvm/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir
@@ -203,7 +203,7 @@ body: |
liveins: $sgpr4_sgpr5
%4 = COPY $sgpr4_sgpr5
- %9 = S_LOAD_DWORDX2_IMM %4, 0, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %9 = S_LOAD_DWORDX2_IMM %4, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%8 = S_MOV_B64 0
%7 = COPY %9
%30 = V_MOV_B32_e32 1, implicit $exec
@@ -221,26 +221,26 @@ body: |
%15 = S_ADDC_U32 %7.sub1, %0.sub1, implicit-def dead $scc, implicit $scc
%16 = REG_SEQUENCE %14, %subreg.sub0, %15, %subreg.sub1
%18 = COPY %16
- %17 = FLAT_LOAD_DWORD %18, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.uglygep45)
+ %17 = FLAT_LOAD_DWORD %18, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.uglygep45)
%60 = V_BFE_U32_e64 %17, 8, 8, implicit $exec
%61 = V_LSHLREV_B32_e32 2, killed %60, implicit $exec
%70 = V_ADD_CO_U32_e32 %7.sub0, %61, implicit-def $vcc, implicit $exec
%66 = COPY %13
%65 = V_ADDC_U32_e32 0, %66, implicit-def $vcc, implicit $vcc, implicit $exec
%67 = REG_SEQUENCE %70, %subreg.sub0, killed %65, %subreg.sub1
- FLAT_STORE_DWORD %67, %30, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.tmp9)
+ FLAT_STORE_DWORD %67, %30, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.tmp9)
%37 = S_ADD_U32 %14, 4, implicit-def $scc
%38 = S_ADDC_U32 %15, 0, implicit-def dead $scc, implicit $scc
%71 = COPY killed %37
%72 = COPY killed %38
%41 = REG_SEQUENCE killed %71, %subreg.sub0, killed %72, %subreg.sub1
- %40 = FLAT_LOAD_DWORD killed %41, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.scevgep)
+ %40 = FLAT_LOAD_DWORD killed %41, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.scevgep)
%73 = V_BFE_U32_e64 %40, 8, 8, implicit $exec
%74 = V_LSHLREV_B32_e32 2, killed %73, implicit $exec
%83 = V_ADD_CO_U32_e32 %7.sub0, %74, implicit-def $vcc, implicit $exec
%78 = V_ADDC_U32_e32 0, %66, implicit-def $vcc, implicit $vcc, implicit $exec
%80 = REG_SEQUENCE %83, %subreg.sub0, killed %78, %subreg.sub1
- FLAT_STORE_DWORD %80, %30, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.tmp17)
+ FLAT_STORE_DWORD %80, %30, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.tmp17)
%55 = S_ADD_U32 %0.sub0, 8, implicit-def $scc
%56 = S_ADDC_U32 %0.sub1, 0, implicit-def dead $scc, implicit $scc
%57 = REG_SEQUENCE %55, %subreg.sub0, killed %56, %subreg.sub1
@@ -365,7 +365,7 @@ body: |
liveins: $sgpr4_sgpr5
%4 = COPY $sgpr4_sgpr5
- %9 = S_LOAD_DWORDX2_IMM %4, 0, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %9 = S_LOAD_DWORDX2_IMM %4, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%8 = S_MOV_B64 0
%7 = COPY %9
%30 = V_MOV_B32_e32 1, implicit $exec
@@ -384,26 +384,26 @@ body: |
%15 = S_ADDC_U32 %7.sub1, %0.sub1, implicit-def dead $scc, implicit $scc
%16 = REG_SEQUENCE %14, %subreg.sub0, %15, %subreg.sub1
%18 = COPY %16
- %17 = FLAT_LOAD_DWORD %18, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.uglygep45)
+ %17 = FLAT_LOAD_DWORD %18, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.uglygep45)
%60 = V_BFE_U32_e64 %17, 8, 8, implicit $exec
%61 = V_LSHLREV_B32_e32 %84, killed %60, implicit $exec
%70 = V_ADD_CO_U32_e32 %7.sub0, %61, implicit-def $vcc, implicit $exec
%66 = COPY %13
%65 = V_ADDC_U32_e32 0, %66, implicit-def $vcc, implicit $vcc, implicit $exec
%67 = REG_SEQUENCE %70, %subreg.sub0, killed %65, %subreg.sub1
- FLAT_STORE_DWORD %67, %30, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.tmp9)
+ FLAT_STORE_DWORD %67, %30, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.tmp9)
%37 = S_ADD_U32 %14, 4, implicit-def $scc
%38 = S_ADDC_U32 %15, 0, implicit-def dead $scc, implicit $scc
%71 = COPY killed %37
%72 = COPY killed %38
%41 = REG_SEQUENCE killed %71, %subreg.sub0, killed %72, %subreg.sub1
- %40 = FLAT_LOAD_DWORD killed %41, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.scevgep)
+ %40 = FLAT_LOAD_DWORD killed %41, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.scevgep)
%73 = V_BFE_U32_e64 %40, 8, 8, implicit $exec
%74 = V_LSHLREV_B32_e32 %84, killed %73, implicit $exec
%83 = V_ADD_CO_U32_e32 %7.sub0, %74, implicit-def $vcc, implicit $exec
%78 = V_ADDC_U32_e32 0, %66, implicit-def $vcc, implicit $vcc, implicit $exec
%80 = REG_SEQUENCE %83, %subreg.sub0, killed %78, %subreg.sub1
- FLAT_STORE_DWORD %80, %30, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.tmp17)
+ FLAT_STORE_DWORD %80, %30, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.tmp17)
%55 = S_ADD_U32 %0.sub0, 8, implicit-def $scc
%56 = S_ADDC_U32 %0.sub1, 0, implicit-def dead $scc, implicit $scc
%57 = REG_SEQUENCE %55, %subreg.sub0, killed %56, %subreg.sub1
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir b/llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir
index 8e46fb262ac1..1fd89e43f33e 100644
--- a/llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir
@@ -41,7 +41,7 @@ body: |
%2 = COPY $sgpr30_sgpr31
%1 = COPY $vgpr2_vgpr3
%0 = COPY $vgpr0_vgpr1
- %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+ %3 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
%12 = V_LSHRREV_B32_e64 16, %3, implicit $exec
%13 = V_BCNT_U32_B32_e64 %3, killed %12, implicit-def $vcc, implicit $exec
@@ -56,6 +56,6 @@ body: |
%19 = V_READLANE_B32 killed %18, 0, implicit-def $vcc, implicit $exec
%20 = V_MOV_B32_e64 %19, implicit $exec
- FLAT_STORE_DWORD %0, %20, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+ FLAT_STORE_DWORD %0, %20, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
$sgpr30_sgpr31 = COPY %2
S_SETPC_B64_return $sgpr30_sgpr31
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-wrong-stack-id.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-wrong-stack-id.mir
index 2554e06569c3..ce47ba67ce77 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-wrong-stack-id.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-wrong-stack-id.mir
@@ -84,7 +84,7 @@ body: |
bb.0:
%0:sreg_32_xm0 = COPY $sgpr32
%1:vreg_64 = IMPLICIT_DEF
- %2:vgpr_32 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ %2:vgpr_32 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr
%3:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc
ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
dead $sgpr30_sgpr31 = SI_CALL %3, @func, csr_amdgpu_highregs, implicit undef $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/shrink-carry.mir b/llvm/test/CodeGen/AMDGPU/shrink-carry.mir
index 27c348ac581b..d828f0be4319 100644
--- a/llvm/test/CodeGen/AMDGPU/shrink-carry.mir
+++ b/llvm/test/CodeGen/AMDGPU/shrink-carry.mir
@@ -21,7 +21,7 @@ body: |
%2 = IMPLICIT_DEF
%3 = V_CMP_GT_U32_e64 %0, %1, implicit $exec
%4, %5 = V_SUBBREV_U32_e64 0, %0, %3, 0, implicit $exec
- GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, implicit $exec
...
@@ -46,7 +46,7 @@ body: |
%2 = IMPLICIT_DEF
%3 = V_CMP_GT_U32_e64 %0, %1, implicit $exec
%4, %5 = V_SUBB_U32_e64 %0, 0, %3, 0, implicit $exec
- GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, implicit $exec
...
@@ -71,7 +71,7 @@ body: |
%2 = IMPLICIT_DEF
%3 = V_CMP_GT_U32_e64 %0, %1, implicit $exec
%4, %5 = V_ADDC_U32_e64 0, %0, %3, 0, implicit $exec
- GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, implicit $exec
...
@@ -96,6 +96,6 @@ body: |
%2 = IMPLICIT_DEF
%3 = V_CMP_GT_U32_e64 %0, %1, implicit $exec
%4, %5 = V_ADDC_U32_e64 %0, 0, %3, 0, implicit $exec
- GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, implicit $exec
...
diff --git a/llvm/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir b/llvm/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir
index 168392bb1750..3136ef0bd6a8 100644
--- a/llvm/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir
+++ b/llvm/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir
@@ -71,8 +71,8 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
- %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0
- %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0
+ %4 = S_LOAD_DWORDX2_IMM %0, 9, 0
+ %5 = S_LOAD_DWORDX2_IMM %0, 11, 0
%26 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%27 = REG_SEQUENCE %3, 1, %26, 2
%10 = S_MOV_B32 61440
@@ -81,11 +81,11 @@ body: |
%13 = REG_SEQUENCE killed %5, 17, %12, 18
%28 = V_LSHL_B64_e64 killed %27, 2, implicit $exec
%16 = REG_SEQUENCE killed %4, 17, %12, 18
- %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, 0, 0, 0, implicit $exec
+ %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit $exec
+ %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit $exec
%29, %9 = V_ADD_CO_U32_e64 %19, %17, 0, implicit $exec
%24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed %9, implicit $exec
- BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -155,8 +155,8 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
- %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0
- %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0
+ %4 = S_LOAD_DWORDX2_IMM %0, 9, 0
+ %5 = S_LOAD_DWORDX2_IMM %0, 11, 0
%26 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%27 = REG_SEQUENCE %3, 1, %26, 2
%10 = S_MOV_B32 61440
@@ -165,11 +165,11 @@ body: |
%13 = REG_SEQUENCE killed %5, 17, %12, 18
%28 = V_LSHL_B64_e64 killed %27, 2, implicit $exec
%16 = REG_SEQUENCE killed %4, 17, %12, 18
- %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, 0, 0, 0, implicit $exec
+ %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit $exec
+ %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit $exec
%29, %9 = V_SUB_CO_U32_e64 %19, %17, 0, implicit $exec
%24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed %9, implicit $exec
- BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -239,8 +239,8 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
- %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0
- %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0
+ %4 = S_LOAD_DWORDX2_IMM %0, 9, 0
+ %5 = S_LOAD_DWORDX2_IMM %0, 11, 0
%26 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%27 = REG_SEQUENCE %3, 1, %26, 2
%10 = S_MOV_B32 61440
@@ -249,11 +249,11 @@ body: |
%13 = REG_SEQUENCE killed %5, 17, %12, 18
%28 = V_LSHL_B64_e64 killed %27, 2, implicit $exec
%16 = REG_SEQUENCE killed %4, 17, %12, 18
- %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, 0, 0, 0, implicit $exec
+ %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit $exec
+ %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit $exec
%29, %9 = V_SUBREV_CO_U32_e64 %19, %17, 0, implicit $exec
%24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed %9, implicit $exec
- BUFFER_STORE_DWORD_ADDR64 %29, %28, killed %16, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_ADDR64 %29, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -322,8 +322,8 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
- %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0
- %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0
+ %4 = S_LOAD_DWORDX2_IMM %0, 9, 0
+ %5 = S_LOAD_DWORDX2_IMM %0, 11, 0
%26 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%27 = REG_SEQUENCE %3, 1, %26, 2
%10 = S_MOV_B32 61440
@@ -332,12 +332,12 @@ body: |
%13 = REG_SEQUENCE killed %5, 17, %12, 18
%28 = V_LSHL_B64_e64 killed %27, 2, implicit $exec
%16 = REG_SEQUENCE killed %4, 17, %12, 18
- %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, 0, 0, 0, implicit $exec
+ %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit $exec
+ %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit $exec
%9 = S_MOV_B64 0
%29, $vcc = V_ADDC_U32_e64 %19, %17, %9, 0, implicit $exec
%24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $vcc, implicit $exec
- BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -407,8 +407,8 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
- %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0
- %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0
+ %4 = S_LOAD_DWORDX2_IMM %0, 9, 0
+ %5 = S_LOAD_DWORDX2_IMM %0, 11, 0
%26 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%27 = REG_SEQUENCE %3, 1, %26, 2
%10 = S_MOV_B32 61440
@@ -417,12 +417,12 @@ body: |
%13 = REG_SEQUENCE killed %5, 17, %12, 18
%28 = V_LSHL_B64_e64 killed %27, 2, implicit $exec
%16 = REG_SEQUENCE killed %4, 17, %12, 18
- %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, 0, 0, 0, implicit $exec
+ %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit $exec
+ %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit $exec
$vcc = S_MOV_B64 0
%29, $vcc = V_ADDC_U32_e64 %19, %17, $vcc, 0, implicit $exec
%24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $vcc, implicit $exec
- BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -492,8 +492,8 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
- %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0
- %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0
+ %4 = S_LOAD_DWORDX2_IMM %0, 9, 0
+ %5 = S_LOAD_DWORDX2_IMM %0, 11, 0
%26 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%27 = REG_SEQUENCE %3, 1, %26, 2
%10 = S_MOV_B32 61440
@@ -502,11 +502,11 @@ body: |
%13 = REG_SEQUENCE killed %5, 17, %12, 18
%28 = V_LSHL_B64_e64 killed %27, 2, implicit $exec
%16 = REG_SEQUENCE killed %4, 17, %12, 18
- %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, 0, 0, 0, implicit $exec
+ %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit $exec
+ %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit $exec
%29, $vcc = V_ADDC_U32_e64 %19, %17, undef $vcc, 0, implicit $exec
%24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $vcc, implicit $exec
- BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir b/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir
index 6c438d80e532..1bb852fc0f2e 100644
--- a/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir
@@ -75,7 +75,7 @@ body: |
# Make sure there's no assert when looking at the implicit use on S_ENDPGM
# GCN-LABEL: name: s_to_v_copy_implicit_use
-# GCN: %0:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %1:sreg_64, 0, 0, 0 :: (load 4, addrspace 4)
+# GCN: %0:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %1:sreg_64, 0, 0 :: (load 4, addrspace 4)
# GCN-NEXT: %2:vgpr_32 = COPY %0
# GCN-NEXT: S_ENDPGM 0, implicit %2
---
@@ -83,7 +83,7 @@ name: s_to_v_copy_implicit_use
tracksRegLiveness: true
body: |
bb.0:
- %0:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %2:sreg_64, 0, 0, 0 :: (load 4, addrspace 4)
+ %0:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %2:sreg_64, 0, 0 :: (load 4, addrspace 4)
%1:vgpr_32 = COPY %0
S_ENDPGM 0, implicit %1
diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-control-flow.mir b/llvm/test/CodeGen/AMDGPU/si-lower-control-flow.mir
index 57bedf7a7aa5..801a586dc0e1 100644
--- a/llvm/test/CodeGen/AMDGPU/si-lower-control-flow.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-lower-control-flow.mir
@@ -9,12 +9,12 @@ body: |
bb.0:
; GCN-LABEL: name: si-lower-control-flow
; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 16, 0, 0
+ ; GCN: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 16, 0
; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0 = S_AND_B32 [[S_LOAD_DWORD_IMM]], 255, implicit-def $scc
; GCN: dead %3:sreg_32_xm0 = S_AND_B32 65535, [[S_AND_B32_]], implicit-def $scc
; GCN: S_ENDPGM 0
%0:sgpr_64 = COPY $sgpr4_sgpr5
- %1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 16, 0, 0
+ %1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 16, 0
%2:sreg_32_xm0 = S_AND_B32 %1, 255, implicit-def $scc
%3:sreg_32_xm0 = S_AND_B32 65535, %2, implicit-def $scc
S_ENDPGM 0
diff --git a/llvm/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir b/llvm/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir
index 37a04c866948..5979720d0cc7 100644
--- a/llvm/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir
+++ b/llvm/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir
@@ -9,14 +9,14 @@ body: |
; CHECK-LABEL: name: skip_branch_taildup_endpgm
; CHECK: bb.0:
; CHECK: successors: %bb.3(0x40000000), %bb.1(0x40000000)
- ; CHECK: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 4, 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4)
+ ; CHECK: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 4, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4)
; CHECK: renamable $vgpr0 = V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec
; CHECK: S_WAITCNT 127
; CHECK: $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $exec
; CHECK: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr0, killed $vgpr0, implicit-def $vcc, implicit $exec
; CHECK: renamable $vgpr1 = V_ADDC_U32_e32 0, killed $vgpr1, implicit-def $vcc, implicit killed $vcc, implicit $exec
- ; CHECK: renamable $vgpr0 = FLAT_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
- ; CHECK: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4)
+ ; CHECK: renamable $vgpr0 = FLAT_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
+ ; CHECK: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4)
; CHECK: S_WAITCNT 112
; CHECK: V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
; CHECK: $sgpr2_sgpr3 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -52,14 +52,14 @@ body: |
successors: %bb.1, %bb.2
liveins: $vgpr0, $sgpr4_sgpr5, $sgpr7
- renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 4, 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4)
+ renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 4, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4)
renamable $vgpr0 = V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec
S_WAITCNT 127
$vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $exec
renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr0, killed $vgpr0, implicit-def $vcc, implicit $exec
renamable $vgpr1 = V_ADDC_U32_e32 0, killed $vgpr1, implicit-def $vcc, implicit killed $vcc, implicit $exec
- renamable $vgpr0 = FLAT_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
- renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4)
+ renamable $vgpr0 = FLAT_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
+ renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4)
S_WAITCNT 112
V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
$sgpr2_sgpr3 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/smem-no-clause-coalesced.mir b/llvm/test/CodeGen/AMDGPU/smem-no-clause-coalesced.mir
index c56ac3725c7b..cec6699bd96a 100644
--- a/llvm/test/CodeGen/AMDGPU/smem-no-clause-coalesced.mir
+++ b/llvm/test/CodeGen/AMDGPU/smem-no-clause-coalesced.mir
@@ -36,8 +36,8 @@ body: |
%3.sub1:sgpr_128 = S_AND_B32 %2, 65535, implicit-def dead $scc
%3.sub3:sgpr_128 = S_MOV_B32 151468
%3.sub2:sgpr_128 = S_MOV_B32 -1
- %7.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %7, 48, 0, 0 :: (load 4 from `i8 addrspace(4)* undef`, addrspace 4)
- %8:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_IMM %3, 640, 0, 0 :: (dereferenceable invariant load 8)
+ %7.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %7, 48, 0 :: (load 4 from `i8 addrspace(4)* undef`, addrspace 4)
+ %8:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_IMM %3, 640, 0 :: (dereferenceable invariant load 8)
undef %9.sub0:vreg_128 = V_LSHL_ADD_U32_e64 %6, 4, %4, implicit $exec
%9.sub1:vreg_128 = V_LSHL_ADD_U32_e64 %5, 4, %0, implicit $exec
S_ENDPGM 0
diff --git a/llvm/test/CodeGen/AMDGPU/smem-war-hazard.mir b/llvm/test/CodeGen/AMDGPU/smem-war-hazard.mir
index 16f97c2b7bfc..6735853f5c74 100644
--- a/llvm/test/CodeGen/AMDGPU/smem-war-hazard.mir
+++ b/llvm/test/CodeGen/AMDGPU/smem-war-hazard.mir
@@ -9,7 +9,7 @@ name: hazard_smem_war
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
$sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $mode, implicit $exec
S_ENDPGM 0
...
@@ -23,7 +23,7 @@ name: hazard_smem_war_no_hazard
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr4, $sgpr5, $vgpr0, $vgpr1
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
$sgpr3 = S_ADD_U32 $sgpr4, $sgpr5, implicit-def $scc
$sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $mode, implicit $exec
S_ENDPGM 0
@@ -39,7 +39,7 @@ name: hazard_smem_war_dependent_salu
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr4, $vgpr0, $vgpr1
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
S_WAITCNT 0
$sgpr3 = S_ADD_U32 $sgpr2, $sgpr4, implicit-def $scc
$sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $mode, implicit $exec
@@ -56,7 +56,7 @@ name: hazard_smem_war_independent_salu
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr4, $sgpr5, $vgpr0, $vgpr1
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
S_WAITCNT 0
$sgpr3 = S_ADD_U32 $sgpr5, $sgpr4, implicit-def $scc
$sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $mode, implicit $exec
@@ -73,8 +73,8 @@ name: hazard_smem_war_only_smem
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr6, $sgpr7, $vgpr0, $vgpr1
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
- $sgpr5 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0, 0
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
+ $sgpr5 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0
$sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $mode, implicit $exec
S_ENDPGM 0
...
@@ -88,7 +88,7 @@ name: hazard_smem_war_only_waitcnt_0
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
S_WAITCNT 0
$sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $mode, implicit $exec
S_ENDPGM 0
@@ -104,7 +104,7 @@ name: hazard_smem_war_only_vmcnt_0
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
S_WAITCNT 3952
$sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $mode, implicit $exec
S_ENDPGM 0
@@ -120,7 +120,7 @@ name: hazard_smem_war_only_expcnt_0
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
S_WAITCNT 53007
$sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $mode, implicit $exec
S_ENDPGM 0
@@ -135,7 +135,7 @@ name: hazard_smem_war_only_lgkmcnt_0
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
S_WAITCNT 49279
$sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $mode, implicit $exec
S_ENDPGM 0
@@ -150,7 +150,7 @@ name: hazard_smem_war_only_waitcnt_lgkmcnt_0
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
S_WAITCNT_LGKMCNT $sgpr_null, 0
$sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $mode, implicit $exec
S_ENDPGM 0
@@ -166,7 +166,7 @@ name: hazard_smem_war_only_waitcnt_lgkmcnt_1
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
S_WAITCNT_LGKMCNT $sgpr_null, 1
$sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $mode, implicit $exec
S_ENDPGM 0
@@ -182,7 +182,7 @@ body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr4, $vgpr0, $vgpr1
successors: %bb.1
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
S_BRANCH %bb.1
bb.1:
@@ -207,7 +207,7 @@ body: |
liveins: $sgpr0, $sgpr1, $sgpr4, $sgpr5, $vgpr0, $vgpr1
successors: %bb.1, %bb.2
$vcc = S_AND_B64 $sgpr4_sgpr5, $sgpr4_sgpr5, implicit-def $scc
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
S_CBRANCH_VCCZ %bb.2, implicit killed $vcc
bb.1:
@@ -239,7 +239,7 @@ body: |
liveins: $sgpr0, $sgpr1, $sgpr4, $sgpr5, $vgpr0, $vgpr1
successors: %bb.1, %bb.2
$vcc = S_AND_B64 $sgpr4_sgpr5, $sgpr4_sgpr5, implicit-def $scc
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
S_CBRANCH_VCCZ %bb.2, implicit killed $vcc
bb.1:
@@ -272,7 +272,7 @@ body: |
bb.1:
liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
S_BRANCH %bb.0
...
@@ -285,7 +285,7 @@ name: hazard_smem_war_impdef
body: |
bb.0:
liveins: $vcc, $vgpr0
- $sgpr0 = S_LOAD_DWORD_IMM $vcc, 0, 0, 0
+ $sgpr0 = S_LOAD_DWORD_IMM $vcc, 0, 0
V_CMP_EQ_F32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $mode, implicit $exec
S_ENDPGM 0
...
@@ -299,7 +299,7 @@ name: hazard_smem_war_readlane
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr3, $vgpr0
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
$sgpr0 = V_READLANE_B32 $vgpr0, $sgpr3
S_ENDPGM 0
...
@@ -313,7 +313,7 @@ name: hazard_smem_war_readfirstlane
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $vgpr0
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
$sgpr0 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/smrd-fold-offset.mir b/llvm/test/CodeGen/AMDGPU/smrd-fold-offset.mir
index e855bbe1f181..dfe9d87a5649 100644
--- a/llvm/test/CodeGen/AMDGPU/smrd-fold-offset.mir
+++ b/llvm/test/CodeGen/AMDGPU/smrd-fold-offset.mir
@@ -19,7 +19,7 @@ body: |
%8:vgpr_32 = COPY %6
%7:vgpr_32 = V_ADD_CO_U32_e32 %4, killed %8, implicit-def dead $vcc, implicit $exec
%10:sreg_32 = COPY %7
- %9:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR killed %5, killed %10, 0, 0
+ %9:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR killed %5, killed %10, 0
$vgpr0 = COPY %9
SI_RETURN_TO_EPILOG $vgpr0
...
@@ -43,7 +43,7 @@ body: |
%8:vgpr_32 = COPY %6
%7:vgpr_32 = V_ADD_U32_e32 %4, killed %8, implicit $exec
%10:sreg_32 = COPY %7
- %9:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR killed %5, killed %10, 0, 0 :: (dereferenceable invariant load 4)
+ %9:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR killed %5, killed %10, 0 :: (dereferenceable invariant load 4)
$vgpr0 = COPY %9
SI_RETURN_TO_EPILOG $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/soft-clause-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/soft-clause-dbg-value.mir
index b1a0bd7b6510..97737372310b 100644
--- a/llvm/test/CodeGen/AMDGPU/soft-clause-dbg-value.mir
+++ b/llvm/test/CodeGen/AMDGPU/soft-clause-dbg-value.mir
@@ -14,33 +14,33 @@ body: |
; CHECK-LABEL: name: sgpr_clause_dbg_value
; CHECK: liveins: $sgpr4_sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24
; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5
- ; CHECK: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
+ ; CHECK: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; CHECK: DBG_VALUE [[S_LOAD_DWORD_IMM]], 0, 0
- ; CHECK: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 8, 0, 0 :: (load 4, addrspace 4)
+ ; CHECK: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 8, 0 :: (load 4, addrspace 4)
; CHECK: DBG_VALUE [[S_LOAD_DWORD_IMM1]], 0, 0
; CHECK: S_NOP 0
; CHECK: S_NOP 0
; CHECK: S_NOP 0
- ; CHECK: [[S_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 16, 0, 0 :: (load 4, addrspace 4)
- ; CHECK: [[S_LOAD_DWORD_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 32, 0, 0 :: (load 4, addrspace 4)
+ ; CHECK: [[S_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 16, 0 :: (load 4, addrspace 4)
+ ; CHECK: [[S_LOAD_DWORD_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 32, 0 :: (load 4, addrspace 4)
; CHECK: DBG_VALUE [[S_LOAD_DWORD_IMM2]], 0, 0
; CHECK: DBG_VALUE [[S_LOAD_DWORD_IMM3]], 0, 0
- ; CHECK: [[S_LOAD_DWORD_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 64, 0, 0 :: (load 4, addrspace 4)
+ ; CHECK: [[S_LOAD_DWORD_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 64, 0 :: (load 4, addrspace 4)
; CHECK: KILL [[COPY]]
; CHECK: S_ENDPGM 0, implicit [[S_LOAD_DWORD_IMM]], implicit [[S_LOAD_DWORD_IMM1]], implicit [[S_LOAD_DWORD_IMM2]], implicit [[S_LOAD_DWORD_IMM3]], implicit [[S_LOAD_DWORD_IMM4]]
%0:sreg_64 = COPY $sgpr4_sgpr5
- %1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 0, 0, 0 :: (load 4, align 4, addrspace 4)
+ %1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 0, 0 :: (load 4, align 4, addrspace 4)
DBG_VALUE %1, 0, 0
- %2:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 8, 0, 0 :: (load 4, align 4, addrspace 4)
+ %2:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 8, 0 :: (load 4, align 4, addrspace 4)
DBG_VALUE %2, 0, 0
S_NOP 0
S_NOP 0
S_NOP 0
- %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 16, 0, 0 :: (load 4, align 4, addrspace 4)
- %4:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 32, 0, 0 :: (load 4, align 4, addrspace 4)
+ %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 16, 0 :: (load 4, align 4, addrspace 4)
+ %4:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 32, 0 :: (load 4, align 4, addrspace 4)
DBG_VALUE %3, 0, 0
DBG_VALUE %4, 0, 0
- %5:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 64, 0, 0 :: (load 4, align 4, addrspace 4)
+ %5:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 64, 0 :: (load 4, align 4, addrspace 4)
S_ENDPGM 0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5
...
diff --git a/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir b/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir
index 454d653dcb2e..8edf93ce5027 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir
@@ -17,9 +17,9 @@ body: |
; CHECK-LABEL: name: spill_a64_kill
; CHECK: liveins: $agpr0_agpr1
; CHECK: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1
- ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store 4 into %stack.0, addrspace 5)
+ ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store 4 into %stack.0, addrspace 5)
; CHECK: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
- ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store 4 into %stack.0 + 4, addrspace 5)
SI_SPILL_A64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, addrspace 5)
...
@@ -41,9 +41,9 @@ body: |
; CHECK-LABEL: name: spill_a64_undef_sub1_killed
; CHECK: liveins: $agpr0
; CHECK: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1
- ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store 4 into %stack.0, addrspace 5)
+ ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store 4 into %stack.0, addrspace 5)
; CHECK: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
- ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store 4 into %stack.0 + 4, addrspace 5)
SI_SPILL_A64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, addrspace 5)
...
@@ -63,8 +63,8 @@ body: |
; CHECK-LABEL: name: spill_a64_undef_sub0_killed
; CHECK: liveins: $agpr1
; CHECK: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1
- ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store 4 into %stack.0, addrspace 5)
+ ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store 4 into %stack.0, addrspace 5)
; CHECK: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
- ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store 4 into %stack.0 + 4, addrspace 5)
SI_SPILL_A64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, addrspace 5)
...
diff --git a/llvm/test/CodeGen/AMDGPU/spill-agpr.mir b/llvm/test/CodeGen/AMDGPU/spill-agpr.mir
index 6c2cc15c6dfb..cabf933a0834 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-agpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-agpr.mir
@@ -267,15 +267,15 @@ body: |
; GFX908-EXPANDED: successors: %bb.1(0x80000000)
; GFX908-EXPANDED: liveins: $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
; GFX908-EXPANDED: S_NOP 0, implicit-def renamable $agpr0
- ; GFX908-EXPANDED: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5)
+ ; GFX908-EXPANDED: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5)
; GFX908-EXPANDED: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
- ; GFX908-EXPANDED: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
- ; GFX908-EXPANDED: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5)
+ ; GFX908-EXPANDED: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
+ ; GFX908-EXPANDED: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5)
; GFX908-EXPANDED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
; GFX908-EXPANDED: bb.1:
; GFX908-EXPANDED: successors: %bb.2(0x80000000)
; GFX908-EXPANDED: bb.2:
- ; GFX908-EXPANDED: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
+ ; GFX908-EXPANDED: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
; GFX908-EXPANDED: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
; GFX908-EXPANDED: S_NOP 0, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
; GFX908-EXPANDED: S_NOP 0, implicit undef $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
@@ -327,12 +327,12 @@ body: |
; GFX90A-EXPANDED: successors: %bb.1(0x80000000)
; GFX90A-EXPANDED: liveins: $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
; GFX90A-EXPANDED: S_NOP 0, implicit-def renamable $agpr0
- ; GFX90A-EXPANDED: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
+ ; GFX90A-EXPANDED: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
; GFX90A-EXPANDED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
; GFX90A-EXPANDED: bb.1:
; GFX90A-EXPANDED: successors: %bb.2(0x80000000)
; GFX90A-EXPANDED: bb.2:
- ; GFX90A-EXPANDED: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
+ ; GFX90A-EXPANDED: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
; GFX90A-EXPANDED: S_NOP 0, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
; GFX90A-EXPANDED: S_NOP 0, implicit undef $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
; GFX90A-EXPANDED: S_NOP 0, implicit undef $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
diff --git a/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir b/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir
index bb0a55843399..e4b77496440a 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir
@@ -22,7 +22,7 @@ body: |
; GCN-LABEL: name: spill_sgpr128_use_subreg
; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3
; GCN: $sgpr8_sgpr9 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
- ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5)
+ ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5)
; GCN: $exec = S_MOV_B64 killed $sgpr8_sgpr9
; GCN: renamable $sgpr1 = COPY $sgpr2
; GCN: $vgpr0 = V_WRITELANE_B32 $sgpr0, 0, $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3
@@ -31,7 +31,7 @@ body: |
; GCN: $vgpr0 = V_WRITELANE_B32 $sgpr3, 3, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; GCN: renamable $sgpr8 = COPY killed renamable $sgpr1
; GCN: $sgpr0_sgpr1 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
- ; GCN: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5)
+ ; GCN: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5)
; GCN: $exec = S_MOV_B64 killed $sgpr0_sgpr1
; GCN: S_ENDPGM 0, implicit $sgpr8
renamable $sgpr1 = COPY $sgpr2
@@ -58,7 +58,7 @@ body: |
; GCN-LABEL: name: spill_sgpr128_use_kill
; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3
; GCN: $sgpr8_sgpr9 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
- ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5)
+ ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5)
; GCN: $exec = S_MOV_B64 killed $sgpr8_sgpr9
; GCN: renamable $sgpr1 = COPY $sgpr2
; GCN: $vgpr0 = V_WRITELANE_B32 $sgpr0, 0, $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3
@@ -66,7 +66,7 @@ body: |
; GCN: $vgpr0 = V_WRITELANE_B32 $sgpr2, 2, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; GCN: $vgpr0 = V_WRITELANE_B32 killed $sgpr3, 3, $vgpr0, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3
; GCN: $sgpr0_sgpr1 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
- ; GCN: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5)
+ ; GCN: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5)
; GCN: $exec = S_MOV_B64 killed $sgpr0_sgpr1
; GCN: S_ENDPGM 0
renamable $sgpr1 = COPY $sgpr2
@@ -91,10 +91,10 @@ body: |
; GCN-LABEL: name: spill_vgpr128_use_subreg
; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
; GCN: renamable $vgpr1 = COPY $vgpr2
- ; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0, addrspace 5)
- ; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 4, addrspace 5)
- ; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 8, addrspace 5)
- ; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 12, addrspace 5)
+ ; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0, addrspace 5)
+ ; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 8, addrspace 5)
+ ; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 12, addrspace 5)
; GCN: renamable $vgpr8 = COPY killed renamable $vgpr1
; GCN: S_ENDPGM 0, implicit $vgpr8
renamable $vgpr1 = COPY $vgpr2
@@ -120,10 +120,10 @@ body: |
; GCN-LABEL: name: spill_vgpr128_use_kill
; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
; GCN: renamable $vgpr1 = COPY $vgpr2
- ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0, addrspace 5)
- ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 4, addrspace 5)
- ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 8, addrspace 5)
- ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 12, addrspace 5)
+ ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0, addrspace 5)
+ ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 8, addrspace 5)
+ ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 12, addrspace 5)
; GCN: S_ENDPGM 0
renamable $vgpr1 = COPY $vgpr2
SI_SPILL_V128_SAVE renamable killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store 16 into %stack.0, align 4, addrspace 5)
diff --git a/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir b/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir
index 7dcecc89048e..dd230752ef4b 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir
@@ -50,7 +50,7 @@ body: |
; GFX9: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit $vcc
; GFX9: $vcc = S_MOV_B64 $exec
; GFX9: $exec = S_MOV_B64 3
- ; GFX9: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
+ ; GFX9: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
; GFX9: $exec = S_MOV_B64 $vcc
; GFX9: $vcc_hi = V_READLANE_B32 $vgpr0, 1
; GFX9: $vcc_lo = V_READLANE_B32 killed $vgpr0, 0
@@ -59,11 +59,11 @@ body: |
; GFX9: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit killed $vcc
; GFX9: $vcc = S_MOV_B64 $exec
; GFX9: $exec = S_MOV_B64 3
- ; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
+ ; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
; GFX9: $exec = S_MOV_B64 killed $vcc
; GFX9: $vcc = S_MOV_B64 $exec
; GFX9: $exec = S_MOV_B64 3
- ; GFX9: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
+ ; GFX9: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
; GFX9: $exec = S_MOV_B64 killed $vcc
; GFX9: $vcc_lo = V_READLANE_B32 $vgpr0, 0, implicit-def $vcc
; GFX9: $vcc_hi = V_READLANE_B32 killed $vgpr0, 1
@@ -81,7 +81,7 @@ body: |
; GFX10: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit $vcc
; GFX10: $vcc = S_MOV_B64 $exec
; GFX10: $exec = S_MOV_B64 3
- ; GFX10: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
+ ; GFX10: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
; GFX10: $exec = S_MOV_B64 $vcc
; GFX10: $vcc_hi = V_READLANE_B32 $vgpr0, 1
; GFX10: $vcc_lo = V_READLANE_B32 killed $vgpr0, 0
@@ -90,11 +90,11 @@ body: |
; GFX10: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit killed $vcc
; GFX10: $vcc = S_MOV_B64 $exec
; GFX10: $exec = S_MOV_B64 3
- ; GFX10: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
+ ; GFX10: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
; GFX10: $exec = S_MOV_B64 killed $vcc
; GFX10: $vcc = S_MOV_B64 $exec
; GFX10: $exec = S_MOV_B64 3
- ; GFX10: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
+ ; GFX10: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
; GFX10: $exec = S_MOV_B64 killed $vcc
; GFX10: $vcc_lo = V_READLANE_B32 $vgpr0, 0, implicit-def $vcc
; GFX10: $vcc_hi = V_READLANE_B32 killed $vgpr0, 1
diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir b/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir
index 417f28c3567d..5eec0d97bab6 100644
--- a/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir
+++ b/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir
@@ -276,14 +276,14 @@ body: |
; RA: internal %14.sub13:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub13
; RA: internal %14.sub14:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub14
; RA: }
- ; RA: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub4, 0, 0 :: (dereferenceable invariant load 4)
- ; RA: [[S_BUFFER_LOAD_DWORD_SGPR1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub5, 0, 0 :: (dereferenceable invariant load 4)
- ; RA: [[S_BUFFER_LOAD_DWORD_SGPR2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub10, 0, 0 :: (dereferenceable invariant load 4)
- ; RA: [[S_BUFFER_LOAD_DWORD_SGPR3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub11, 0, 0 :: (dereferenceable invariant load 4)
- ; RA: [[S_BUFFER_LOAD_DWORD_SGPR4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub7, 0, 0 :: (dereferenceable invariant load 4)
- ; RA: [[S_BUFFER_LOAD_DWORD_SGPR5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub8, 0, 0 :: (dereferenceable invariant load 4)
- ; RA: [[S_BUFFER_LOAD_DWORD_SGPR6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub13, 0, 0 :: (dereferenceable invariant load 4)
- ; RA: [[S_BUFFER_LOAD_DWORD_SGPR7:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub14, 0, 0 :: (dereferenceable invariant load 4)
+ ; RA: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub4, 0 :: (dereferenceable invariant load 4)
+ ; RA: [[S_BUFFER_LOAD_DWORD_SGPR1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub5, 0 :: (dereferenceable invariant load 4)
+ ; RA: [[S_BUFFER_LOAD_DWORD_SGPR2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub10, 0 :: (dereferenceable invariant load 4)
+ ; RA: [[S_BUFFER_LOAD_DWORD_SGPR3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub11, 0 :: (dereferenceable invariant load 4)
+ ; RA: [[S_BUFFER_LOAD_DWORD_SGPR4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub7, 0 :: (dereferenceable invariant load 4)
+ ; RA: [[S_BUFFER_LOAD_DWORD_SGPR5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub8, 0 :: (dereferenceable invariant load 4)
+ ; RA: [[S_BUFFER_LOAD_DWORD_SGPR6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub13, 0 :: (dereferenceable invariant load 4)
+ ; RA: [[S_BUFFER_LOAD_DWORD_SGPR7:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub14, 0 :: (dereferenceable invariant load 4)
; RA: S_NOP 0, implicit [[DEF]], implicit [[DEF1]], implicit [[S_BUFFER_LOAD_DWORD_SGPR]], implicit [[S_BUFFER_LOAD_DWORD_SGPR1]], implicit [[S_BUFFER_LOAD_DWORD_SGPR2]], implicit [[S_BUFFER_LOAD_DWORD_SGPR3]], implicit [[S_BUFFER_LOAD_DWORD_SGPR4]], implicit [[S_BUFFER_LOAD_DWORD_SGPR5]], implicit [[S_BUFFER_LOAD_DWORD_SGPR6]], implicit [[S_BUFFER_LOAD_DWORD_SGPR7]]
; VR-LABEL: name: splitkit_copy_unbundle_reorder
; VR: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF
@@ -305,15 +305,15 @@ body: |
; VR: renamable $sgpr21 = COPY killed renamable $sgpr25
; VR: renamable $sgpr22 = COPY killed renamable $sgpr26
; VR: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = IMPLICIT_DEF
- ; VR: renamable $sgpr8 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr12, 0, 0 :: (dereferenceable invariant load 4)
- ; VR: renamable $sgpr9 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr13, 0, 0 :: (dereferenceable invariant load 4)
- ; VR: renamable $sgpr14 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr15, 0, 0 :: (dereferenceable invariant load 4)
+ ; VR: renamable $sgpr8 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr12, 0 :: (dereferenceable invariant load 4)
+ ; VR: renamable $sgpr9 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr13, 0 :: (dereferenceable invariant load 4)
+ ; VR: renamable $sgpr14 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr15, 0 :: (dereferenceable invariant load 4)
; VR: renamable $sgpr10_sgpr11 = IMPLICIT_DEF
- ; VR: renamable $sgpr17 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr22, 0, 0 :: (dereferenceable invariant load 4)
- ; VR: renamable $sgpr15 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr16, 0, 0 :: (dereferenceable invariant load 4)
- ; VR: renamable $sgpr12 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr18, 0, 0 :: (dereferenceable invariant load 4)
- ; VR: renamable $sgpr13 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr19, 0, 0 :: (dereferenceable invariant load 4)
- ; VR: renamable $sgpr16 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr21, 0, 0 :: (dereferenceable invariant load 4)
+ ; VR: renamable $sgpr17 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr22, 0 :: (dereferenceable invariant load 4)
+ ; VR: renamable $sgpr15 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr16, 0 :: (dereferenceable invariant load 4)
+ ; VR: renamable $sgpr12 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr18, 0 :: (dereferenceable invariant load 4)
+ ; VR: renamable $sgpr13 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr19, 0 :: (dereferenceable invariant load 4)
+ ; VR: renamable $sgpr16 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr21, 0 :: (dereferenceable invariant load 4)
; VR: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, implicit killed renamable $sgpr10_sgpr11, implicit killed renamable $sgpr8, implicit killed renamable $sgpr9, implicit killed renamable $sgpr12, implicit killed renamable $sgpr13, implicit killed renamable $sgpr14, implicit killed renamable $sgpr15, implicit killed renamable $sgpr16, implicit killed renamable $sgpr17
%0:sgpr_128 = IMPLICIT_DEF
%1:sreg_64 = IMPLICIT_DEF
@@ -331,14 +331,14 @@ body: |
; Clobber registers
S_NOP 0, implicit-def $sgpr8, implicit-def $sgpr12, implicit-def $sgpr16, implicit-def $sgpr20, implicit-def $sgpr24, implicit-def $sgpr28, implicit-def $sgpr32, implicit-def $sgpr36, implicit-def $sgpr40, implicit-def $sgpr44, implicit-def $sgpr48, implicit-def $sgpr52, implicit-def $sgpr56, implicit-def $sgpr60, implicit-def $sgpr64, implicit-def $sgpr68, implicit-def $sgpr72, implicit-def $sgpr74, implicit-def $sgpr78, implicit-def $sgpr82, implicit-def $sgpr86, implicit-def $sgpr90, implicit-def $sgpr94, implicit-def $sgpr98
- %5:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub4:sgpr_512, 0, 0 :: (dereferenceable invariant load 4)
- %6:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub5:sgpr_512, 0, 0 :: (dereferenceable invariant load 4)
- %7:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub10:sgpr_512, 0, 0 :: (dereferenceable invariant load 4)
- %8:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub11:sgpr_512, 0, 0 :: (dereferenceable invariant load 4)
- %9:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub7:sgpr_512, 0, 0 :: (dereferenceable invariant load 4)
- %10:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub8:sgpr_512, 0, 0 :: (dereferenceable invariant load 4)
- %11:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub13:sgpr_512, 0, 0 :: (dereferenceable invariant load 4)
- %12:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub14:sgpr_512, 0, 0 :: (dereferenceable invariant load 4)
+ %5:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub4:sgpr_512, 0 :: (dereferenceable invariant load 4)
+ %6:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub5:sgpr_512, 0 :: (dereferenceable invariant load 4)
+ %7:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub10:sgpr_512, 0 :: (dereferenceable invariant load 4)
+ %8:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub11:sgpr_512, 0 :: (dereferenceable invariant load 4)
+ %9:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub7:sgpr_512, 0 :: (dereferenceable invariant load 4)
+ %10:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub8:sgpr_512, 0 :: (dereferenceable invariant load 4)
+ %11:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub13:sgpr_512, 0 :: (dereferenceable invariant load 4)
+ %12:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub14:sgpr_512, 0 :: (dereferenceable invariant load 4)
S_NOP 0, implicit %0, implicit %1, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12
diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir b/llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir
index e129067df593..15168c7325c8 100644
--- a/llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir
+++ b/llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir
@@ -14,7 +14,7 @@ body: |
; CHECK-LABEL: name: zextload_global_v64i16_to_v64i64
; CHECK: liveins: $sgpr0_sgpr1
; CHECK: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
- ; CHECK: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]](p4), 9, 0, 0 :: (dereferenceable invariant load 16, align 4, addrspace 4)
+ ; CHECK: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]](p4), 9, 0 :: (dereferenceable invariant load 16, align 4, addrspace 4)
; CHECK: undef %2.sub3:sgpr_128 = S_MOV_B32 61440
; CHECK: %2.sub2:sgpr_128 = S_MOV_B32 -1
; CHECK: %2.sub0:sgpr_128 = COPY [[S_LOAD_DWORDX4_IMM]].sub0
@@ -24,10 +24,10 @@ body: |
; CHECK: %3.sub2:sgpr_128 = COPY %2.sub2
; CHECK: %3.sub3:sgpr_128 = COPY %2.sub3
; CHECK: early-clobber %4:vreg_128, early-clobber %5:vreg_128, early-clobber %6:vreg_128, early-clobber %7:vreg_128 = BUNDLE %3, implicit $exec {
- ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 128, addrspace 1)
- ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16, addrspace 1)
- ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 32, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 32, addrspace 1)
- ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 48, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16, addrspace 1)
+ ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 128, addrspace 1)
+ ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 16, 0, 0, 0, implicit $exec :: (load 16, addrspace 1)
+ ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 32, 0, 0, 0, implicit $exec :: (load 16, align 32, addrspace 1)
+ ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 48, 0, 0, 0, implicit $exec :: (load 16, addrspace 1)
; CHECK: }
; CHECK: undef %47.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET]].sub1, implicit $exec
; CHECK: SI_SPILL_V128_SAVE %47, %stack.0, $sgpr32, 0, implicit $exec :: (store 16 into %stack.0, align 4, addrspace 5)
@@ -56,26 +56,26 @@ body: |
; CHECK: undef %113.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub3, implicit $exec
; CHECK: undef %117.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub2, implicit $exec
; CHECK: SI_SPILL_V128_SAVE %117, %stack.10, $sgpr32, 0, implicit $exec :: (store 16 into %stack.10, align 4, addrspace 5)
- ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 64, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 64, addrspace 1)
+ ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 64, 0, 0, 0, implicit $exec :: (load 16, align 64, addrspace 1)
; CHECK: undef %122.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub1, implicit $exec
; CHECK: undef %126.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub0, implicit $exec
; CHECK: undef %130.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub3, implicit $exec
; CHECK: SI_SPILL_V128_SAVE %130, %stack.11, $sgpr32, 0, implicit $exec :: (store 16 into %stack.11, align 4, addrspace 5)
; CHECK: undef %135.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub2, implicit $exec
; CHECK: SI_SPILL_V128_SAVE %135, %stack.12, $sgpr32, 0, implicit $exec :: (store 16 into %stack.12, align 4, addrspace 5)
- ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 80, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16, addrspace 1)
+ ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 80, 0, 0, 0, implicit $exec :: (load 16, addrspace 1)
; CHECK: undef %140.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub1, implicit $exec
; CHECK: undef %144.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub0, implicit $exec
; CHECK: SI_SPILL_V128_SAVE %144, %stack.13, $sgpr32, 0, implicit $exec :: (store 16 into %stack.13, align 4, addrspace 5)
; CHECK: undef %149.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub3, implicit $exec
; CHECK: SI_SPILL_V128_SAVE %149, %stack.14, $sgpr32, 0, implicit $exec :: (store 16 into %stack.14, align 4, addrspace 5)
; CHECK: undef %154.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub2, implicit $exec
- ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 96, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 32, addrspace 1)
+ ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 96, 0, 0, 0, implicit $exec :: (load 16, align 32, addrspace 1)
; CHECK: undef %158.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub1, implicit $exec
; CHECK: undef %36.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub0, implicit $exec
; CHECK: undef %37.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub3, implicit $exec
; CHECK: undef %38.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub2, implicit $exec
- ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 112, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16, addrspace 1)
+ ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 112, 0, 0, 0, implicit $exec :: (load 16, addrspace 1)
; CHECK: undef %40.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET7]].sub1, implicit $exec
; CHECK: undef %41.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET7]].sub0, implicit $exec
; CHECK: undef %42.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET7]].sub3, implicit $exec
@@ -155,193 +155,193 @@ body: |
; CHECK: %43.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET7]].sub2, implicit $exec
; CHECK: %43.sub1:vreg_128 = V_MOV_B32_e32 0, implicit $exec
; CHECK: %43.sub3:vreg_128 = COPY %43.sub1
- ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %43, %2, 0, 480, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
+ ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %43, %2, 0, 480, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
; CHECK: %42.sub1:vreg_128 = COPY %43.sub1
; CHECK: %42.sub3:vreg_128 = COPY %43.sub1
- ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %42, %2, 0, 496, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+ ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %42, %2, 0, 496, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
; CHECK: %41.sub1:vreg_128 = COPY %43.sub1
; CHECK: %41.sub3:vreg_128 = COPY %43.sub1
- ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %41, %2, 0, 448, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 64, addrspace 1)
+ ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %41, %2, 0, 448, 0, 0, 0, implicit $exec :: (store 16, align 64, addrspace 1)
; CHECK: %40.sub1:vreg_128 = COPY %43.sub1
; CHECK: %40.sub3:vreg_128 = COPY %43.sub1
- ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %40, %2, 0, 464, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+ ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %40, %2, 0, 464, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
; CHECK: %38.sub1:vreg_128 = COPY %43.sub1
; CHECK: %38.sub3:vreg_128 = COPY %43.sub1
- ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %38, %2, 0, 416, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
+ ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %38, %2, 0, 416, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
; CHECK: %37.sub1:vreg_128 = COPY %43.sub1
; CHECK: %37.sub3:vreg_128 = COPY %43.sub1
- ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %37, %2, 0, 432, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+ ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %37, %2, 0, 432, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
; CHECK: %36.sub1:vreg_128 = COPY %43.sub1
; CHECK: %36.sub3:vreg_128 = COPY %43.sub1
- ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %36, %2, 0, 384, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 128, addrspace 1)
+ ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %36, %2, 0, 384, 0, 0, 0, implicit $exec :: (store 16, align 128, addrspace 1)
; CHECK: undef %157.sub0:vreg_128 = COPY %159.sub0 {
; CHECK: internal %157.sub2:vreg_128 = COPY %159.sub2
; CHECK: }
; CHECK: %157.sub1:vreg_128 = COPY %43.sub1
; CHECK: %157.sub3:vreg_128 = COPY %43.sub1
- ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %157, %2, 0, 400, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+ ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %157, %2, 0, 400, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
; CHECK: undef %153.sub0:vreg_128 = COPY %155.sub0 {
; CHECK: internal %153.sub2:vreg_128 = COPY %155.sub2
; CHECK: }
; CHECK: %153.sub1:vreg_128 = COPY %43.sub1
; CHECK: %153.sub3:vreg_128 = COPY %43.sub1
- ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %153, %2, 0, 352, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
+ ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %153, %2, 0, 352, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
; CHECK: [[SI_SPILL_V128_RESTORE15:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.14, $sgpr32, 0, implicit $exec :: (load 16 from %stack.14, align 4, addrspace 5)
; CHECK: undef %148.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE15]].sub0 {
; CHECK: internal %148.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE15]].sub2
; CHECK: }
; CHECK: %148.sub1:vreg_128 = COPY %43.sub1
; CHECK: %148.sub3:vreg_128 = COPY %43.sub1
- ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %148, %2, 0, 368, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+ ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %148, %2, 0, 368, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
; CHECK: [[SI_SPILL_V128_RESTORE16:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.13, $sgpr32, 0, implicit $exec :: (load 16 from %stack.13, align 4, addrspace 5)
; CHECK: undef %143.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE16]].sub0 {
; CHECK: internal %143.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE16]].sub2
; CHECK: }
; CHECK: %143.sub1:vreg_128 = COPY %43.sub1
; CHECK: %143.sub3:vreg_128 = COPY %43.sub1
- ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %143, %2, 0, 320, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 64, addrspace 1)
+ ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %143, %2, 0, 320, 0, 0, 0, implicit $exec :: (store 16, align 64, addrspace 1)
; CHECK: undef %139.sub0:vreg_128 = COPY %141.sub0 {
; CHECK: internal %139.sub2:vreg_128 = COPY %141.sub2
; CHECK: }
; CHECK: %139.sub1:vreg_128 = COPY %43.sub1
; CHECK: %139.sub3:vreg_128 = COPY %43.sub1
- ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %139, %2, 0, 336, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+ ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %139, %2, 0, 336, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
; CHECK: [[SI_SPILL_V128_RESTORE17:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.12, $sgpr32, 0, implicit $exec :: (load 16 from %stack.12, align 4, addrspace 5)
; CHECK: undef %134.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE17]].sub0 {
; CHECK: internal %134.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE17]].sub2
; CHECK: }
; CHECK: %134.sub1:vreg_128 = COPY %43.sub1
; CHECK: %134.sub3:vreg_128 = COPY %43.sub1
- ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %134, %2, 0, 288, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
+ ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %134, %2, 0, 288, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
; CHECK: [[SI_SPILL_V128_RESTORE18:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.11, $sgpr32, 0, implicit $exec :: (load 16 from %stack.11, align 4, addrspace 5)
; CHECK: undef %129.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE18]].sub0 {
; CHECK: internal %129.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE18]].sub2
; CHECK: }
; CHECK: %129.sub1:vreg_128 = COPY %43.sub1
; CHECK: %129.sub3:vreg_128 = COPY %43.sub1
- ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %129, %2, 0, 304, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+ ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %129, %2, 0, 304, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
; CHECK: undef %125.sub0:vreg_128 = COPY %127.sub0 {
; CHECK: internal %125.sub2:vreg_128 = COPY %127.sub2
; CHECK: }
; CHECK: %125.sub1:vreg_128 = COPY %43.sub1
; CHECK: %125.sub3:vreg_128 = COPY %43.sub1
- ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %125, %2, 0, 256, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 256, addrspace 1)
+ ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %125, %2, 0, 256, 0, 0, 0, implicit $exec :: (store 16, align 256, addrspace 1)
; CHECK: undef %121.sub0:vreg_128 = COPY %123.sub0 {
; CHECK: internal %121.sub2:vreg_128 = COPY %123.sub2
; CHECK: }
; CHECK: %121.sub1:vreg_128 = COPY %43.sub1
; CHECK: %121.sub3:vreg_128 = COPY %43.sub1
- ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %121, %2, 0, 272, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+ ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %121, %2, 0, 272, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
; CHECK: [[SI_SPILL_V128_RESTORE19:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.10, $sgpr32, 0, implicit $exec :: (load 16 from %stack.10, align 4, addrspace 5)
; CHECK: undef %116.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE19]].sub0 {
; CHECK: internal %116.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE19]].sub2
; CHECK: }
; CHECK: %116.sub1:vreg_128 = COPY %43.sub1
; CHECK: %116.sub3:vreg_128 = COPY %43.sub1
- ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %116, %2, 0, 224, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
+ ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %116, %2, 0, 224, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
; CHECK: undef %112.sub0:vreg_128 = COPY %114.sub0 {
; CHECK: internal %112.sub2:vreg_128 = COPY %114.sub2
; CHECK: }
; CHECK: %112.sub1:vreg_128 = COPY %43.sub1
; CHECK: %112.sub3:vreg_128 = COPY %43.sub1
- ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %112, %2, 0, 240, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+ ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %112, %2, 0, 240, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
; CHECK: undef %108.sub0:vreg_128 = COPY %110.sub0 {
; CHECK: internal %108.sub2:vreg_128 = COPY %110.sub2
; CHECK: }
; CHECK: %108.sub1:vreg_128 = COPY %43.sub1
; CHECK: %108.sub3:vreg_128 = COPY %43.sub1
- ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %108, %2, 0, 192, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 64, addrspace 1)
+ ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %108, %2, 0, 192, 0, 0, 0, implicit $exec :: (store 16, align 64, addrspace 1)
; CHECK: undef %104.sub0:vreg_128 = COPY %106.sub0 {
; CHECK: internal %104.sub2:vreg_128 = COPY %106.sub2
; CHECK: }
; CHECK: %104.sub1:vreg_128 = COPY %43.sub1
; CHECK: %104.sub3:vreg_128 = COPY %43.sub1
- ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %104, %2, 0, 208, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+ ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %104, %2, 0, 208, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
; CHECK: [[SI_SPILL_V128_RESTORE20:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.9, $sgpr32, 0, implicit $exec :: (load 16 from %stack.9, align 4, addrspace 5)
; CHECK: undef %99.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE20]].sub0 {
; CHECK: internal %99.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE20]].sub2
; CHECK: }
; CHECK: %99.sub1:vreg_128 = COPY %43.sub1
; CHECK: %99.sub3:vreg_128 = COPY %43.sub1
- ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %99, %2, 0, 160, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
+ ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %99, %2, 0, 160, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
; CHECK: [[SI_SPILL_V128_RESTORE21:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.8, $sgpr32, 0, implicit $exec :: (load 16 from %stack.8, align 4, addrspace 5)
; CHECK: undef %94.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE21]].sub0 {
; CHECK: internal %94.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE21]].sub2
; CHECK: }
; CHECK: %94.sub1:vreg_128 = COPY %43.sub1
; CHECK: %94.sub3:vreg_128 = COPY %43.sub1
- ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %94, %2, 0, 176, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+ ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %94, %2, 0, 176, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
; CHECK: [[SI_SPILL_V128_RESTORE22:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.7, $sgpr32, 0, implicit $exec :: (load 16 from %stack.7, align 4, addrspace 5)
; CHECK: undef %89.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE22]].sub0 {
; CHECK: internal %89.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE22]].sub2
; CHECK: }
; CHECK: %89.sub1:vreg_128 = COPY %43.sub1
; CHECK: %89.sub3:vreg_128 = COPY %43.sub1
- ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %89, %2, 0, 128, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 128, addrspace 1)
+ ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %89, %2, 0, 128, 0, 0, 0, implicit $exec :: (store 16, align 128, addrspace 1)
; CHECK: undef %85.sub0:vreg_128 = COPY %87.sub0 {
; CHECK: internal %85.sub2:vreg_128 = COPY %87.sub2
; CHECK: }
; CHECK: %85.sub1:vreg_128 = COPY %43.sub1
; CHECK: %85.sub3:vreg_128 = COPY %43.sub1
- ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %85, %2, 0, 144, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+ ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %85, %2, 0, 144, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
; CHECK: [[SI_SPILL_V128_RESTORE23:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load 16 from %stack.6, align 4, addrspace 5)
; CHECK: undef %80.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE23]].sub0 {
; CHECK: internal %80.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE23]].sub2
; CHECK: }
; CHECK: %80.sub1:vreg_128 = COPY %43.sub1
; CHECK: %80.sub3:vreg_128 = COPY %43.sub1
- ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %80, %2, 0, 96, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
+ ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %80, %2, 0, 96, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
; CHECK: [[SI_SPILL_V128_RESTORE24:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.5, $sgpr32, 0, implicit $exec :: (load 16 from %stack.5, align 4, addrspace 5)
; CHECK: undef %75.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE24]].sub0 {
; CHECK: internal %75.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE24]].sub2
; CHECK: }
; CHECK: %75.sub1:vreg_128 = COPY %43.sub1
; CHECK: %75.sub3:vreg_128 = COPY %43.sub1
- ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %75, %2, 0, 112, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+ ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %75, %2, 0, 112, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
; CHECK: [[SI_SPILL_V128_RESTORE25:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.4, $sgpr32, 0, implicit $exec :: (load 16 from %stack.4, align 4, addrspace 5)
; CHECK: undef %70.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE25]].sub0 {
; CHECK: internal %70.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE25]].sub2
; CHECK: }
; CHECK: %70.sub1:vreg_128 = COPY %43.sub1
; CHECK: %70.sub3:vreg_128 = COPY %43.sub1
- ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %70, %2, 0, 64, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 64, addrspace 1)
+ ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %70, %2, 0, 64, 0, 0, 0, implicit $exec :: (store 16, align 64, addrspace 1)
; CHECK: undef %66.sub0:vreg_128 = COPY %68.sub0 {
; CHECK: internal %66.sub2:vreg_128 = COPY %68.sub2
; CHECK: }
; CHECK: %66.sub1:vreg_128 = COPY %43.sub1
; CHECK: %66.sub3:vreg_128 = COPY %43.sub1
- ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %66, %2, 0, 80, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+ ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %66, %2, 0, 80, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
; CHECK: [[SI_SPILL_V128_RESTORE26:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load 16 from %stack.3, align 4, addrspace 5)
; CHECK: undef %61.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE26]].sub0 {
; CHECK: internal %61.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE26]].sub2
; CHECK: }
; CHECK: %61.sub1:vreg_128 = COPY %43.sub1
; CHECK: %61.sub3:vreg_128 = COPY %43.sub1
- ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %61, %2, 0, 32, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
+ ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %61, %2, 0, 32, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
; CHECK: [[SI_SPILL_V128_RESTORE27:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load 16 from %stack.2, align 4, addrspace 5)
; CHECK: undef %56.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE27]].sub0 {
; CHECK: internal %56.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE27]].sub2
; CHECK: }
; CHECK: %56.sub1:vreg_128 = COPY %43.sub1
; CHECK: %56.sub3:vreg_128 = COPY %43.sub1
- ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %56, %2, 0, 48, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+ ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %56, %2, 0, 48, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
; CHECK: [[SI_SPILL_V128_RESTORE28:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load 16 from %stack.1, align 4, addrspace 5)
; CHECK: undef %51.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE28]].sub0 {
; CHECK: internal %51.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE28]].sub2
; CHECK: }
; CHECK: %51.sub1:vreg_128 = COPY %43.sub1
; CHECK: %51.sub3:vreg_128 = COPY %43.sub1
- ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %51, %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 512, addrspace 1)
+ ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %51, %2, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 512, addrspace 1)
; CHECK: [[SI_SPILL_V128_RESTORE29:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 16 from %stack.0, align 4, addrspace 5)
; CHECK: undef %46.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE29]].sub0 {
; CHECK: internal %46.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE29]].sub2
; CHECK: }
; CHECK: %46.sub1:vreg_128 = COPY %43.sub1
; CHECK: %46.sub3:vreg_128 = COPY %43.sub1
- ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %46, %2, 0, 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+ ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %46, %2, 0, 16, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
; CHECK: S_ENDPGM 0
%0:sgpr_64(p4) = COPY $sgpr0_sgpr1
- %1:sgpr_128 = S_LOAD_DWORDX4_IMM %0(p4), 9, 0, 0 :: (dereferenceable invariant load 16, align 4, addrspace 4)
+ %1:sgpr_128 = S_LOAD_DWORDX4_IMM %0(p4), 9, 0 :: (dereferenceable invariant load 16, align 4, addrspace 4)
undef %2.sub3:sgpr_128 = S_MOV_B32 61440
%2.sub2:sgpr_128 = S_MOV_B32 -1
%2.sub0:sgpr_128 = COPY %1.sub0
@@ -351,10 +351,10 @@ body: |
%3.sub2:sgpr_128 = COPY %2.sub2
%3.sub3:sgpr_128 = COPY %2.sub3
early-clobber %4:vreg_128, early-clobber %5:vreg_128, early-clobber %6:vreg_128, early-clobber %7:vreg_128 = BUNDLE %3, implicit $exec {
- %7:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 128, addrspace 1)
- %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16, addrspace 1)
- %4:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 32, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 32, addrspace 1)
- %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 48, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16, addrspace 1)
+ %7:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 128, addrspace 1)
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 16, 0, 0, 0, implicit $exec :: (load 16, addrspace 1)
+ %4:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 32, 0, 0, 0, implicit $exec :: (load 16, align 32, addrspace 1)
+ %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 48, 0, 0, 0, implicit $exec :: (load 16, addrspace 1)
}
undef %8.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %7.sub1, implicit $exec
undef %9.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %7.sub0, implicit $exec
@@ -372,22 +372,22 @@ body: |
undef %21.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %6.sub0, implicit $exec
undef %22.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %6.sub3, implicit $exec
undef %23.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %6.sub2, implicit $exec
- %24:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 64, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 64, addrspace 1)
+ %24:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 64, 0, 0, 0, implicit $exec :: (load 16, align 64, addrspace 1)
undef %25.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %24.sub1, implicit $exec
undef %26.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %24.sub0, implicit $exec
undef %27.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %24.sub3, implicit $exec
undef %28.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %24.sub2, implicit $exec
- %29:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 80, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16, addrspace 1)
+ %29:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 80, 0, 0, 0, implicit $exec :: (load 16, addrspace 1)
undef %30.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %29.sub1, implicit $exec
undef %31.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %29.sub0, implicit $exec
undef %32.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %29.sub3, implicit $exec
undef %33.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %29.sub2, implicit $exec
- %34:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 96, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 32, addrspace 1)
+ %34:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 96, 0, 0, 0, implicit $exec :: (load 16, align 32, addrspace 1)
undef %35.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %34.sub1, implicit $exec
undef %36.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %34.sub0, implicit $exec
undef %37.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %34.sub3, implicit $exec
undef %38.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %34.sub2, implicit $exec
- %39:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 112, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16, addrspace 1)
+ %39:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 112, 0, 0, 0, implicit $exec :: (load 16, addrspace 1)
undef %40.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %39.sub1, implicit $exec
undef %41.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %39.sub0, implicit $exec
undef %42.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %39.sub3, implicit $exec
@@ -427,99 +427,99 @@ body: |
%43.sub0:vreg_128 = V_AND_B32_e32 %44, %39.sub2, implicit $exec
%43.sub1:vreg_128 = V_MOV_B32_e32 0, implicit $exec
%43.sub3:vreg_128 = COPY %43.sub1
- BUFFER_STORE_DWORDX4_OFFSET %43, %2, 0, 480, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
+ BUFFER_STORE_DWORDX4_OFFSET %43, %2, 0, 480, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
%42.sub1:vreg_128 = COPY %43.sub1
%42.sub3:vreg_128 = COPY %43.sub1
- BUFFER_STORE_DWORDX4_OFFSET %42, %2, 0, 496, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+ BUFFER_STORE_DWORDX4_OFFSET %42, %2, 0, 496, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
%41.sub1:vreg_128 = COPY %43.sub1
%41.sub3:vreg_128 = COPY %43.sub1
- BUFFER_STORE_DWORDX4_OFFSET %41, %2, 0, 448, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 64, addrspace 1)
+ BUFFER_STORE_DWORDX4_OFFSET %41, %2, 0, 448, 0, 0, 0, implicit $exec :: (store 16, align 64, addrspace 1)
%40.sub1:vreg_128 = COPY %43.sub1
%40.sub3:vreg_128 = COPY %43.sub1
- BUFFER_STORE_DWORDX4_OFFSET %40, %2, 0, 464, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+ BUFFER_STORE_DWORDX4_OFFSET %40, %2, 0, 464, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
%38.sub1:vreg_128 = COPY %43.sub1
%38.sub3:vreg_128 = COPY %43.sub1
- BUFFER_STORE_DWORDX4_OFFSET %38, %2, 0, 416, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
+ BUFFER_STORE_DWORDX4_OFFSET %38, %2, 0, 416, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
%37.sub1:vreg_128 = COPY %43.sub1
%37.sub3:vreg_128 = COPY %43.sub1
- BUFFER_STORE_DWORDX4_OFFSET %37, %2, 0, 432, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+ BUFFER_STORE_DWORDX4_OFFSET %37, %2, 0, 432, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
%36.sub1:vreg_128 = COPY %43.sub1
%36.sub3:vreg_128 = COPY %43.sub1
- BUFFER_STORE_DWORDX4_OFFSET %36, %2, 0, 384, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 128, addrspace 1)
+ BUFFER_STORE_DWORDX4_OFFSET %36, %2, 0, 384, 0, 0, 0, implicit $exec :: (store 16, align 128, addrspace 1)
%35.sub1:vreg_128 = COPY %43.sub1
%35.sub3:vreg_128 = COPY %43.sub1
- BUFFER_STORE_DWORDX4_OFFSET %35, %2, 0, 400, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+ BUFFER_STORE_DWORDX4_OFFSET %35, %2, 0, 400, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
%33.sub1:vreg_128 = COPY %43.sub1
%33.sub3:vreg_128 = COPY %43.sub1
- BUFFER_STORE_DWORDX4_OFFSET %33, %2, 0, 352, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
+ BUFFER_STORE_DWORDX4_OFFSET %33, %2, 0, 352, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
%32.sub1:vreg_128 = COPY %43.sub1
%32.sub3:vreg_128 = COPY %43.sub1
- BUFFER_STORE_DWORDX4_OFFSET %32, %2, 0, 368, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+ BUFFER_STORE_DWORDX4_OFFSET %32, %2, 0, 368, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
%31.sub1:vreg_128 = COPY %43.sub1
%31.sub3:vreg_128 = COPY %43.sub1
- BUFFER_STORE_DWORDX4_OFFSET %31, %2, 0, 320, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 64, addrspace 1)
+ BUFFER_STORE_DWORDX4_OFFSET %31, %2, 0, 320, 0, 0, 0, implicit $exec :: (store 16, align 64, addrspace 1)
%30.sub1:vreg_128 = COPY %43.sub1
%30.sub3:vreg_128 = COPY %43.sub1
- BUFFER_STORE_DWORDX4_OFFSET %30, %2, 0, 336, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+ BUFFER_STORE_DWORDX4_OFFSET %30, %2, 0, 336, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
%28.sub1:vreg_128 = COPY %43.sub1
%28.sub3:vreg_128 = COPY %43.sub1
- BUFFER_STORE_DWORDX4_OFFSET %28, %2, 0, 288, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
+ BUFFER_STORE_DWORDX4_OFFSET %28, %2, 0, 288, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
%27.sub1:vreg_128 = COPY %43.sub1
%27.sub3:vreg_128 = COPY %43.sub1
- BUFFER_STORE_DWORDX4_OFFSET %27, %2, 0, 304, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+ BUFFER_STORE_DWORDX4_OFFSET %27, %2, 0, 304, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
%26.sub1:vreg_128 = COPY %43.sub1
%26.sub3:vreg_128 = COPY %43.sub1
- BUFFER_STORE_DWORDX4_OFFSET %26, %2, 0, 256, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 256, addrspace 1)
+ BUFFER_STORE_DWORDX4_OFFSET %26, %2, 0, 256, 0, 0, 0, implicit $exec :: (store 16, align 256, addrspace 1)
%25.sub1:vreg_128 = COPY %43.sub1
%25.sub3:vreg_128 = COPY %43.sub1
- BUFFER_STORE_DWORDX4_OFFSET %25, %2, 0, 272, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+ BUFFER_STORE_DWORDX4_OFFSET %25, %2, 0, 272, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
%23.sub1:vreg_128 = COPY %43.sub1
%23.sub3:vreg_128 = COPY %43.sub1
- BUFFER_STORE_DWORDX4_OFFSET %23, %2, 0, 224, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
+ BUFFER_STORE_DWORDX4_OFFSET %23, %2, 0, 224, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
%22.sub1:vreg_128 = COPY %43.sub1
%22.sub3:vreg_128 = COPY %43.sub1
- BUFFER_STORE_DWORDX4_OFFSET %22, %2, 0, 240, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+ BUFFER_STORE_DWORDX4_OFFSET %22, %2, 0, 240, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
%21.sub1:vreg_128 = COPY %43.sub1
%21.sub3:vreg_128 = COPY %43.sub1
- BUFFER_STORE_DWORDX4_OFFSET %21, %2, 0, 192, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 64, addrspace 1)
+ BUFFER_STORE_DWORDX4_OFFSET %21, %2, 0, 192, 0, 0, 0, implicit $exec :: (store 16, align 64, addrspace 1)
%20.sub1:vreg_128 = COPY %43.sub1
%20.sub3:vreg_128 = COPY %43.sub1
- BUFFER_STORE_DWORDX4_OFFSET %20, %2, 0, 208, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+ BUFFER_STORE_DWORDX4_OFFSET %20, %2, 0, 208, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
%19.sub1:vreg_128 = COPY %43.sub1
%19.sub3:vreg_128 = COPY %43.sub1
- BUFFER_STORE_DWORDX4_OFFSET %19, %2, 0, 160, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
+ BUFFER_STORE_DWORDX4_OFFSET %19, %2, 0, 160, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
%18.sub1:vreg_128 = COPY %43.sub1
%18.sub3:vreg_128 = COPY %43.sub1
- BUFFER_STORE_DWORDX4_OFFSET %18, %2, 0, 176, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+ BUFFER_STORE_DWORDX4_OFFSET %18, %2, 0, 176, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
%17.sub1:vreg_128 = COPY %43.sub1
%17.sub3:vreg_128 = COPY %43.sub1
- BUFFER_STORE_DWORDX4_OFFSET %17, %2, 0, 128, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 128, addrspace 1)
+ BUFFER_STORE_DWORDX4_OFFSET %17, %2, 0, 128, 0, 0, 0, implicit $exec :: (store 16, align 128, addrspace 1)
%16.sub1:vreg_128 = COPY %43.sub1
%16.sub3:vreg_128 = COPY %43.sub1
- BUFFER_STORE_DWORDX4_OFFSET %16, %2, 0, 144, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+ BUFFER_STORE_DWORDX4_OFFSET %16, %2, 0, 144, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
%15.sub1:vreg_128 = COPY %43.sub1
%15.sub3:vreg_128 = COPY %43.sub1
- BUFFER_STORE_DWORDX4_OFFSET %15, %2, 0, 96, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
+ BUFFER_STORE_DWORDX4_OFFSET %15, %2, 0, 96, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
%14.sub1:vreg_128 = COPY %43.sub1
%14.sub3:vreg_128 = COPY %43.sub1
- BUFFER_STORE_DWORDX4_OFFSET %14, %2, 0, 112, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+ BUFFER_STORE_DWORDX4_OFFSET %14, %2, 0, 112, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
%13.sub1:vreg_128 = COPY %43.sub1
%13.sub3:vreg_128 = COPY %43.sub1
- BUFFER_STORE_DWORDX4_OFFSET %13, %2, 0, 64, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 64, addrspace 1)
+ BUFFER_STORE_DWORDX4_OFFSET %13, %2, 0, 64, 0, 0, 0, implicit $exec :: (store 16, align 64, addrspace 1)
%12.sub1:vreg_128 = COPY %43.sub1
%12.sub3:vreg_128 = COPY %43.sub1
- BUFFER_STORE_DWORDX4_OFFSET %12, %2, 0, 80, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+ BUFFER_STORE_DWORDX4_OFFSET %12, %2, 0, 80, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
%11.sub1:vreg_128 = COPY %43.sub1
%11.sub3:vreg_128 = COPY %43.sub1
- BUFFER_STORE_DWORDX4_OFFSET %11, %2, 0, 32, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
+ BUFFER_STORE_DWORDX4_OFFSET %11, %2, 0, 32, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
%10.sub1:vreg_128 = COPY %43.sub1
%10.sub3:vreg_128 = COPY %43.sub1
- BUFFER_STORE_DWORDX4_OFFSET %10, %2, 0, 48, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+ BUFFER_STORE_DWORDX4_OFFSET %10, %2, 0, 48, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
%9.sub1:vreg_128 = COPY %43.sub1
%9.sub3:vreg_128 = COPY %43.sub1
- BUFFER_STORE_DWORDX4_OFFSET %9, %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 512, addrspace 1)
+ BUFFER_STORE_DWORDX4_OFFSET %9, %2, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 512, addrspace 1)
%8.sub1:vreg_128 = COPY %43.sub1
%8.sub3:vreg_128 = COPY %43.sub1
- BUFFER_STORE_DWORDX4_OFFSET %8, %2, 0, 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+ BUFFER_STORE_DWORDX4_OFFSET %8, %2, 0, 16, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
S_ENDPGM 0
...
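In every MIR hunk above, the memory instructions simply shed trailing zero immediates: the BUFFER_LOAD/BUFFER_STORE_DWORDX4_OFFSET operand lists go from six trailing immediates to three, and the S_LOAD_*_IMM / S_BUFFER_LOAD_* lists from two to one. That is consistent with several boolean cache-policy operands being folded into one bitmask immediate; since the flags are 0 in nearly all of these tests, the whole test update is mechanical operand-count surgery. Below is a minimal, self-contained C++ sketch of that folding; the CPol namespace, the GLC/SLC/DLC names, and the bit positions are assumptions for illustration, not values quoted from this patch.

// Illustrative sketch only: pack three hypothetical boolean cache-policy
// flags into one bitmask immediate, mirroring the operand-count change
// visible in the MIR hunks above. Names and bit positions are assumed.
#include <cassert>
#include <cstdint>
#include <cstdio>

namespace CPol {
// Assumed bit assignments for the three cache-coherence flags.
enum : uint64_t { GLC = 1u << 0, SLC = 1u << 1, DLC = 1u << 2 };
} // namespace CPol

// Fold the legacy per-flag immediates into a single cache-policy value.
static uint64_t packCachePolicy(bool GLCBit, bool SLCBit, bool DLCBit) {
  uint64_t CPolVal = 0;
  if (GLCBit) CPolVal |= CPol::GLC;
  if (SLCBit) CPolVal |= CPol::SLC;
  if (DLCBit) CPolVal |= CPol::DLC;
  return CPolVal;
}

int main() {
  // Three separate immediates (1, 0, 1) become the single operand 0b101;
  // the all-zero common case stays a single 0, hence the shorter MIR lines.
  uint64_t CPolVal = packCachePolicy(true, false, true);
  assert(CPolVal == (CPol::GLC | CPol::DLC));
  std::printf("cache_policy immediate: %llu\n",
              static_cast<unsigned long long>(CPolVal));
  return 0;
}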
diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll b/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll
index 3543c143838a..ffcfa32cfd3d 100644
--- a/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll
+++ b/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll
@@ -24,7 +24,7 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
; CHECK: [[COPY9:%[0-9]+]]:sgpr_32 = COPY $sgpr9
; CHECK: [[COPY10:%[0-9]+]]:sgpr_32 = COPY $sgpr10
; CHECK: [[COPY11:%[0-9]+]]:sgpr_32 = COPY $sgpr8
- ; CHECK: undef %71.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %56, 232, 0, 0 :: (load 8 from %ir.40, addrspace 4)
+ ; CHECK: undef %71.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %56, 232, 0 :: (load 8 from %ir.40, addrspace 4)
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
; CHECK: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY3]], 4, implicit-def dead $scc
; CHECK: [[S_LSHL_B32_1:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY2]], 4, implicit-def dead $scc
@@ -36,19 +36,19 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
; CHECK: [[S_ASHR_I32_2:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_2]], 31, implicit-def dead $scc
; CHECK: undef %130.sub0:sreg_64 = S_ADD_U32 [[COPY4]], [[S_LSHL_B32_2]], implicit-def $scc
; CHECK: %130.sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
- ; CHECK: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %130, 16, 0, 0 :: (load 16 from %ir.84, addrspace 4)
- ; CHECK: [[S_LOAD_DWORDX4_IMM1:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM undef %74:sreg_64, 0, 0, 0 :: (load 16 from `<4 x i32> addrspace(4)* undef`, addrspace 4)
- ; CHECK: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %132:sgpr_128, 0, 0, 0 :: (dereferenceable invariant load 4)
+ ; CHECK: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %130, 16, 0 :: (load 16 from %ir.84, addrspace 4)
+ ; CHECK: [[S_LOAD_DWORDX4_IMM1:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM undef %74:sreg_64, 0, 0 :: (load 16 from `<4 x i32> addrspace(4)* undef`, addrspace 4)
+ ; CHECK: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %132:sgpr_128, 0, 0 :: (dereferenceable invariant load 4)
; CHECK: KILL undef %74:sreg_64
; CHECK: KILL undef %132:sgpr_128
; CHECK: KILL %130.sub0, %130.sub1
- ; CHECK: [[S_BUFFER_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[S_LOAD_DWORDX4_IMM]], 0, 0, 0 :: (dereferenceable invariant load 4)
+ ; CHECK: [[S_BUFFER_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[S_LOAD_DWORDX4_IMM]], 0, 0 :: (dereferenceable invariant load 4)
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK: %71.sub3:sgpr_128 = S_MOV_B32 553734060
; CHECK: %71.sub2:sgpr_128 = S_MOV_B32 -1
- ; CHECK: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET undef %118:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
- ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], undef %89:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
- ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET undef %118:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], undef %89:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM1]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: KILL undef %89:sgpr_128
; CHECK: KILL undef %118:sgpr_128
; CHECK: SI_SPILL_S128_SAVE %71, %stack.1, implicit $exec, implicit $sgpr32 :: (store 16 into %stack.1, align 4, addrspace 5)
@@ -63,14 +63,14 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
; CHECK: undef %156.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_]], [[S_LSHL_B32_1]], implicit-def $scc
; CHECK: %156.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
; CHECK: undef %163.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_]], [[S_LSHL_B32_2]], implicit-def $scc
- ; CHECK: [[S_LOAD_DWORDX4_IMM2:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %149, 0, 0, 0 :: (load 16 from %ir.91, addrspace 4)
- ; CHECK: [[S_LOAD_DWORDX4_IMM3:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %156, 0, 0, 0 :: (load 16 from %ir.97, addrspace 4)
+ ; CHECK: [[S_LOAD_DWORDX4_IMM2:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %149, 0, 0 :: (load 16 from %ir.91, addrspace 4)
+ ; CHECK: [[S_LOAD_DWORDX4_IMM3:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %156, 0, 0 :: (load 16 from %ir.97, addrspace 4)
; CHECK: KILL %156.sub0, %156.sub1
; CHECK: KILL %149.sub0, %149.sub1
; CHECK: %163.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
; CHECK: [[S_ASHR_I32_3:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 undef %171:sreg_32, 31, implicit-def dead $scc
; CHECK: undef %176.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_]], undef %171:sreg_32, implicit-def $scc
- ; CHECK: [[S_LOAD_DWORDX4_IMM4:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %163, 0, 0, 0 :: (load 16 from %ir.103, addrspace 4)
+ ; CHECK: [[S_LOAD_DWORDX4_IMM4:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %163, 0, 0 :: (load 16 from %ir.103, addrspace 4)
; CHECK: %176.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc
; CHECK: undef %183.sub0:sreg_64 = S_ADD_U32 %50.sub0, [[S_LSHL_B32_]], implicit-def $scc
; CHECK: %183.sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
@@ -102,22 +102,22 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
; CHECK: %293.sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_]], 16, implicit-def dead $scc
; CHECK: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_2]], 16, implicit-def dead $scc
- ; CHECK: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %71, [[S_ADD_I32_]], 0, 0 :: (dereferenceable invariant load 4)
- ; CHECK: [[S_BUFFER_LOAD_DWORD_SGPR1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %71, undef %314:sreg_32, 0, 0 :: (dereferenceable invariant load 4)
- ; CHECK: [[S_BUFFER_LOAD_DWORD_SGPR2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %71, [[S_ADD_I32_1]], 0, 0 :: (dereferenceable invariant load 4)
- ; CHECK: [[S_BUFFER_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %71, 16, 0, 0 :: (dereferenceable invariant load 4)
- ; CHECK: [[S_BUFFER_LOAD_DWORD_SGPR3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR undef %369:sgpr_128, undef %370:sreg_32, 0, 0 :: (dereferenceable invariant load 4)
- ; CHECK: [[S_BUFFER_LOAD_DWORD_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %380:sgpr_128, 16, 0, 0 :: (dereferenceable invariant load 4)
- ; CHECK: [[S_LOAD_DWORDX4_IMM5:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %176, 0, 0, 0 :: (load 16 from %ir.111, addrspace 4)
- ; CHECK: [[S_LOAD_DWORDX4_IMM6:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %183, 0, 0, 0 :: (load 16 from %ir.117, addrspace 4)
- ; CHECK: [[S_LOAD_DWORDX4_IMM7:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %190, 0, 0, 0 :: (load 16 from %ir.123, addrspace 4)
- ; CHECK: [[S_LOAD_DWORDX4_IMM8:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %200, 0, 0, 0 :: (load 16 from %ir.131, addrspace 4)
- ; CHECK: [[S_LOAD_DWORDX4_IMM9:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %210, 0, 0, 0 :: (load 16 from %ir.138, addrspace 4)
- ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM2]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
- ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
- ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN4:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM4]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
- ; CHECK: [[S_BUFFER_LOAD_DWORD_SGPR4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR undef %364:sgpr_128, [[S_ADD_I32_]], 0, 0 :: (dereferenceable invariant load 4)
- ; CHECK: [[S_BUFFER_LOAD_DWORD_SGPR5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR undef %375:sgpr_128, [[S_ADD_I32_1]], 0, 0 :: (dereferenceable invariant load 4)
+ ; CHECK: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %71, [[S_ADD_I32_]], 0 :: (dereferenceable invariant load 4)
+ ; CHECK: [[S_BUFFER_LOAD_DWORD_SGPR1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %71, undef %314:sreg_32, 0 :: (dereferenceable invariant load 4)
+ ; CHECK: [[S_BUFFER_LOAD_DWORD_SGPR2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %71, [[S_ADD_I32_1]], 0 :: (dereferenceable invariant load 4)
+ ; CHECK: [[S_BUFFER_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %71, 16, 0 :: (dereferenceable invariant load 4)
+ ; CHECK: [[S_BUFFER_LOAD_DWORD_SGPR3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR undef %369:sgpr_128, undef %370:sreg_32, 0 :: (dereferenceable invariant load 4)
+ ; CHECK: [[S_BUFFER_LOAD_DWORD_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %380:sgpr_128, 16, 0 :: (dereferenceable invariant load 4)
+ ; CHECK: [[S_LOAD_DWORDX4_IMM5:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %176, 0, 0 :: (load 16 from %ir.111, addrspace 4)
+ ; CHECK: [[S_LOAD_DWORDX4_IMM6:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %183, 0, 0 :: (load 16 from %ir.117, addrspace 4)
+ ; CHECK: [[S_LOAD_DWORDX4_IMM7:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %190, 0, 0 :: (load 16 from %ir.123, addrspace 4)
+ ; CHECK: [[S_LOAD_DWORDX4_IMM8:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %200, 0, 0 :: (load 16 from %ir.131, addrspace 4)
+ ; CHECK: [[S_LOAD_DWORDX4_IMM9:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %210, 0, 0 :: (load 16 from %ir.138, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM2]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM3]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN4:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM4]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[S_BUFFER_LOAD_DWORD_SGPR4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR undef %364:sgpr_128, [[S_ADD_I32_]], 0 :: (dereferenceable invariant load 4)
+ ; CHECK: [[S_BUFFER_LOAD_DWORD_SGPR5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR undef %375:sgpr_128, [[S_ADD_I32_1]], 0 :: (dereferenceable invariant load 4)
; CHECK: [[S_ADD_I32_2:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR]], -98, implicit-def dead $scc
; CHECK: [[S_ADD_I32_3:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR1]], -114, implicit-def dead $scc
; CHECK: [[S_ADD_I32_4:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR2]], -130, implicit-def dead $scc
@@ -132,17 +132,17 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
; CHECK: %351.sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
; CHECK: [[S_LSHL_B32_3:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY9]], 4, implicit-def dead $scc
; CHECK: [[S_ADD_I32_6:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_3]], 16, implicit-def dead $scc
- ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN5:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM5]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
- ; CHECK: [[S_BUFFER_LOAD_DWORD_SGPR6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR undef %396:sgpr_128, [[S_ADD_I32_6]], 0, 0 :: (dereferenceable invariant load 4)
- ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN6:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM6]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
- ; CHECK: [[S_LOAD_DWORDX4_IMM10:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %50, 224, 0, 0 :: (load 16 from %ir.155, addrspace 4)
- ; CHECK: [[S_LOAD_DWORDX4_IMM11:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %217, 0, 0, 0 :: (load 16 from %ir.144, addrspace 4)
- ; CHECK: [[S_LOAD_DWORDX4_IMM12:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %224, 0, 0, 0 :: (load 16 from %ir.150, addrspace 4)
- ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN7:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM7]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
- ; CHECK: [[S_LOAD_DWORDX4_IMM13:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %241, 0, 0, 0 :: (load 16 from %ir.162, addrspace 4)
- ; CHECK: [[S_LOAD_DWORDX4_IMM14:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %253, 0, 0, 0 :: (load 16 from %ir.170, addrspace 4)
- ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN8:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM8]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
- ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN9:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM9]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN5:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM5]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[S_BUFFER_LOAD_DWORD_SGPR6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR undef %396:sgpr_128, [[S_ADD_I32_6]], 0 :: (dereferenceable invariant load 4)
+ ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN6:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[S_LOAD_DWORDX4_IMM10:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %50, 224, 0 :: (load 16 from %ir.155, addrspace 4)
+ ; CHECK: [[S_LOAD_DWORDX4_IMM11:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %217, 0, 0 :: (load 16 from %ir.144, addrspace 4)
+ ; CHECK: [[S_LOAD_DWORDX4_IMM12:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %224, 0, 0 :: (load 16 from %ir.150, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN7:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[S_LOAD_DWORDX4_IMM13:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %241, 0, 0 :: (load 16 from %ir.162, addrspace 4)
+ ; CHECK: [[S_LOAD_DWORDX4_IMM14:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %253, 0, 0 :: (load 16 from %ir.170, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN8:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM8]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN9:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM9]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_ADD_I32_7:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR4]], -217, implicit-def dead $scc
; CHECK: [[S_ADD_I32_8:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -233, implicit-def dead $scc
; CHECK: [[S_ADD_I32_9:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR5]], -249, implicit-def dead $scc
@@ -156,73 +156,73 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
; CHECK: undef %411.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_3]], [[S_LSHL_B32_2]], implicit-def $scc
; CHECK: %411.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_3]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
; CHECK: [[S_LSHL_B32_4:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY10]], 4, implicit-def dead $scc
- ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN10:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM11]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN10:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM11]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_ASHR_I32_4:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_4]], 31, implicit-def dead $scc
; CHECK: undef %425.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_3]], [[S_LSHL_B32_4]], implicit-def $scc
; CHECK: %425.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_3]], [[S_ASHR_I32_4]], implicit-def dead $scc, implicit $scc
; CHECK: [[S_ADD_U32_4:%[0-9]+]]:sreg_32 = S_ADD_U32 %56.sub0, 168, implicit-def $scc
; CHECK: [[S_ADDC_U32_4:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %57:sreg_32, 0, implicit-def dead $scc, implicit $scc
; CHECK: [[S_LSHL_B32_5:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY3]], 3, implicit-def dead $scc
- ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN11:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM12]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN11:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM12]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_ASHR_I32_5:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_5]], 31, implicit-def dead $scc
; CHECK: undef %441.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_5]], implicit-def $scc
; CHECK: %441.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_5]], implicit-def dead $scc, implicit $scc
- ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN12:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM10]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
- ; CHECK: %71.sub0:sgpr_128 = S_LOAD_DWORD_IMM %441, 0, 0, 0 :: (load 4 from %ir..i085.i, align 8, addrspace 4)
- ; CHECK: [[S_LOAD_DWORDX4_IMM15:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %261, 0, 0, 0 :: (load 16 from %ir.176, addrspace 4)
- ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN13:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM13]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
- ; CHECK: [[S_LOAD_DWORDX4_IMM16:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %273, 0, 0, 0 :: (load 16 from %ir.185, addrspace 4)
- ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN14:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM14]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
- ; CHECK: [[S_LOAD_DWORDX4_IMM17:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %286, 0, 0, 0 :: (load 16 from %ir.194, addrspace 4)
- ; CHECK: [[S_BUFFER_LOAD_DWORD_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %71, 0, 0, 0 :: (dereferenceable invariant load 4)
- ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN15:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM15]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
- ; CHECK: [[S_LOAD_DWORDX4_IMM18:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %293, 0, 0, 0 :: (load 16 from %ir.200, addrspace 4)
- ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN16:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM16]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
- ; CHECK: [[BUFFER_LOAD_DWORD_OFFSET1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN12:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM10]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: %71.sub0:sgpr_128 = S_LOAD_DWORD_IMM %441, 0, 0 :: (load 4 from %ir..i085.i, align 8, addrspace 4)
+ ; CHECK: [[S_LOAD_DWORDX4_IMM15:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %261, 0, 0 :: (load 16 from %ir.176, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN13:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM13]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[S_LOAD_DWORDX4_IMM16:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %273, 0, 0 :: (load 16 from %ir.185, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN14:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM14]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[S_LOAD_DWORDX4_IMM17:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %286, 0, 0 :: (load 16 from %ir.194, addrspace 4)
+ ; CHECK: [[S_BUFFER_LOAD_DWORD_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %71, 0, 0 :: (dereferenceable invariant load 4)
+ ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN15:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM15]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[S_LOAD_DWORDX4_IMM18:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %293, 0, 0 :: (load 16 from %ir.200, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN16:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM16]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_DWORD_OFFSET1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM1]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_LSHL_B32_6:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY2]], 3, implicit-def dead $scc
- ; CHECK: [[BUFFER_LOAD_DWORD_OFFSET2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM17]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_DWORD_OFFSET2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM17]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_ASHR_I32_6:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_6]], 31, implicit-def dead $scc
; CHECK: [[S_ADD_I32_15:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM4]], -467, implicit-def dead $scc
; CHECK: undef %453.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_6]], implicit-def $scc
; CHECK: %453.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_6]], implicit-def dead $scc, implicit $scc
- ; CHECK: %71.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %453, 0, 0, 0 :: (load 8 from %ir.308, addrspace 4)
- ; CHECK: [[BUFFER_LOAD_DWORD_OFFSET3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM18]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
- ; CHECK: [[S_LOAD_DWORDX4_IMM19:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %327, 0, 0, 0 :: (load 16 from %ir.223, addrspace 4)
- ; CHECK: [[S_LOAD_DWORDX4_IMM20:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %335, 0, 0, 0 :: (load 16 from %ir.230, addrspace 4)
- ; CHECK: [[S_LOAD_DWORDX4_IMM21:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %343, 0, 0, 0 :: (load 16 from %ir.236, addrspace 4)
- ; CHECK: [[S_LOAD_DWORDX4_IMM22:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %351, 0, 0, 0 :: (load 16 from %ir.242, addrspace 4)
+ ; CHECK: %71.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %453, 0, 0 :: (load 8 from %ir.308, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_DWORD_OFFSET3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM18]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[S_LOAD_DWORDX4_IMM19:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %327, 0, 0 :: (load 16 from %ir.223, addrspace 4)
+ ; CHECK: [[S_LOAD_DWORDX4_IMM20:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %335, 0, 0 :: (load 16 from %ir.230, addrspace 4)
+ ; CHECK: [[S_LOAD_DWORDX4_IMM21:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %343, 0, 0 :: (load 16 from %ir.236, addrspace 4)
+ ; CHECK: [[S_LOAD_DWORDX4_IMM22:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %351, 0, 0 :: (load 16 from %ir.242, addrspace 4)
; CHECK: %71.sub1:sgpr_128 = S_AND_B32 %71.sub1, [[S_MOV_B32_]], implicit-def dead $scc
- ; CHECK: [[S_BUFFER_LOAD_DWORD_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %71, 0, 0, 0 :: (dereferenceable invariant load 4)
- ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN17:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM19]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
- ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN18:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM20]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
- ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN19:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM21]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[S_BUFFER_LOAD_DWORD_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %71, 0, 0 :: (dereferenceable invariant load 4)
+ ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN17:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM19]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN18:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM20]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN19:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM21]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_LSHL_B32_7:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY1]], 3, implicit-def dead $scc
- ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN20:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM22]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN20:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM22]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_ASHR_I32_7:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_7]], 31, implicit-def dead $scc
; CHECK: [[S_ADD_I32_16:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM5]], -468, implicit-def dead $scc
; CHECK: undef %468.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_7]], implicit-def $scc
; CHECK: %468.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_7]], implicit-def dead $scc, implicit $scc
- ; CHECK: %71.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %468, 0, 0, 0 :: (load 8 from %ir.320, addrspace 4)
+ ; CHECK: %71.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %468, 0, 0 :: (load 8 from %ir.320, addrspace 4)
; CHECK: %71.sub1:sgpr_128 = S_AND_B32 %71.sub1, [[S_MOV_B32_]], implicit-def dead $scc
- ; CHECK: [[S_BUFFER_LOAD_DWORD_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %71, 0, 0, 0 :: (dereferenceable invariant load 4)
- ; CHECK: [[S_LOAD_DWORDX4_IMM23:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %411, 0, 0, 0 :: (load 16 from %ir.282, addrspace 4)
- ; CHECK: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %488:sreg_64, 0, 0, 0 :: (load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ ; CHECK: [[S_BUFFER_LOAD_DWORD_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %71, 0, 0 :: (dereferenceable invariant load 4)
+ ; CHECK: [[S_LOAD_DWORDX4_IMM23:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %411, 0, 0 :: (load 16 from %ir.282, addrspace 4)
+ ; CHECK: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %488:sreg_64, 0, 0 :: (load 4 from `i32 addrspace(4)* undef`, addrspace 4)
; CHECK: KILL %411.sub0, %411.sub1
; CHECK: KILL undef %488:sreg_64
; CHECK: KILL %71.sub0_sub1
; CHECK: [[S_LSHL_B32_8:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY11]], 3, implicit-def dead $scc
- ; CHECK: [[S_LOAD_DWORDX4_IMM24:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %425, 0, 0, 0 :: (load 16 from %ir.291, addrspace 4)
+ ; CHECK: [[S_LOAD_DWORDX4_IMM24:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %425, 0, 0 :: (load 16 from %ir.291, addrspace 4)
; CHECK: [[S_ASHR_I32_8:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_8]], 31, implicit-def dead $scc
; CHECK: [[S_ADD_I32_17:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM6]], -469, implicit-def dead $scc
; CHECK: undef %485.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_8]], implicit-def $scc
; CHECK: %485.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_8]], implicit-def dead $scc, implicit $scc
- ; CHECK: %71.sub0:sgpr_128 = S_LOAD_DWORD_IMM %485, 0, 0, 0 :: (load 4 from %ir..i0100.i, align 8, addrspace 4)
- ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN21:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM23]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
- ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN22:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM24]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: %71.sub0:sgpr_128 = S_LOAD_DWORD_IMM %485, 0, 0 :: (load 4 from %ir..i0100.i, align 8, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN21:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM23]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN22:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM24]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: KILL [[S_LOAD_DWORDX4_IMM24]]
; CHECK: KILL [[S_LOAD_DWORDX4_IMM23]]
; CHECK: %71.sub1:sgpr_128 = S_AND_B32 [[S_LOAD_DWORD_IMM]], [[S_MOV_B32_]], implicit-def dead $scc
- ; CHECK: [[S_BUFFER_LOAD_DWORD_IMM7:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %71, 0, 0, 0 :: (dereferenceable invariant load 4)
+ ; CHECK: [[S_BUFFER_LOAD_DWORD_IMM7:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %71, 0, 0 :: (dereferenceable invariant load 4)
; CHECK: [[S_ADD_I32_18:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM]], -474, implicit-def dead $scc
; CHECK: [[S_ADD_I32_19:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -475, implicit-def dead $scc
; CHECK: [[S_ADD_I32_20:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -491, implicit-def dead $scc
@@ -234,16 +234,16 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
; CHECK: [[S_ADDC_U32_5:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %33:sreg_32, 0, implicit-def dead $scc, implicit $scc
; CHECK: undef %514.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_5]], [[S_LSHL_B32_]], implicit-def $scc
; CHECK: %514.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
- ; CHECK: [[S_LOAD_DWORDX4_IMM25:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %514, 0, 0, 0 :: (load 16 from %ir.351, addrspace 4)
+ ; CHECK: [[S_LOAD_DWORDX4_IMM25:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %514, 0, 0 :: (load 16 from %ir.351, addrspace 4)
; CHECK: undef %522.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_5]], [[S_LSHL_B32_1]], implicit-def $scc
; CHECK: %522.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
- ; CHECK: [[S_LOAD_DWORDX4_IMM26:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %522, 0, 0, 0 :: (load 16 from %ir.357, addrspace 4)
+ ; CHECK: [[S_LOAD_DWORDX4_IMM26:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %522, 0, 0 :: (load 16 from %ir.357, addrspace 4)
; CHECK: undef %530.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_5]], [[S_LSHL_B32_2]], implicit-def $scc
; CHECK: %530.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
- ; CHECK: [[S_LOAD_DWORDX4_IMM27:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %530, 0, 0, 0 :: (load 16 from %ir.363, addrspace 4)
- ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN23:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM25]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
- ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN24:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM26]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
- ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN25:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM27]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[S_LOAD_DWORDX4_IMM27:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %530, 0, 0 :: (load 16 from %ir.363, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN23:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM25]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN24:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM26]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN25:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM27]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: KILL [[S_LOAD_DWORDX4_IMM27]]
; CHECK: KILL [[S_LOAD_DWORDX4_IMM25]]
; CHECK: KILL [[V_MOV_B32_e32_]]
@@ -358,20 +358,20 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
; CHECK: internal %914.sub0:sgpr_128 = COPY [[SI_SPILL_S128_RESTORE]].sub0
; CHECK: }
; CHECK: %914.sub1:sgpr_128 = COPY [[SI_SPILL_S32_RESTORE1]]
- ; CHECK: [[S_BUFFER_LOAD_DWORD_IMM8:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %914, 0, 0, 0 :: (dereferenceable invariant load 4)
+ ; CHECK: [[S_BUFFER_LOAD_DWORD_IMM8:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %914, 0, 0 :: (dereferenceable invariant load 4)
; CHECK: [[V_ADD_U32_e32_28:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -576, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
; CHECK: [[V_OR_B32_e32_63:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_62]], [[V_ADD_U32_e32_27]], implicit $exec
; CHECK: [[V_ADD_U32_e32_29:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -577, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
; CHECK: [[V_OR_B32_e32_64:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_63]], [[V_ADD_U32_e32_28]], implicit $exec
; CHECK: [[V_ADD_U32_e32_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -593, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
; CHECK: [[V_OR_B32_e32_65:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_64]], [[V_ADD_U32_e32_29]], implicit $exec
- ; CHECK: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef %564:sreg_64, 0, 0, 0 :: (load 32 from `<8 x i32> addrspace(4)* undef`, addrspace 4)
+ ; CHECK: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef %564:sreg_64, 0, 0 :: (load 32 from `<8 x i32> addrspace(4)* undef`, addrspace 4)
; CHECK: [[V_OR_B32_e32_66:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_65]], [[V_ADD_U32_e32_30]], implicit $exec
; CHECK: [[S_ADD_I32_24:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM8]], -594, implicit-def dead $scc
; CHECK: [[V_OR_B32_e32_67:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_24]], [[V_OR_B32_e32_66]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 0, [[V_OR_B32_e32_67]], implicit $exec
; CHECK: undef %691.sub3:vreg_128 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_EQ_U32_e64_]], implicit $exec
- ; CHECK: IMAGE_STORE_V4_V2_gfx10 %691, undef %578:vreg_64, [[S_LOAD_DWORDX8_IMM]], 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "ImageResource")
+ ; CHECK: IMAGE_STORE_V4_V2_gfx10 %691, undef %578:vreg_64, [[S_LOAD_DWORDX8_IMM]], 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "ImageResource")
; CHECK: S_ENDPGM 0
.expVert:
%0 = extractelement <31 x i32> %userData, i64 2
diff --git a/llvm/test/CodeGen/AMDGPU/stack-slot-color-sgpr-vgpr-spills.mir b/llvm/test/CodeGen/AMDGPU/stack-slot-color-sgpr-vgpr-spills.mir
index 09196b7f35a6..c7a8fd96ea85 100644
--- a/llvm/test/CodeGen/AMDGPU/stack-slot-color-sgpr-vgpr-spills.mir
+++ b/llvm/test/CodeGen/AMDGPU/stack-slot-color-sgpr-vgpr-spills.mir
@@ -17,10 +17,10 @@ machineFunctionInfo:
stackPtrOffsetReg: $sgpr32
body: |
bb.0:
- %2:vgpr_32 = FLAT_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $flat_scr, implicit $exec
- %0:vgpr_32 = FLAT_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $flat_scr, implicit $exec
+ %2:vgpr_32 = FLAT_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $flat_scr, implicit $exec
+ %0:vgpr_32 = FLAT_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $flat_scr, implicit $exec
S_NOP 0, implicit %0
- %1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef $sgpr0_sgpr1, 0, 0, 0
- %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef $sgpr0_sgpr1, 0, 0, 0
+ %1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef $sgpr0_sgpr1, 0, 0
+ %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef $sgpr0_sgpr1, 0, 0
S_NOP 0, implicit %1
...
diff --git a/llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir b/llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir
index 58aeddd7b3b3..c9619a65c1a1 100644
--- a/llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir
+++ b/llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir
@@ -97,7 +97,7 @@ body: |
%11.sub5:sgpr_256 = COPY %11.sub0
%11.sub6:sgpr_256 = COPY %11.sub0
%11.sub7:sgpr_256 = COPY %11.sub0
- %12:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %9, %11, undef %13:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, addrspace 4)
+ %12:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %9, %11, undef %13:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, addrspace 4)
%14:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
%15:vreg_128 = IMPLICIT_DEF
S_CBRANCH_SCC1 %bb.8, implicit undef $scc
@@ -163,12 +163,12 @@ body: |
%18:vgpr_32 = V_MAD_F32_e64 0, %10.sub0, 0, target-flags(amdgpu-gotprel) 1073741824, 0, -1082130432, 0, 0, implicit $mode, implicit $exec
%19:vgpr_32 = V_MAD_F32_e64 0, %12.sub0, 0, target-flags(amdgpu-gotprel) 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- %20:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM undef %21:sgpr_128, 1040, 0, 0 :: (dereferenceable invariant load 16)
+ %20:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM undef %21:sgpr_128, 1040, 0 :: (dereferenceable invariant load 16)
%22:vgpr_32 = V_ADD_F32_e32 0, %19, implicit $mode, implicit $exec
%23:vgpr_32 = V_MAD_F32_e64 0, %18, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
%24:vgpr_32 = COPY %20.sub3
%25:vgpr_32 = V_MUL_F32_e64 0, target-flags(amdgpu-gotprel32-lo) 0, 0, %20.sub1, 0, 0, implicit $mode, implicit $exec
- %26:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM undef %27:sgpr_128, 1056, 0, 0 :: (dereferenceable invariant load 16)
+ %26:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM undef %27:sgpr_128, 1056, 0 :: (dereferenceable invariant load 16)
%28:vgpr_32 = V_MAD_F32_e64 0, %18, 0, %26.sub0, 0, 0, 0, 0, implicit $mode, implicit $exec
%29:vgpr_32 = V_ADD_F32_e32 %28, %19, implicit $mode, implicit $exec
%30:vgpr_32 = V_RCP_F32_e32 %29, implicit $mode, implicit $exec
@@ -268,7 +268,7 @@ body: |
%62:vgpr_32 = V_MOV_B32_e32 1033100696, implicit $exec
%63:vgpr_32 = V_MUL_F32_e32 1060575065, %15.sub1, implicit $mode, implicit $exec
%63:vgpr_32 = V_MAC_F32_e32 1046066128, %15.sub0, %63, implicit $mode, implicit $exec
- %64:vgpr_32 = IMAGE_LOAD_V1_V2 %60, %61, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, addrspace 4)
+ %64:vgpr_32 = IMAGE_LOAD_V1_V2 %60, %61, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, addrspace 4)
%64:vgpr_32 = V_MAC_F32_e32 target-flags(amdgpu-gotprel) 0, %51.sub0, %64, implicit $mode, implicit $exec
%65:vgpr_32 = V_MUL_F32_e32 0, %64, implicit $mode, implicit $exec
%66:vgpr_32 = V_MUL_F32_e32 0, %65, implicit $mode, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/subvector-test.mir b/llvm/test/CodeGen/AMDGPU/subvector-test.mir
index fd6ddea87f8e..2839aad9ba1a 100644
--- a/llvm/test/CodeGen/AMDGPU/subvector-test.mir
+++ b/llvm/test/CodeGen/AMDGPU/subvector-test.mir
@@ -15,8 +15,8 @@ body: |
successors: %bb.1, %bb.2
%1:sgpr_64 = COPY $sgpr0_sgpr1
- %4:sgpr_128 = S_LOAD_DWORDX4_IMM %1, 36, 0, 0
- %11:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4.sub2_sub3, 0, 0, 0
+ %4:sgpr_128 = S_LOAD_DWORDX4_IMM %1, 36, 0
+ %11:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4.sub2_sub3, 0, 0
undef %15.sub0:vreg_64 = COPY %4.sub0
%15.sub1:vreg_64 = COPY %4.sub1
%16:vgpr_32 = COPY %1.sub0
@@ -31,6 +31,6 @@ body: |
bb.2:
- GLOBAL_STORE_DWORD %15, %16, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD %15, %16, 0, 0, implicit $exec
S_ENDPGM 0
...
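The FLAT_STORE_DWORD CHECK lines in the next file show the same contraction on flat stores (five trailing immediates down to two). On the textual-assembly side, a packed policy operand would typically be rendered back as individual modifiers, with the all-zero default printing nothing at all. The following sketch shows one way such a printer could look, reusing the assumed CPol bits from the earlier example; it is an illustration of the idea, not code from this patch.

// Illustrative sketch only: render a packed cache-policy immediate as
// space-separated assembly modifiers. Reuses the assumed bit layout from
// the packing sketch above; not taken from the patch itself.
#include <cstdint>
#include <cstdio>
#include <string>

namespace CPol {
enum : uint64_t { GLC = 1u << 0, SLC = 1u << 1, DLC = 1u << 2 };
} // namespace CPol

// A zero immediate yields an empty string, so fully-default instructions
// gain no extra text; each set bit appends its own modifier.
static std::string printCachePolicy(uint64_t CPolVal) {
  std::string Out;
  if (CPolVal & CPol::GLC) Out += " glc";
  if (CPolVal & CPol::SLC) Out += " slc";
  if (CPolVal & CPol::DLC) Out += " dlc";
  return Out;
}

int main() {
  // Assumed instruction text for illustration: one store with two flags
  // set, one with the default zero policy (which prints no modifiers).
  std::printf("flat_store_dword v[0:1], v2%s\n",
              printCachePolicy(CPol::GLC | CPol::SLC).c_str());
  std::printf("flat_store_dword v[0:1], v2%s\n",
              printCachePolicy(0).c_str());
  return 0;
}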
diff --git a/llvm/test/CodeGen/AMDGPU/syncscopes.ll b/llvm/test/CodeGen/AMDGPU/syncscopes.ll
index 5b5f9843d449..e78967bbf8ca 100644
--- a/llvm/test/CodeGen/AMDGPU/syncscopes.ll
+++ b/llvm/test/CodeGen/AMDGPU/syncscopes.ll
@@ -1,9 +1,9 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -stop-after=si-insert-skips < %s | FileCheck --check-prefix=GCN %s
; GCN-LABEL: name: syncscopes
-; GCN: FLAT_STORE_DWORD killed renamable $vgpr1_vgpr2, killed renamable $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store syncscope("agent") seq_cst 4 into %ir.agent_out)
-; GCN: FLAT_STORE_DWORD killed renamable $vgpr4_vgpr5, killed renamable $vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store syncscope("workgroup") seq_cst 4 into %ir.workgroup_out)
-; GCN: FLAT_STORE_DWORD killed renamable $vgpr7_vgpr8, killed renamable $vgpr6, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store syncscope("wavefront") seq_cst 4 into %ir.wavefront_out)
+; GCN: FLAT_STORE_DWORD killed renamable $vgpr1_vgpr2, killed renamable $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (store syncscope("agent") seq_cst 4 into %ir.agent_out)
+; GCN: FLAT_STORE_DWORD killed renamable $vgpr4_vgpr5, killed renamable $vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (store syncscope("workgroup") seq_cst 4 into %ir.workgroup_out)
+; GCN: FLAT_STORE_DWORD killed renamable $vgpr7_vgpr8, killed renamable $vgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (store syncscope("wavefront") seq_cst 4 into %ir.wavefront_out)
define void @syncscopes(
i32 %agent,
i32* %agent_out,
diff --git a/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll b/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll
index 4bff6fdaad7b..9f42347f5ec8 100644
--- a/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll
+++ b/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll
@@ -25,7 +25,8 @@ define amdgpu_ps float @test_return_to_epilog_into_end_block(i32 inreg %a, float
; GCN: bb.2.else:
; GCN: successors:
; GCN: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; GCN: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ ; GCN: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ ; GCN: S_WAITCNT 3952
; GCN: bb.3:
entry:
%cc = icmp sgt i32 %a, 0
@@ -61,7 +62,8 @@ define amdgpu_ps float @test_unify_return_to_epilog_into_end_block(i32 inreg %a,
; GCN: bb.4.else:
; GCN: successors:
; GCN: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; GCN: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ ; GCN: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ ; GCN: S_WAITCNT 3952
; GCN: bb.5:
entry:
%cc = icmp sgt i32 %a, 0
diff --git a/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir b/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir
index be9f864cfc36..fb81b04e7fde 100644
--- a/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir
+++ b/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir
@@ -41,44 +41,44 @@ body: |
; CHECK: renamable $sgpr14 = COPY renamable $sgpr5
; CHECK: renamable $sgpr15 = COPY renamable $sgpr5
; CHECK: renamable $vgpr5_vgpr6 = COPY killed renamable $sgpr0_sgpr1
- ; CHECK: renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1088, 0, 0 :: (dereferenceable load 32, addrspace 6)
- ; CHECK: renamable $sgpr80_sgpr81_sgpr82_sgpr83 = S_LOAD_DWORDX4_IMM renamable $sgpr4_sgpr5, 0, 0, 0 :: (load 16, addrspace 6)
+ ; CHECK: renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1088, 0 :: (dereferenceable load 32, addrspace 6)
+ ; CHECK: renamable $sgpr80_sgpr81_sgpr82_sgpr83 = S_LOAD_DWORDX4_IMM renamable $sgpr4_sgpr5, 0, 0 :: (load 16, addrspace 6)
; CHECK: renamable $sgpr0 = S_MOV_B32 1200
; CHECK: renamable $sgpr1 = COPY renamable $sgpr5
- ; CHECK: renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1152, 0, 0 :: (dereferenceable load 32, addrspace 6)
- ; CHECK: renamable $sgpr84_sgpr85_sgpr86_sgpr87 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0, 0 :: (load 16, addrspace 6)
+ ; CHECK: renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1152, 0 :: (dereferenceable load 32, addrspace 6)
+ ; CHECK: renamable $sgpr84_sgpr85_sgpr86_sgpr87 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0 :: (load 16, addrspace 6)
; CHECK: KILL killed renamable $sgpr0, renamable $sgpr1
; CHECK: renamable $sgpr0 = S_MOV_B32 1264
; CHECK: renamable $sgpr1 = COPY renamable $sgpr5
- ; CHECK: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1216, 0, 0 :: (dereferenceable load 32, addrspace 6)
- ; CHECK: renamable $sgpr88_sgpr89_sgpr90_sgpr91 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0, 0 :: (load 16, addrspace 6)
+ ; CHECK: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1216, 0 :: (dereferenceable load 32, addrspace 6)
+ ; CHECK: renamable $sgpr88_sgpr89_sgpr90_sgpr91 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0 :: (load 16, addrspace 6)
; CHECK: KILL killed renamable $sgpr0, renamable $sgpr1
; CHECK: renamable $sgpr0 = S_MOV_B32 1328
; CHECK: renamable $sgpr1 = COPY renamable $sgpr5
- ; CHECK: renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1280, 0, 0 :: (dereferenceable load 32, addrspace 6)
- ; CHECK: renamable $sgpr92_sgpr93_sgpr94_sgpr95 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0, 0 :: (load 16, addrspace 6)
+ ; CHECK: renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1280, 0 :: (dereferenceable load 32, addrspace 6)
+ ; CHECK: renamable $sgpr92_sgpr93_sgpr94_sgpr95 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0 :: (load 16, addrspace 6)
; CHECK: KILL killed renamable $sgpr0, renamable $sgpr1
- ; CHECK: renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1344, 0, 0 :: (dereferenceable load 32, addrspace 6)
+ ; CHECK: renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1344, 0 :: (dereferenceable load 32, addrspace 6)
; CHECK: renamable $sgpr0 = S_MOV_B32 1392
; CHECK: renamable $sgpr1 = COPY renamable $sgpr5
- ; CHECK: renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 0, 0, 0 :: (load 32, addrspace 6)
+ ; CHECK: renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 0, 0 :: (load 32, addrspace 6)
; CHECK: renamable $sgpr2 = S_MOV_B32 1456
; CHECK: renamable $sgpr3 = COPY renamable $sgpr5
- ; CHECK: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1472, 0, 0 :: (dereferenceable load 32, addrspace 6)
+ ; CHECK: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1472, 0 :: (dereferenceable load 32, addrspace 6)
; CHECK: renamable $sgpr4 = S_MOV_B32 1520
- ; CHECK: renamable $sgpr96_sgpr97_sgpr98_sgpr99 = S_LOAD_DWORDX4_IMM killed renamable $sgpr2_sgpr3, 0, 0, 0 :: (load 16, addrspace 6)
- ; CHECK: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0, 0 :: (load 16, addrspace 6)
- ; CHECK: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr0_sgpr1, 0, 0, 0 :: (load 16, addrspace 6)
- ; CHECK: renamable $vgpr7 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, killed renamable $sgpr76_sgpr77_sgpr78_sgpr79, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
- ; CHECK: renamable $vgpr8 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, killed renamable $sgpr80_sgpr81_sgpr82_sgpr83, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
- ; CHECK: renamable $vgpr9 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, killed renamable $sgpr84_sgpr85_sgpr86_sgpr87, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
- ; CHECK: renamable $vgpr10 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, renamable $sgpr88_sgpr89_sgpr90_sgpr91, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
- ; CHECK: renamable $vgpr11 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, renamable $sgpr92_sgpr93_sgpr94_sgpr95, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
- ; CHECK: renamable $vgpr12 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, renamable $sgpr96_sgpr97_sgpr98_sgpr99, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
- ; CHECK: renamable $vgpr13 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, renamable $sgpr4_sgpr5_sgpr6_sgpr7, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
- ; CHECK: renamable $vgpr14 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
+ ; CHECK: renamable $sgpr96_sgpr97_sgpr98_sgpr99 = S_LOAD_DWORDX4_IMM killed renamable $sgpr2_sgpr3, 0, 0 :: (load 16, addrspace 6)
+ ; CHECK: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (load 16, addrspace 6)
+ ; CHECK: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr0_sgpr1, 0, 0 :: (load 16, addrspace 6)
+ ; CHECK: renamable $vgpr7 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, killed renamable $sgpr76_sgpr77_sgpr78_sgpr79, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
+ ; CHECK: renamable $vgpr8 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, killed renamable $sgpr80_sgpr81_sgpr82_sgpr83, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
+ ; CHECK: renamable $vgpr9 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, killed renamable $sgpr84_sgpr85_sgpr86_sgpr87, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
+ ; CHECK: renamable $vgpr10 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, renamable $sgpr88_sgpr89_sgpr90_sgpr91, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
+ ; CHECK: renamable $vgpr11 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, renamable $sgpr92_sgpr93_sgpr94_sgpr95, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
+ ; CHECK: renamable $vgpr12 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, renamable $sgpr96_sgpr97_sgpr98_sgpr99, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
+ ; CHECK: renamable $vgpr13 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, renamable $sgpr4_sgpr5_sgpr6_sgpr7, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
+ ; CHECK: renamable $vgpr14 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
; CHECK: renamable $sgpr8_sgpr9_sgpr10_sgpr11 = SI_SPILL_S128_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load 16 from %stack.0, align 4, addrspace 5)
- ; CHECK: renamable $vgpr1_vgpr2_vgpr3_vgpr4 = BUFFER_LOAD_FORMAT_XYZW_IDXEN renamable $vgpr0, renamable $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
+ ; CHECK: renamable $vgpr1_vgpr2_vgpr3_vgpr4 = BUFFER_LOAD_FORMAT_XYZW_IDXEN renamable $vgpr0, renamable $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; CHECK: KILL killed renamable $sgpr4_sgpr5_sgpr6_sgpr7
; CHECK: KILL killed renamable $sgpr92_sgpr93_sgpr94_sgpr95
; CHECK: KILL killed renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75
@@ -121,40 +121,40 @@ body: |
%5.sub6:sgpr_256 = COPY %1.sub1
%5.sub7:sgpr_256 = COPY %1.sub1
%6:vreg_64 = COPY %4
- %7:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 1088, 0, 0 :: (dereferenceable load 32, addrspace 6)
- %8:sgpr_128 = S_LOAD_DWORDX4_IMM %1, 0, 0, 0 :: (load 16, addrspace 6)
+ %7:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 1088, 0 :: (dereferenceable load 32, addrspace 6)
+ %8:sgpr_128 = S_LOAD_DWORDX4_IMM %1, 0, 0 :: (load 16, addrspace 6)
undef %9.sub0:sreg_64_xexec = S_MOV_B32 1200
%9.sub1:sreg_64_xexec = COPY %1.sub1
- %10:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 1152, 0, 0 :: (dereferenceable load 32, addrspace 6)
- %11:sgpr_128 = S_LOAD_DWORDX4_IMM %9, 0, 0, 0 :: (load 16, addrspace 6)
+ %10:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 1152, 0 :: (dereferenceable load 32, addrspace 6)
+ %11:sgpr_128 = S_LOAD_DWORDX4_IMM %9, 0, 0 :: (load 16, addrspace 6)
undef %12.sub0:sreg_64_xexec = S_MOV_B32 1264
%12.sub1:sreg_64_xexec = COPY %1.sub1
- %13:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 1216, 0, 0 :: (dereferenceable load 32, addrspace 6)
- %14:sgpr_128 = S_LOAD_DWORDX4_IMM %12, 0, 0, 0 :: (load 16, addrspace 6)
+ %13:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 1216, 0 :: (dereferenceable load 32, addrspace 6)
+ %14:sgpr_128 = S_LOAD_DWORDX4_IMM %12, 0, 0 :: (load 16, addrspace 6)
undef %15.sub0:sreg_64_xexec = S_MOV_B32 1328
%15.sub1:sreg_64_xexec = COPY %1.sub1
- %16:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 1280, 0, 0 :: (dereferenceable load 32, addrspace 6)
- %17:sgpr_128 = S_LOAD_DWORDX4_IMM %15, 0, 0, 0 :: (load 16, addrspace 6)
- %18:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 1344, 0, 0 :: (dereferenceable load 32, addrspace 6)
+ %16:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 1280, 0 :: (dereferenceable load 32, addrspace 6)
+ %17:sgpr_128 = S_LOAD_DWORDX4_IMM %15, 0, 0 :: (load 16, addrspace 6)
+ %18:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 1344, 0 :: (dereferenceable load 32, addrspace 6)
undef %19.sub0:sreg_64_xexec = S_MOV_B32 1392
%19.sub1:sreg_64_xexec = COPY %1.sub1
- %20:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 0, 0, 0 :: (load 32, addrspace 6)
+ %20:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 0, 0 :: (load 32, addrspace 6)
undef %21.sub0:sreg_64_xexec = S_MOV_B32 1456
%21.sub1:sreg_64_xexec = COPY %1.sub1
- %22:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 1472, 0, 0 :: (dereferenceable load 32, addrspace 6)
+ %22:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 1472, 0 :: (dereferenceable load 32, addrspace 6)
%1.sub0:sgpr_64 = S_MOV_B32 1520
- %23:sgpr_128 = S_LOAD_DWORDX4_IMM %21, 0, 0, 0 :: (load 16, addrspace 6)
- %24:sgpr_128 = S_LOAD_DWORDX4_IMM %1, 0, 0, 0 :: (load 16, addrspace 6)
- %25:sgpr_128 = S_LOAD_DWORDX4_IMM %19, 0, 0, 0 :: (load 16, addrspace 6)
- %26:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %5, %3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
- %27:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %7, %8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
- %28:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %10, %11, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
- %29:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %13, %14, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
- %30:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %16, %17, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
- %31:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %20, %23, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
- %32:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %22, %24, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
- %33:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %18, %25, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
- %34:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN %0, %2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
+ %23:sgpr_128 = S_LOAD_DWORDX4_IMM %21, 0, 0 :: (load 16, addrspace 6)
+ %24:sgpr_128 = S_LOAD_DWORDX4_IMM %1, 0, 0 :: (load 16, addrspace 6)
+ %25:sgpr_128 = S_LOAD_DWORDX4_IMM %19, 0, 0 :: (load 16, addrspace 6)
+ %26:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %5, %3, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
+ %27:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %7, %8, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
+ %28:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %10, %11, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
+ %29:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %13, %14, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
+ %30:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %16, %17, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
+ %31:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %20, %23, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
+ %32:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %22, %24, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
+ %33:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %18, %25, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
+ %34:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN %0, %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
%35:vgpr_32 = nofpexcept V_MAX_F32_e32 %26, %27, implicit $mode, implicit $exec
%36:vgpr_32 = V_MAX3_F32_e64 0, %35, 0, %28, 0, %29, 0, 0, implicit $mode, implicit $exec
%37:vgpr_32 = nofpexcept V_ADD_F32_e32 -1083321614, %31, implicit $mode, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir
index 2bed3e0162c7..8c75b1f78e95 100644
--- a/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir
@@ -44,7 +44,7 @@ body: |
liveins: $vgpr0, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr13
$vgpr1_vgpr2 = COPY killed $sgpr4_sgpr5, implicit $exec
- $vgpr1 = GLOBAL_LOAD_UBYTE killed $vgpr1_vgpr2, 0, 0, 0, 0, 0, implicit $exec :: (non-temporal dereferenceable invariant load 1 from `i1 addrspace(4)* undef`)
+ $vgpr1 = GLOBAL_LOAD_UBYTE killed $vgpr1_vgpr2, 0, 0, implicit $exec :: (non-temporal dereferenceable invariant load 1 from `i1 addrspace(4)* undef`)
$vcc = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec
$sgpr0_sgpr1 = V_CMP_EQ_U32_e64 1, killed $vgpr1, implicit $exec
$vgpr1 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed $sgpr0_sgpr1, implicit $exec
@@ -109,7 +109,7 @@ body: |
liveins: $vgpr0, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr13
$vgpr1_vgpr2 = COPY killed $sgpr4_sgpr5, implicit $exec
- $vgpr1 = GLOBAL_LOAD_UBYTE killed $vgpr1_vgpr2, 0, 0, 0, 0, 0, implicit $exec :: (non-temporal dereferenceable invariant load 1 from `i1 addrspace(4)* undef`)
+ $vgpr1 = GLOBAL_LOAD_UBYTE killed $vgpr1_vgpr2, 0, 0, implicit $exec :: (non-temporal dereferenceable invariant load 1 from `i1 addrspace(4)* undef`)
$vcc = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec
$sgpr0_sgpr1 = V_CMP_EQ_U32_e64 1, killed $vgpr1, implicit $exec
$vgpr1 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed $sgpr0_sgpr1, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir b/llvm/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir
index a5525ee15fbe..ee09c984a162 100644
--- a/llvm/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir
+++ b/llvm/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir
@@ -14,7 +14,7 @@ body: |
bb.0:
liveins: $sgpr0_sgpr1
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0, 0
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0
$sgpr7 = S_MOV_B32 61440
$sgpr6 = S_MOV_B32 -1
$vcc = V_CMP_EQ_F32_e64 0, 0, 0, undef $sgpr2, 0, implicit $mode, implicit $exec
@@ -24,7 +24,7 @@ body: |
liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
$vgpr0 = V_MOV_B32_e32 9, implicit $exec
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
S_BRANCH %bb.3
@@ -32,7 +32,7 @@ body: |
liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
$vgpr0 = V_MOV_B32_e32 100, implicit $exec
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
bb.3:
@@ -40,7 +40,7 @@ body: |
$sgpr3 = S_MOV_B32 61440
$sgpr2 = S_MOV_B32 -1
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -56,7 +56,7 @@ body: |
bb.0:
liveins: $sgpr0_sgpr1
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0, 0
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0
$sgpr7 = S_MOV_B32 61440
$sgpr6 = S_MOV_B32 -1
S_CBRANCH_VCCZ %bb.1, implicit undef $vcc
@@ -65,7 +65,7 @@ body: |
liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
$vgpr0 = V_MOV_B32_e32 9, implicit $exec
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
S_BRANCH %bb.3
@@ -73,7 +73,7 @@ body: |
liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
$vgpr0 = V_MOV_B32_e32 100, implicit $exec
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
bb.3:
@@ -81,7 +81,7 @@ body: |
$sgpr3 = S_MOV_B32 61440
$sgpr2 = S_MOV_B32 -1
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -110,9 +110,9 @@ body: |
# instructions to fix vccz.
# CHECK-LABEL: name: reload_vcc_from_mem
-# CHECK: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, 0, 0, 0, 0, implicit $exec
+# CHECK: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, 0, implicit $exec
# CHECK: $vcc_lo = V_READFIRSTLANE_B32 killed $vgpr0, implicit $exec, implicit-def $vcc
-# CHECK: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, 0, 0, 0, 0, implicit $exec
+# CHECK: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, 0, implicit $exec
# CHECK: $vcc_hi = V_READFIRSTLANE_B32 killed $vgpr0, implicit $exec, implicit-def $vcc
# SI: $vcc = S_MOV_B64 $vcc
# GFX9: $vcc = S_MOV_B64 $vcc
@@ -121,9 +121,9 @@ body: |
name: reload_vcc_from_mem
body: |
bb.0:
- $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, 0, implicit $exec
$vcc_lo = V_READFIRSTLANE_B32 killed $vgpr0, implicit $exec, implicit-def $vcc
- $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, 0, implicit $exec
$vcc_hi = V_READFIRSTLANE_B32 killed $vgpr0, implicit $exec, implicit-def $vcc
S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
bb.1:
@@ -188,14 +188,14 @@ body: |
---
# CHECK-LABEL: name: load_wait_def_use
# SI: S_WAITCNT 0
-# SI-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+# SI-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
# SI-NEXT: S_WAITCNT 127
# SI-NEXT: $vcc = S_MOV_B64 0
# SI-NEXT: S_CBRANCH_VCCZ %bb.1, implicit $vcc
name: load_wait_def_use
body: |
bb.0:
- $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
S_WAITCNT 127
$vcc = S_MOV_B64 0
S_CBRANCH_VCCZ %bb.1, implicit $vcc
@@ -205,7 +205,7 @@ body: |
---
# CHECK-LABEL: name: load_wait_nop_def_use
# SI: S_WAITCNT 0
-# SI-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+# SI-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
# SI-NEXT: S_WAITCNT 127
# SI-NEXT: S_NOP 0
# SI-NEXT: $vcc = S_MOV_B64 0
@@ -213,7 +213,7 @@ body: |
name: load_wait_nop_def_use
body: |
bb.0:
- $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
S_WAITCNT 127
S_NOP 0
$vcc = S_MOV_B64 0
@@ -224,7 +224,7 @@ body: |
---
# CHECK-LABEL: name: load_def_wait_use
# SI: S_WAITCNT 0
-# SI-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+# SI-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
# SI-NEXT: $vcc = S_MOV_B64 0
# SI-NEXT: S_WAITCNT 127
# SI-NEXT: $vcc = S_MOV_B64 $vcc
@@ -232,7 +232,7 @@ body: |
name: load_def_wait_use
body: |
bb.0:
- $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
$vcc = S_MOV_B64 0
S_WAITCNT 127
S_CBRANCH_VCCZ %bb.1, implicit $vcc
@@ -241,7 +241,7 @@ body: |
# CHECK-LABEL: name: load_def_wait_nop_use
# SI: S_WAITCNT 0
-# SI-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+# SI-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
# SI-NEXT: $vcc = S_MOV_B64 0
# SI-NEXT: S_WAITCNT 127
# SI-NEXT: S_NOP 0
@@ -250,7 +250,7 @@ body: |
name: load_def_wait_nop_use
body: |
bb.0:
- $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
$vcc = S_MOV_B64 0
S_WAITCNT 127
S_NOP 0
@@ -261,7 +261,7 @@ body: |
---
# CHECK-LABEL: name: load_def_use
# SI: S_WAITCNT 0
-# SI-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+# SI-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
# SI-NEXT: $vcc = S_MOV_B64 0
# SI-NEXT: S_WAITCNT 127
# SI-NEXT: $vcc = S_MOV_B64 $vcc
@@ -269,7 +269,7 @@ body: |
name: load_def_use
body: |
bb.0:
- $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
$vcc = S_MOV_B64 0
S_CBRANCH_VCCZ %bb.1, implicit $vcc
bb.1:
@@ -279,7 +279,7 @@ body: |
# CHECK-LABEL: name: def_load_wait_use
# SI: S_WAITCNT 0
# SI-NEXT: $vcc = S_MOV_B64 0
-# SI-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+# SI-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
# SI-NEXT: S_WAITCNT 127
# SI-NEXT: $vcc = S_MOV_B64 $vcc
# SI-NEXT: S_CBRANCH_VCCZ %bb.1, implicit $vcc
@@ -287,7 +287,7 @@ name: def_load_wait_use
body: |
bb.0:
$vcc = S_MOV_B64 0
- $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
S_WAITCNT 127
S_CBRANCH_VCCZ %bb.1, implicit $vcc
bb.1:
@@ -297,7 +297,7 @@ body: |
# CHECK-LABEL: name: def_load_wait_nop_use
# SI: S_WAITCNT 0
# SI-NEXT: $vcc = S_MOV_B64 0
-# SI-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+# SI-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
# SI-NEXT: S_WAITCNT 127
# SI-NEXT: S_NOP 0
# SI-NEXT: $vcc = S_MOV_B64 $vcc
@@ -306,7 +306,7 @@ name: def_load_wait_nop_use
body: |
bb.0:
$vcc = S_MOV_B64 0
- $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
S_WAITCNT 127
S_NOP 0
S_CBRANCH_VCCZ %bb.1, implicit $vcc
@@ -317,7 +317,7 @@ body: |
# CHECK-LABEL: name: def_load_use
# SI: S_WAITCNT 0
# SI-NEXT: $vcc = S_MOV_B64 0
-# SI-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+# SI-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
# SI-NEXT: S_WAITCNT 127
# SI-NEXT: $vcc = S_MOV_B64 $vcc
# SI-NEXT: S_CBRANCH_VCCZ %bb.1, implicit $vcc
@@ -325,7 +325,7 @@ name: def_load_use
body: |
bb.0:
$vcc = S_MOV_B64 0
- $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
S_CBRANCH_VCCZ %bb.1, implicit $vcc
bb.1:
...
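
The mechanical change running through all of these hunks — five (or more) trailing immediates collapsing into two — is easier to follow with the bit layout of the new combined operand in mind. A minimal sketch, assuming the conventional GLC/SLC/DLC bit positions; the authoritative enum lives in SIDefines.h and may differ in exact names and values:

```cpp
// Illustrative only: assumed bit assignments for the single cache-policy
// immediate that replaces the separate GLC, SLC, and DLC operands.
namespace CPol {
enum CPol : unsigned {
  GLC = 1u << 0, // globally coherent
  SLC = 1u << 1, // system level coherent
  DLC = 1u << 2, // device level coherent (gfx10+ only)
  ALL = GLC | SLC | DLC
};
} // namespace CPol
```
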
diff --git a/llvm/test/CodeGen/AMDGPU/verify-gfx90a-aligned-vgprs.mir b/llvm/test/CodeGen/AMDGPU/verify-gfx90a-aligned-vgprs.mir
index 744dc38bff4c..16d308bfcc7f 100644
--- a/llvm/test/CodeGen/AMDGPU/verify-gfx90a-aligned-vgprs.mir
+++ b/llvm/test/CodeGen/AMDGPU/verify-gfx90a-aligned-vgprs.mir
@@ -44,8 +44,8 @@ body: |
bb.0:
%0:vreg_64_align2 = IMPLICIT_DEF
%1:vreg_128_align2 = IMPLICIT_DEF
- GLOBAL_STORE_DWORDX2 %0, %1.sub0_sub1, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX2 %0, %1.sub2_sub3, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX2 %0, %1.sub0_sub1, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX2 %0, %1.sub2_sub3, 0, 0, implicit $exec
...
---
@@ -64,42 +64,42 @@ body: |
; CHECK: *** Bad machine code: Subtarget requires even aligned vector registers ***
; CHECK: *** Bad machine code: Subtarget requires even aligned vector registers ***
; CHECK: *** Bad machine code: Subtarget requires even aligned vector registers ***
- GLOBAL_STORE_DWORDX2 %0, %1, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX3 %0, %2, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 %0, %3, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX2 %0, %1, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX3 %0, %2, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 %0, %3, 0, 0, implicit $exec
; Check virtual registers with subregisters
; CHECK: *** Bad machine code: Subtarget requires even aligned vector registers ***
; CHECK: *** Bad machine code: Subtarget requires even aligned vector registers ***
; CHECK: *** Bad machine code: Subtarget requires even aligned vector registers ***
; CHECK: *** Bad machine code: Subtarget requires even aligned vector registers ***
- GLOBAL_STORE_DWORDX2 %0, %3.sub0_sub1, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX2 %0, %3.sub2_sub3, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX2 %0, %3.sub1_sub2, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX2 %0, %5.sub1_sub2, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX2 %0, %3.sub0_sub1, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX2 %0, %3.sub2_sub3, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX2 %0, %3.sub1_sub2, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX2 %0, %5.sub1_sub2, 0, 0, implicit $exec
; Check physical register uses
; CHECK: *** Bad machine code: Subtarget requires even aligned vector registers ***
; CHECK: *** Bad machine code: Subtarget requires even aligned vector registers ***
; CHECK: *** Bad machine code: Subtarget requires even aligned vector registers ***
- GLOBAL_STORE_DWORDX2 $vgpr0_vgpr1, $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX3 $vgpr0_vgpr1, $vgpr3_vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORDX4 $vgpr0_vgpr1, $vgpr3_vgpr4_vgpr5_vgpr6, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX2 $vgpr0_vgpr1, $vgpr3_vgpr4, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX3 $vgpr0_vgpr1, $vgpr3_vgpr4_vgpr5, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORDX4 $vgpr0_vgpr1, $vgpr3_vgpr4_vgpr5_vgpr6, 0, 0, implicit $exec
; Check virtual register defs
; CHECK: *** Bad machine code: Subtarget requires even aligned vector registers ***
; CHECK: *** Bad machine code: Subtarget requires even aligned vector registers ***
; CHECK: *** Bad machine code: Subtarget requires even aligned vector registers ***
- %6:vreg_64 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, 0, 0, 0, implicit $exec
- %7:vreg_96 = GLOBAL_LOAD_DWORDX3 %0, 0, 0, 0, 0, 0, implicit $exec
- %8:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, 0, implicit $exec
+ %6:vreg_64 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, implicit $exec
+ %7:vreg_96 = GLOBAL_LOAD_DWORDX3 %0, 0, 0, implicit $exec
+ %8:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
; CHECK: *** Bad machine code: Subtarget requires even aligned vector registers ***
; CHECK: *** Bad machine code: Subtarget requires even aligned vector registers ***
; CHECK: *** Bad machine code: Subtarget requires even aligned vector registers ***
- $vgpr1_vgpr2 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr1_vgpr2_vgpr3 = GLOBAL_LOAD_DWORDX3 %0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr1_vgpr2_vgpr3_vgpr4 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1_vgpr2 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, implicit $exec
+ $vgpr1_vgpr2_vgpr3 = GLOBAL_LOAD_DWORDX3 %0, 0, 0, implicit $exec
+ $vgpr1_vgpr2_vgpr3_vgpr4 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
; Check AGPRs
; CHECK: *** Bad machine code: Subtarget requires even aligned vector registers ***
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/vgpr-spill.mir
index b629e98c27cd..2422fe046ebf 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill.mir
@@ -16,7 +16,7 @@ body: |
; CHECK-LABEL: name: spill_v32
; CHECK: liveins: $vgpr0
- ; CHECK: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
+ ; CHECK: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
; CHECK: S_NOP 0, implicit $vgpr0
SI_SPILL_V32_SAVE $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
S_NOP 0, implicit $vgpr0
@@ -37,7 +37,7 @@ body: |
; CHECK-LABEL: name: spill_v32_kill
; CHECK: liveins: $vgpr0
- ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
+ ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
...
@@ -56,8 +56,8 @@ body: |
; CHECK-LABEL: name: spill_v64
; CHECK: liveins: $vgpr0_vgpr1
- ; CHECK: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store 4 into %stack.0, addrspace 5)
- ; CHECK: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; CHECK: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store 4 into %stack.0, addrspace 5)
+ ; CHECK: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1 :: (store 4 into %stack.0 + 4, addrspace 5)
; CHECK: S_NOP 0, implicit $vgpr0_vgpr1
SI_SPILL_V64_SAVE $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, addrspace 5)
S_NOP 0, implicit $vgpr0_vgpr1
@@ -78,8 +78,8 @@ body: |
; CHECK-LABEL: name: spill_v64_kill
; CHECK: liveins: $vgpr0_vgpr1
- ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store 4 into %stack.0, addrspace 5)
- ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store 4 into %stack.0, addrspace 5)
+ ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store 4 into %stack.0 + 4, addrspace 5)
SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, addrspace 5)
...
@@ -100,8 +100,8 @@ body: |
; CHECK-LABEL: name: spill_v64_undef_sub1_killed
; CHECK: liveins: $vgpr0
- ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store 4 into %stack.0, addrspace 5)
- ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store 4 into %stack.0, addrspace 5)
+ ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store 4 into %stack.0 + 4, addrspace 5)
SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, addrspace 5)
...
@@ -120,8 +120,8 @@ body: |
; CHECK-LABEL: name: spill_v64_undef_sub0_killed
; CHECK: liveins: $vgpr1
- ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store 4 into %stack.0, addrspace 5)
- ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store 4 into %stack.0, addrspace 5)
+ ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store 4 into %stack.0 + 4, addrspace 5)
SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, addrspace 5)
...
@@ -140,9 +140,9 @@ body: |
; CHECK-LABEL: name: spill_v128_kill
; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
- ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0, addrspace 5)
- ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 4, addrspace 5)
- ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 8, addrspace 5)
- ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 12, addrspace 5)
+ ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0, addrspace 5)
+ ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 8, addrspace 5)
+ ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 12, addrspace 5)
SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store 16 into %stack.0, addrspace 5)
...
diff --git a/llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir b/llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir
index c8505ef4427b..f5eab81eab5e 100644
--- a/llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir
@@ -29,7 +29,7 @@ machineFunctionInfo:
body: |
bb.0:
; CHECK-LABEL: name: undef_identity_copy
- ; CHECK: renamable $vgpr40_vgpr41_vgpr42_vgpr43 = FLAT_LOAD_DWORDX4 undef renamable $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, addrspace 1)
+ ; CHECK: renamable $vgpr40_vgpr41_vgpr42_vgpr43 = FLAT_LOAD_DWORDX4 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, addrspace 1)
; CHECK: renamable $sgpr6_sgpr7 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @foo + 4, target-flags(amdgpu-rel32-hi) @foo + 4, implicit-def dead $scc
; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95
; CHECK: $sgpr4 = COPY $sgpr95
@@ -44,9 +44,9 @@ body: |
; CHECK: $vgpr3 = KILL undef renamable $vgpr3
; CHECK: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr6_sgpr7, @bar, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $vgpr0, implicit killed $vgpr1, implicit killed $vgpr2, implicit killed $vgpr3, implicit-def $vgpr0
; CHECK: ADJCALLSTACKDOWN 0, 4, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95
- ; CHECK: FLAT_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ ; CHECK: FLAT_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; CHECK: S_ENDPGM 0
- %0:vreg_128 = FLAT_LOAD_DWORDX4 undef %1:vreg_64, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, addrspace 1)
+ %0:vreg_128 = FLAT_LOAD_DWORDX4 undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, addrspace 1)
%2:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @foo + 4, target-flags(amdgpu-rel32-hi) @foo + 4, implicit-def dead $scc
ADJCALLSTACKUP 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95, implicit-def $scc
$sgpr4 = COPY $sgpr95
@@ -62,7 +62,7 @@ body: |
dead $sgpr30_sgpr31 = SI_CALL %3, @bar, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $vgpr0, implicit killed $vgpr1, implicit killed $vgpr2, implicit killed $vgpr3, implicit-def $vgpr0
%5:vgpr_32 = COPY $vgpr0
ADJCALLSTACKDOWN 0, 4, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95
- FLAT_STORE_DWORD undef %6:vreg_64, %5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+ FLAT_STORE_DWORD undef %6:vreg_64, %5, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
S_ENDPGM 0
...
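
The commit message notes that the assembler now accepts the cache-policy flags in any order. That falls out naturally from the bitmask representation: each flag just ORs its bit into the same immediate, so "glc slc" and "slc glc" encode identically. A toy stand-in for that behavior, not the real asm-parser code:

```cpp
#include <cstdint>
#include <string>
#include <vector>

// Order-independent flag parsing: every recognized token ORs one bit
// into a single mask, so token order cannot change the encoding.
// Bit values are assumed, matching the sketch above.
uint64_t parseCachePolicy(const std::vector<std::string> &Tokens) {
  uint64_t CPol = 0;
  for (const std::string &Tok : Tokens) {
    if (Tok == "glc")
      CPol |= 1;
    else if (Tok == "slc")
      CPol |= 2;
    else if (Tok == "dlc")
      CPol |= 4;
  }
  return CPol;
}
```
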
diff --git a/llvm/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir b/llvm/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir
index ccd9b1af043b..6ffedea5a9f0 100644
--- a/llvm/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir
+++ b/llvm/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir
@@ -11,7 +11,7 @@ body: |
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
$sgpr4 = IMPLICIT_DEF
$vgpr0 = IMPLICIT_DEF
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
$sgpr0 = S_MOV_B32 0
...
# GCN-LABEL: name: vmem_write_exec
@@ -26,7 +26,7 @@ body: |
$sgpr4 = IMPLICIT_DEF
$vgpr0 = IMPLICIT_DEF
$vgpr1 = IMPLICIT_DEF
- BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr0, renamable $vgpr1, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr0, renamable $vgpr1, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
$exec_lo = S_MOV_B32 -1
...
# GCN-LABEL: name: vmem_write_sgpr_chain
@@ -45,7 +45,7 @@ body: |
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
$sgpr4 = IMPLICIT_DEF
$vgpr0 = IMPLICIT_DEF
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
$sgpr5 = S_MOV_B32 $sgpr0
$sgpr6 = S_MOV_B32 $sgpr1
$sgpr7 = S_MOV_B32 $sgpr2
@@ -63,8 +63,8 @@ body: |
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
$sgpr4 = IMPLICIT_DEF
$vgpr0 = IMPLICIT_DEF
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+ $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
...
# GCN-LABEL: name: vmem_snop_write_sgpr
# GCN: BUFFER_LOAD_DWORD_OFFEN
@@ -78,7 +78,7 @@ body: |
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
$sgpr4 = IMPLICIT_DEF
$vgpr0 = IMPLICIT_DEF
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
S_NOP 0
$sgpr0 = S_MOV_B32 0
...
@@ -93,7 +93,7 @@ body: |
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
$sgpr4 = IMPLICIT_DEF
$vgpr0 = IMPLICIT_DEF
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
$vgpr2 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
$sgpr0 = S_MOV_B32 0
...
@@ -108,7 +108,7 @@ body: |
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
$sgpr4 = IMPLICIT_DEF
$vgpr0 = IMPLICIT_DEF
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
S_WAITCNT 0
$sgpr0 = S_MOV_B32 0
...
@@ -124,7 +124,7 @@ body: |
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
$sgpr4 = IMPLICIT_DEF
$vgpr0 = IMPLICIT_DEF
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
S_WAITCNT 1
$sgpr0 = S_MOV_B32 0
...
@@ -139,7 +139,7 @@ body: |
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
$sgpr4 = IMPLICIT_DEF
$vgpr0 = IMPLICIT_DEF
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
$exec = S_MOV_B64 7
...
# GCN-LABEL: name: vmem_write_exec_expread
@@ -152,7 +152,7 @@ body: |
bb.0:
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
$vgpr0 = IMPLICIT_DEF
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $exec_lo, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $exec_lo, 0, 0, 0, 0, implicit $exec
$exec = S_MOV_B64 7
...
# GCN-LABEL: name: ds_write_m0
@@ -181,7 +181,7 @@ body: |
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
$sgpr4 = IMPLICIT_DEF
$vgpr0 = IMPLICIT_DEF
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
bb.1:
$sgpr0 = S_MOV_B32 0
@@ -199,7 +199,7 @@ body: |
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
$sgpr4 = IMPLICIT_DEF
$vgpr0 = IMPLICIT_DEF
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
S_BRANCH %bb.1
bb.1:
@@ -219,7 +219,7 @@ body: |
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
$sgpr4 = IMPLICIT_DEF
$vgpr0 = IMPLICIT_DEF
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
S_BRANCH %bb.2
bb.1:
@@ -247,7 +247,7 @@ body: |
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
$sgpr4 = IMPLICIT_DEF
$vgpr0 = IMPLICIT_DEF
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
S_CBRANCH_SCC0 %bb.2, implicit $scc
S_BRANCH %bb.1
@@ -275,7 +275,7 @@ body: |
$sgpr0 = S_MOV_B32 0
bb.1:
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
S_BRANCH %bb.0
...
# GCN-LABEL: name: ds_write_exec
@@ -300,7 +300,7 @@ name: vmem_scratch_exec
body: |
bb.0:
$vgpr0 = IMPLICIT_DEF
- $vgpr1 = SCRATCH_LOAD_DWORD undef $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr1 = SCRATCH_LOAD_DWORD undef $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
$exec_lo = S_MOV_B32 -1
...
# GCN-LABEL: name: vmem_flat_exec
@@ -313,7 +313,7 @@ body: |
bb.0:
$vgpr0 = IMPLICIT_DEF
$vgpr1 = IMPLICIT_DEF
- $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
$exec_lo = S_MOV_B32 -1
...
# GCN-LABEL: name: vmem_global_exec
@@ -326,7 +326,7 @@ body: |
bb.0:
$vgpr0 = IMPLICIT_DEF
$vgpr1 = IMPLICIT_DEF
- $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
$exec_lo = S_MOV_B32 -1
...
# GCN-LABEL: name: vmem_global_atomic_exec
@@ -340,6 +340,6 @@ body: |
$vgpr0 = IMPLICIT_DEF
$vgpr1 = IMPLICIT_DEF
$vgpr2 = IMPLICIT_DEF
- $vgpr3 = GLOBAL_ATOMIC_ADD_RTN $vgpr0_vgpr1, $vgpr2, 0, 1, 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst 4, addrspace 1)
+ $vgpr3 = GLOBAL_ATOMIC_ADD_RTN $vgpr0_vgpr1, $vgpr2, 0, 1, implicit $exec :: (load store syncscope("agent") seq_cst 4, addrspace 1)
$exec_lo = S_MOV_B32 -1
...
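
On the decode side, the instruction printer reverses the mapping: one immediate expands back into zero or more textual flags, which is why an operand value of 1 in the atomic hunks above corresponds to the old standalone GLC bit. A toy model of that direction, again under the assumed bit assignments rather than the real printer logic:

```cpp
#include <cstdint>
#include <string>

// One combined immediate prints as a (possibly empty) sequence of flags.
// Bit positions are assumed, consistent with the earlier sketches.
std::string printCachePolicy(uint64_t CPol) {
  std::string S;
  if (CPol & 1)
    S += " glc";
  if (CPol & 2)
    S += " slc";
  if (CPol & 4)
    S += " dlc";
  return S;
}
```
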
diff --git a/llvm/test/CodeGen/AMDGPU/vmem-vcc-hazard.mir b/llvm/test/CodeGen/AMDGPU/vmem-vcc-hazard.mir
index 8c05c7041aa4..182ef1f72ac4 100644
--- a/llvm/test/CodeGen/AMDGPU/vmem-vcc-hazard.mir
+++ b/llvm/test/CodeGen/AMDGPU/vmem-vcc-hazard.mir
@@ -15,7 +15,7 @@ body: |
$vgpr1 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
bb.1:
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec
...
# GCN-LABEL: name: vmem_vcc_branch_to_next
# GCN: bb.1:
@@ -33,7 +33,7 @@ body: |
S_BRANCH %bb.1
bb.1:
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec
...
# GCN-LABEL: name: vmem_vcc_fallthrough_no_hazard_too_far
# GCN: bb.1:
@@ -54,7 +54,7 @@ body: |
$sgpr0 = S_MOV_B32 0
bb.1:
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec
...
# GCN-LABEL: name: vmem_vcc_fallthrough_no_hazard_nops
# GCN: bb.1:
@@ -71,7 +71,7 @@ body: |
S_NOP 4
bb.1:
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec
...
# GCN-LABEL: name: vmem_vcc_branch_around
# GCN: bb.2:
@@ -97,7 +97,7 @@ body: |
S_NOP 0
bb.2:
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec
...
# GCN-LABEL: name: vmem_vcc_branch_backedge
# GCN: S_NOP 3
@@ -110,7 +110,7 @@ body: |
$vgpr0 = IMPLICIT_DEF
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec
bb.1:
$vgpr0 = IMPLICIT_DEF
@@ -139,7 +139,7 @@ body: |
$vgpr1 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
bb.2:
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec
...
# GCN-LABEL: name: vmem_vcc_self_loop
# GCN: S_NOP 3
@@ -152,7 +152,7 @@ body: |
$vgpr0 = IMPLICIT_DEF
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec
$vgpr1 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
S_BRANCH %bb.0
...
@@ -175,7 +175,7 @@ body: |
successors: %bb.1
$sgpr0 = S_MOV_B32 0
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec
$vgpr1 = V_ADDC_U32_e32 $vgpr1, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
S_BRANCH %bb.1
...
@@ -199,7 +199,7 @@ body: |
successors: %bb.1
$sgpr0 = S_MOV_B32 0
- $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec
$vgpr1 = V_ADDC_U32_e32 $vgpr1, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
S_BRANCH %bb.1
...
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-agpr.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-agpr.mir
index a5043423840d..96c61fc1c6f1 100644
--- a/llvm/test/CodeGen/AMDGPU/waitcnt-agpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt-agpr.mir
@@ -59,42 +59,42 @@ body: |
; GCN: bb.0:
; GCN: successors: %bb.1(0x80000000)
; GCN: S_WAITCNT 0
- ; GCN: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.global4, addrspace 1)
- ; GCN: $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.global16, addrspace 1)
+ ; GCN: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.global4, addrspace 1)
+ ; GCN: $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.global16, addrspace 1)
; GCN: S_WAITCNT 3953
; GCN: $agpr0 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec
; GCN: S_BRANCH %bb.1
; GCN: bb.1:
; GCN: successors: %bb.2(0x80000000)
- ; GCN: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; GCN: S_WAITCNT 3952
- ; GCN: $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.global16, addrspace 1)
+ ; GCN: $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.global16, addrspace 1)
; GCN: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
; GCN: S_BRANCH %bb.2
; GCN: bb.2:
; GCN: S_WAITCNT 49279
- ; GCN: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.flat4)
+ ; GCN: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.flat4)
; GCN: S_WAITCNT 3952
- ; GCN: $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.flat16)
+ ; GCN: $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.flat16)
; GCN: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
; GCN: S_ENDPGM 0
bb.0:
successors: %bb.1
- $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.global4)
- $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.global16)
+ $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.global4)
+ $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.global16)
$agpr0 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec
S_BRANCH %bb.1
bb.1:
successors: %bb.2
- $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.global16)
+ $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
+ $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.global16)
$vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
S_BRANCH %bb.2
bb.2:
- $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.flat4)
- $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.flat16)
+ $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.flat4)
+ $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.flat16)
$vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
S_ENDPGM 0
...
@@ -110,19 +110,19 @@ body: |
; GCN: bb.0:
; GCN: successors: %bb.1(0x80000000)
; GCN: S_WAITCNT 0
- ; GCN: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; GCN: bb.1:
; GCN: $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
; GCN: S_WAITCNT 112
- ; GCN: FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: S_ENDPGM 0
bb.0:
successors: %bb.1
- $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
bb.1:
$vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
- FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -139,28 +139,28 @@ body: |
; GCN: bb.0:
; GCN: successors: %bb.2(0x80000000)
; GCN: S_WAITCNT 0
- ; GCN: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; GCN: S_BRANCH %bb.2
; GCN: bb.1:
- ; GCN: FLAT_STORE_DWORD $vgpr8_vgpr9, $agpr10, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: FLAT_STORE_DWORD $vgpr8_vgpr9, $agpr10, 0, 0, implicit $exec, implicit $flat_scr
; GCN: S_ENDPGM 0
; GCN: bb.2:
; GCN: $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
; GCN: S_WAITCNT 112
- ; GCN: FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: S_ENDPGM 0
bb.0:
successors: %bb.2
- $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
S_BRANCH %bb.2
bb.1:
- FLAT_STORE_DWORD $vgpr8_vgpr9, $agpr10, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD $vgpr8_vgpr9, $agpr10, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
bb.2:
$vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
- FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
@@ -175,9 +175,9 @@ machineFunctionInfo:
body: |
bb.0:
liveins: $vgpr1_vgpr2
- $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
S_WAITCNT 0
- FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
...
@@ -192,19 +192,19 @@ body: |
liveins: $vgpr1_vgpr2
; GCN-LABEL: name: bundle_no_waitcnt
; GCN: liveins: $vgpr1_vgpr2
- ; GCN: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; GCN: BUNDLE {
; GCN: S_NOP 0
; GCN: S_NOP 0
; GCN: }
; GCN: S_WAITCNT 112
- ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
+ $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
BUNDLE {
S_NOP 0
S_NOP 0
}
- FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
...
@@ -220,18 +220,18 @@ body: |
liveins: $vgpr1_vgpr2
; GCN-LABEL: name: preexisting_waitcnt_in_bundle
; GCN: liveins: $vgpr1_vgpr2
- ; GCN: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; GCN: BUNDLE {
; GCN: S_NOP 0
; GCN: S_WAITCNT 0
; GCN: }
- ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
+ $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
BUNDLE {
S_NOP 0
S_WAITCNT 0
}
- FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
...
@@ -249,13 +249,13 @@ body: |
; GCN-LABEL: name: insert_in_bundle
; GCN: liveins: $vgpr1_vgpr2
; GCN: BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
- ; GCN: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; GCN: S_WAITCNT 112
- ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, internal $agpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, internal $agpr0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: }
BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
- $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- FLAT_STORE_DWORD $vgpr2_vgpr3, internal $agpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD $vgpr2_vgpr3, internal $agpr0, 0, 0, implicit $exec, implicit $flat_scr
}
...
@@ -274,15 +274,15 @@ body: |
; GCN-LABEL: name: exit_bundle
; GCN: liveins: $vgpr1_vgpr2
; GCN: BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
- ; GCN: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; GCN: }
; GCN: S_WAITCNT 112
- ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
- $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
}
- FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
...
@@ -301,16 +301,16 @@ body: |
; GCN-LABEL: name: cross_bundle
; GCN: liveins: $vgpr1_vgpr2
; GCN: BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
- ; GCN: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; GCN: }
; GCN: S_WAITCNT 112
; GCN: BUNDLE implicit $agpr0, implicit $vgpr2_vgpr3 {
- ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: }
BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
- $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
}
BUNDLE implicit $agpr0, implicit $vgpr2_vgpr3 {
- FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
}
...
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-back-edge-loop.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-back-edge-loop.mir
index 795f0974f480..7945db91c1ab 100644
--- a/llvm/test/CodeGen/AMDGPU/waitcnt-back-edge-loop.mir
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt-back-edge-loop.mir
@@ -13,8 +13,8 @@ body: |
$vgpr1 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr1_vgpr2
$vgpr2 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr1_vgpr2
- $vgpr4 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1)
- $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1)
+ $vgpr4 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1)
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1)
$sgpr0_sgpr1 = V_CMP_EQ_U32_e64 3, killed $sgpr4, implicit $exec
$vgpr3 = V_CNDMASK_B32_e64 0, -1082130432, 0, 1065353216, killed $sgpr0_sgpr1, implicit $exec
$vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec, implicit $exec
@@ -23,7 +23,7 @@ body: |
bb.3:
successors: %bb.1
- $vgpr5 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1)
+ $vgpr5 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1)
bb.1:
successors: %bb.5, %bb.2
@@ -43,7 +43,7 @@ body: |
bb.4:
successors: %bb.3, %bb.1
- $vgpr5 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1)
+ $vgpr5 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1)
$vgpr4 = V_CVT_I32_F32_e32 $vgpr5, implicit $mode, implicit $exec
V_CMP_EQ_U32_e32 2, killed $vgpr4, implicit-def $vcc, implicit $exec
$vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-loop-irreducible.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-loop-irreducible.mir
index ee9978abe848..7fbd45a41b5e 100644
--- a/llvm/test/CodeGen/AMDGPU/waitcnt-loop-irreducible.mir
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt-loop-irreducible.mir
@@ -37,8 +37,8 @@ body: |
bb.2:
successors: %bb.3
- renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0, 0
- renamable $sgpr3 = S_BUFFER_LOAD_DWORD_IMM killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0
+ renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0
+ renamable $sgpr3 = S_BUFFER_LOAD_DWORD_IMM killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0
bb.3:
successors: %bb.1, %bb.4
@@ -73,12 +73,12 @@ name: irreducible_loop_extended
body: |
bb.0:
successors: %bb.1, %bb.2
- $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM renamable $sgpr2_sgpr3, 0, 0, 0
+ $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM renamable $sgpr2_sgpr3, 0, 0
S_CBRANCH_VCCZ %bb.2, implicit $vcc
bb.1:
successors: %bb.2
- BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr3, renamable $vgpr2, renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr3, renamable $vgpr2, renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
bb.2:
successors: %bb.3, %bb.6
@@ -86,18 +86,18 @@ body: |
bb.3:
successors: %bb.4, %bb.5
- BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr3, killed renamable $vgpr2, killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr3, killed renamable $vgpr2, killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
S_CBRANCH_VCCNZ %bb.5, implicit $vcc
bb.4:
successors: %bb.5
- renamable $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM killed renamable $sgpr2_sgpr3, 64, 0, 0
- renamable $vgpr2 = BUFFER_ATOMIC_ADD_OFFSET_RTN killed renamable $vgpr2, killed renamable $sgpr12_sgpr13_sgpr14_sgpr15, 0, 0, 1, 0, implicit $exec
+ renamable $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM killed renamable $sgpr2_sgpr3, 64, 0
+ renamable $vgpr2 = BUFFER_ATOMIC_ADD_OFFSET_RTN killed renamable $vgpr2, killed renamable $sgpr12_sgpr13_sgpr14_sgpr15, 0, 0, 1, implicit $exec
bb.5:
successors: %bb.6
bb.6:
- FLAT_STORE_DWORD $vgpr3_vgpr4, $vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD $vgpr3_vgpr4, $vgpr2, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-loop-single-basic-block.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-loop-single-basic-block.mir
index 5e261bfa352c..55ab3ffd1f42 100644
--- a/llvm/test/CodeGen/AMDGPU/waitcnt-loop-single-basic-block.mir
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt-loop-single-basic-block.mir
@@ -15,11 +15,11 @@ body: |
bb.0:
S_BRANCH %bb.1
bb.1:
- GLOBAL_STORE_DWORD $vgpr7_vgpr8, $vgpr11, 0, 0, 0, 0, 0, implicit $exec
- $vgpr21 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec
- $vgpr10 = GLOBAL_LOAD_DWORD $vgpr10_vgpr11, 0, 0, 0, 0, 0, implicit $exec
- GLOBAL_STORE_DWORD $vgpr14_vgpr15, $vgpr21, 0, 0, 0, 0, 0, implicit $exec
- $vgpr11 = GLOBAL_LOAD_DWORD $vgpr11_vgpr12, 0, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD $vgpr7_vgpr8, $vgpr11, 0, 0, implicit $exec
+ $vgpr21 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec
+ $vgpr10 = GLOBAL_LOAD_DWORD $vgpr10_vgpr11, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD $vgpr14_vgpr15, $vgpr21, 0, 0, implicit $exec
+ $vgpr11 = GLOBAL_LOAD_DWORD $vgpr11_vgpr12, 0, 0, implicit $exec
S_CBRANCH_SCC1 %bb.1, implicit $scc
bb.2:
S_ENDPGM 0
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-meta-instructions.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-meta-instructions.mir
index 63dcb67cd706..067c050344f4 100644
--- a/llvm/test/CodeGen/AMDGPU/waitcnt-meta-instructions.mir
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt-meta-instructions.mir
@@ -12,9 +12,9 @@ body: |
liveins: $vgpr0_vgpr1
; GCN-LABEL: name: waitcnt_kill
; GCN: S_WAITCNT 0
- ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
; GCN: KILL $vgpr0
- $vgpr0 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
KILL $vgpr0
...
@@ -27,9 +27,9 @@ body: |
liveins: $vgpr0_vgpr1
; GCN-LABEL: name: waitcnt_implicit_def
; GCN: S_WAITCNT 0
- ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
; GCN: $vgpr0 = IMPLICIT_DEF
- $vgpr0 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
$vgpr0 = IMPLICIT_DEF
...
@@ -42,9 +42,9 @@ body: |
liveins: $vgpr0_vgpr1, $vgpr2
; GCN-LABEL: name: waitcnt_eh_label
; GCN: S_WAITCNT 0
- ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
; GCN: EH_LABEL <mcsymbol Ltmp0>, implicit $vgpr0
- $vgpr0 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
EH_LABEL <mcsymbol Ltmp0>, implicit $vgpr0
...
@@ -58,9 +58,9 @@ body: |
liveins: $vgpr0_vgpr1, $vgpr2
; GCN-LABEL: name: waitcnt_cfi
; GCN: S_WAITCNT 0
- ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
; GCN: CFI_INSTRUCTION offset $vgpr0_lo16, 16
- $vgpr0 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
CFI_INSTRUCTION offset $vgpr0, 16
...
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-no-redundant.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-no-redundant.mir
index ecaf9cd43cba..92e73f6db39d 100644
--- a/llvm/test/CodeGen/AMDGPU/waitcnt-no-redundant.mir
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt-no-redundant.mir
@@ -17,7 +17,7 @@ body: |
bb.1:
S_WAITCNT 3952
- FLAT_ATOMIC_CMPSWAP undef renamable $vgpr0_vgpr1, renamable $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_ATOMIC_CMPSWAP undef renamable $vgpr0_vgpr1, renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
S_WAITCNT 3952
BUFFER_WBINVL1 implicit $exec
S_BRANCH %bb.1
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-overflow.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-overflow.mir
index 0fcdfc6c2da6..eb8dc2775989 100644
--- a/llvm/test/CodeGen/AMDGPU/waitcnt-overflow.mir
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt-overflow.mir
@@ -109,73 +109,73 @@ body: |
; GFX9-LABEL: name: max-counter-vmcnt
; GFX9: S_WAITCNT 0
- ; GFX9: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 12, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 16, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 20, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 24, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 28, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 32, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 36, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 40, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 44, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 48, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 52, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 56, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 60, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 64, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 68, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 72, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 76, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 80, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 84, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 88, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 92, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 96, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 100, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 104, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 108, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 112, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 116, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 120, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 124, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 128, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr33 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 132, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr34 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 136, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr35 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 140, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr36 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 144, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr37 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 148, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr38 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 152, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr39 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 156, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 160, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 164, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 168, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 172, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 176, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 180, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 184, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 188, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr48 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 192, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr49 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 196, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr50 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 200, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr51 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 204, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr52 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 208, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr53 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 212, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr54 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 216, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr55 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 220, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 224, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 228, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 232, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 236, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 240, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 244, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 248, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 252, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr64 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 256, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr65 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 260, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr66 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 264, 0, 0, 0, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 12, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 16, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 20, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 24, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 28, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 32, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 36, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 40, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 44, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 48, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 52, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 56, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 60, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 64, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 68, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 72, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 76, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 80, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 84, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 88, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 92, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 96, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 100, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 104, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 108, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 112, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 116, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 120, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 124, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 128, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr33 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 132, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr34 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 136, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr35 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 140, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr36 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 144, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr37 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 148, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr38 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 152, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr39 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 156, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 160, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 164, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 168, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 172, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 176, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 180, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 184, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 188, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr48 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 192, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr49 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 196, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr50 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 200, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr51 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 204, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr52 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 208, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr53 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 212, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr54 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 216, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr55 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 220, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 224, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 228, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 232, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 236, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 240, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 244, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 248, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 252, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr64 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 256, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr65 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 260, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr66 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 264, 0, 0, 0, implicit $exec
; GFX9: S_WAITCNT 53118
; GFX9: $vgpr0 = V_MAC_F32_e32 0, $vgpr1, $vgpr0, implicit $mode, implicit $exec
; GFX9: $vgpr1 = V_MAC_F32_e32 0, $vgpr2, $vgpr1, implicit $mode, implicit $exec
@@ -185,146 +185,146 @@ body: |
; GFX10-LABEL: name: max-counter-vmcnt
; GFX10: S_WAITCNT 0
; GFX10: S_WAITCNT_VSCNT undef $sgpr_null, 0
- ; GFX10: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 12, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 16, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 20, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 24, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 28, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 32, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 36, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 40, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 44, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 48, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 52, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 56, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 60, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 64, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 68, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 72, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 76, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 80, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 84, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 88, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 92, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 96, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 100, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 104, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 108, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 112, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 116, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 120, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 124, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 128, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr33 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 132, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr34 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 136, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr35 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 140, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr36 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 144, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr37 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 148, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr38 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 152, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr39 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 156, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 160, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 164, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 168, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 172, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 176, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 180, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 184, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 188, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr48 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 192, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr49 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 196, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr50 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 200, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr51 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 204, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr52 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 208, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr53 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 212, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr54 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 216, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr55 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 220, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 224, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 228, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 232, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 236, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 240, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 244, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 248, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 252, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr64 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 256, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr65 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 260, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX10: $vgpr66 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 264, 0, 0, 0, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 12, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 16, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 20, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 24, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 28, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 32, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 36, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 40, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 44, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 48, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 52, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 56, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 60, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 64, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 68, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 72, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 76, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 80, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 84, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 88, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 92, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 96, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 100, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 104, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 108, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 112, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 116, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 120, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 124, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 128, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr33 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 132, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr34 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 136, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr35 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 140, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr36 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 144, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr37 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 148, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr38 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 152, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr39 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 156, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 160, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 164, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 168, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 172, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 176, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 180, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 184, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 188, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr48 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 192, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr49 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 196, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr50 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 200, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr51 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 204, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr52 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 208, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr53 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 212, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr54 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 216, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr55 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 220, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 224, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 228, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 232, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 236, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 240, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 244, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 248, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 252, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr64 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 256, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr65 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 260, 0, 0, 0, implicit $exec
+ ; GFX10: $vgpr66 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 264, 0, 0, 0, implicit $exec
; GFX10: S_WAITCNT 65406
; GFX10: $vgpr0 = V_MAC_F32_e32 0, $vgpr1, $vgpr0, implicit $mode, implicit $exec
; GFX10: $vgpr1 = V_MAC_F32_e32 0, $vgpr2, $vgpr1, implicit $mode, implicit $exec
; GFX10: $vgpr2 = V_MAC_F32_e32 0, $vgpr3, $vgpr2, implicit $mode, implicit $exec
; GFX10: $vgpr3 = V_MAC_F32_e32 0, $vgpr4, $vgpr3, implicit $mode, implicit $exec
; GFX10: S_ENDPGM 0
- $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 12, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 16, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 20, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 24, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 28, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 32, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 36, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 40, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 44, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 48, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 52, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 56, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 60, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 64, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 68, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 72, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 76, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 80, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 84, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 88, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 92, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 96, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 100, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 104, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 108, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 112, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 116, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 120, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 124, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 128, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr33 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 132, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr34 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 136, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr35 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 140, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr36 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 144, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr37 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 148, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr38 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 152, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr39 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 156, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 160, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 164, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 168, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 172, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 176, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 180, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 184, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 188, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr48 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 192, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr49 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 196, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr50 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 200, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr51 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 204, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr52 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 208, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr53 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 212, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr54 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 216, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr55 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 220, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 224, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 228, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 232, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 236, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 240, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 244, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 248, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 252, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr64 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 256, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr65 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 260, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr66 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 264, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, 0, implicit $exec
+ $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, 0, implicit $exec
+ $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 12, 0, 0, 0, implicit $exec
+ $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 16, 0, 0, 0, implicit $exec
+ $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 20, 0, 0, 0, implicit $exec
+ $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 24, 0, 0, 0, implicit $exec
+ $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 28, 0, 0, 0, implicit $exec
+ $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 32, 0, 0, 0, implicit $exec
+ $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 36, 0, 0, 0, implicit $exec
+ $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 40, 0, 0, 0, implicit $exec
+ $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 44, 0, 0, 0, implicit $exec
+ $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 48, 0, 0, 0, implicit $exec
+ $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 52, 0, 0, 0, implicit $exec
+ $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 56, 0, 0, 0, implicit $exec
+ $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 60, 0, 0, 0, implicit $exec
+ $vgpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 64, 0, 0, 0, implicit $exec
+ $vgpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 68, 0, 0, 0, implicit $exec
+ $vgpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 72, 0, 0, 0, implicit $exec
+ $vgpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 76, 0, 0, 0, implicit $exec
+ $vgpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 80, 0, 0, 0, implicit $exec
+ $vgpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 84, 0, 0, 0, implicit $exec
+ $vgpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 88, 0, 0, 0, implicit $exec
+ $vgpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 92, 0, 0, 0, implicit $exec
+ $vgpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 96, 0, 0, 0, implicit $exec
+ $vgpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 100, 0, 0, 0, implicit $exec
+ $vgpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 104, 0, 0, 0, implicit $exec
+ $vgpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 108, 0, 0, 0, implicit $exec
+ $vgpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 112, 0, 0, 0, implicit $exec
+ $vgpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 116, 0, 0, 0, implicit $exec
+ $vgpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 120, 0, 0, 0, implicit $exec
+ $vgpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 124, 0, 0, 0, implicit $exec
+ $vgpr32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 128, 0, 0, 0, implicit $exec
+ $vgpr33 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 132, 0, 0, 0, implicit $exec
+ $vgpr34 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 136, 0, 0, 0, implicit $exec
+ $vgpr35 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 140, 0, 0, 0, implicit $exec
+ $vgpr36 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 144, 0, 0, 0, implicit $exec
+ $vgpr37 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 148, 0, 0, 0, implicit $exec
+ $vgpr38 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 152, 0, 0, 0, implicit $exec
+ $vgpr39 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 156, 0, 0, 0, implicit $exec
+ $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 160, 0, 0, 0, implicit $exec
+ $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 164, 0, 0, 0, implicit $exec
+ $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 168, 0, 0, 0, implicit $exec
+ $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 172, 0, 0, 0, implicit $exec
+ $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 176, 0, 0, 0, implicit $exec
+ $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 180, 0, 0, 0, implicit $exec
+ $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 184, 0, 0, 0, implicit $exec
+ $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 188, 0, 0, 0, implicit $exec
+ $vgpr48 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 192, 0, 0, 0, implicit $exec
+ $vgpr49 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 196, 0, 0, 0, implicit $exec
+ $vgpr50 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 200, 0, 0, 0, implicit $exec
+ $vgpr51 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 204, 0, 0, 0, implicit $exec
+ $vgpr52 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 208, 0, 0, 0, implicit $exec
+ $vgpr53 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 212, 0, 0, 0, implicit $exec
+ $vgpr54 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 216, 0, 0, 0, implicit $exec
+ $vgpr55 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 220, 0, 0, 0, implicit $exec
+ $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 224, 0, 0, 0, implicit $exec
+ $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 228, 0, 0, 0, implicit $exec
+ $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 232, 0, 0, 0, implicit $exec
+ $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 236, 0, 0, 0, implicit $exec
+ $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 240, 0, 0, 0, implicit $exec
+ $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 244, 0, 0, 0, implicit $exec
+ $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 248, 0, 0, 0, implicit $exec
+ $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 252, 0, 0, 0, implicit $exec
+ $vgpr64 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 256, 0, 0, 0, implicit $exec
+ $vgpr65 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 260, 0, 0, 0, implicit $exec
+ $vgpr66 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 264, 0, 0, 0, implicit $exec
$vgpr0 = V_MAC_F32_e32 0, $vgpr1, $vgpr0, implicit $mode, implicit $exec
$vgpr1 = V_MAC_F32_e32 0, $vgpr2, $vgpr1, implicit $mode, implicit $exec
$vgpr2 = V_MAC_F32_e32 0, $vgpr3, $vgpr2, implicit $mode, implicit $exec
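(Annotation, not part of the patch, to help decode the operand lists above: the buffer
instructions previously carried glc, slc, tfe, dlc, swz and scc as individual immediates
after the offset; they now carry a single cpol bitmask followed by tfe and swz, which is
why six trailing zeros become three. Assuming the CPol values this patch adds to
SIDefines.h (GLC = 1, SLC = 2, DLC = 4, SCC = 16), a glc+slc load would be written as:

  $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 3, 0, 0, implicit $exec

where cpol = 3 encodes glc|slc.)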
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting.mir
index 16f527114d7f..5246232fabd1 100644
--- a/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting.mir
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting.mir
@@ -20,7 +20,7 @@ body: |
bb.0:
liveins: $sgpr0, $sgpr1, $vgpr0
- renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX8_IMM renamable $sgpr0_sgpr1, 480, 0, 0
+ renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX8_IMM renamable $sgpr0_sgpr1, 480, 0
renamable $vgpr13 = V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec
S_WAITCNT -16257
renamable $vgpr0_vgpr1 = DS_READ2_B32 renamable $vgpr13, 0, 1, 0, implicit $m0, implicit $exec
@@ -32,6 +32,6 @@ body: |
$vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec, implicit $exec
$vgpr2 = V_MOV_B32_e32 $vgpr1, implicit $exec, implicit $exec
$vgpr3 = V_MOV_B32_e32 $vgpr1, implicit $exec, implicit $exec
- IMAGE_STORE_V4_V2 killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr0_vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, -1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16)
+ IMAGE_STORE_V4_V2 killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr0_vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (store 16)
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-vmem-waw.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-vmem-waw.mir
index f56a913da359..3b8027de6c7d 100644
--- a/llvm/test/CodeGen/AMDGPU/waitcnt-vmem-waw.mir
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt-vmem-waw.mir
@@ -11,10 +11,10 @@ body: |
; GFX9-LABEL: name: buffer_buffer
; GFX9: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $sgpr5
; GFX9: S_WAITCNT 0
- ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = BUFFER_LOAD_DWORDX4_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr0_vgpr1_vgpr2_vgpr3 = BUFFER_LOAD_DWORDX4_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = BUFFER_LOAD_DWORDX4_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, 0, 0, 0, implicit $exec
+ $vgpr0_vgpr1_vgpr2_vgpr3 = BUFFER_LOAD_DWORDX4_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, 0, 0, 0, implicit $exec
...
# Two tbuffer loads with overlapping outputs. No waitcnt required.
@@ -27,10 +27,10 @@ body: |
; GFX9-LABEL: name: tbuffer_tbuffer
; GFX9: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $sgpr5
; GFX9: S_WAITCNT 0
- ; GFX9: $vgpr0_vgpr1_vgpr2 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 125, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GFX9: $vgpr0 = TBUFFER_LOAD_FORMAT_X_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 116, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr0_vgpr1_vgpr2 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 125, 0, 0, 0, 0, 0, 0, implicit $exec
- $vgpr0 = TBUFFER_LOAD_FORMAT_X_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 116, 0, 0, 0, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr0_vgpr1_vgpr2 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 125, 0, 0, 0, implicit $exec
+ ; GFX9: $vgpr0 = TBUFFER_LOAD_FORMAT_X_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 116, 0, 0, 0, implicit $exec
+ $vgpr0_vgpr1_vgpr2 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 125, 0, 0, 0, implicit $exec
+ $vgpr0 = TBUFFER_LOAD_FORMAT_X_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 116, 0, 0, 0, implicit $exec
...
# Two gathers with overlapping outputs. (Note gathers can't be trimmed because
@@ -44,10 +44,10 @@ body: |
; GFX9-LABEL: name: gather_gather
; GFX9: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5
; GFX9: S_WAITCNT 0
- ; GFX9: $vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_GATHER4_LZ_O_V4_V3 $vgpr0_vgpr1_vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
- ; GFX9: $vgpr13_vgpr14_vgpr15_vgpr16 = IMAGE_GATHER4_LZ_O_V4_V3 $vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
- $vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_GATHER4_LZ_O_V4_V3 $vgpr0_vgpr1_vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
- $vgpr13_vgpr14_vgpr15_vgpr16 = IMAGE_GATHER4_LZ_O_V4_V3 $vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
+ ; GFX9: $vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_GATHER4_LZ_O_V4_V3 $vgpr0_vgpr1_vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
+ ; GFX9: $vgpr13_vgpr14_vgpr15_vgpr16 = IMAGE_GATHER4_LZ_O_V4_V3 $vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
+ $vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_GATHER4_LZ_O_V4_V3 $vgpr0_vgpr1_vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
+ $vgpr13_vgpr14_vgpr15_vgpr16 = IMAGE_GATHER4_LZ_O_V4_V3 $vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
...
# Image load vs image sample. Waitcnt required because they are not guaranteed
@@ -62,9 +62,9 @@ body: |
; GFX9-LABEL: name: nosampler_sampler
; GFX9: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, $vgpr0_vgpr1_vgpr2_vgpr3
; GFX9: S_WAITCNT 0
- ; GFX9: $vgpr4 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
+ ; GFX9: $vgpr4 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
; GFX9: S_WAITCNT 3952
- ; GFX9: $vgpr4 = IMAGE_SAMPLE_L_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (load 16)
- $vgpr4 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
- $vgpr4 = IMAGE_SAMPLE_L_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 8, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (load 16)
+ ; GFX9: $vgpr4 = IMAGE_SAMPLE_L_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (load 16)
+ $vgpr4 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
+ $vgpr4 = IMAGE_SAMPLE_L_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (load 16)
...
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.mir
index c2b3e54fbe35..24b1afeece13 100644
--- a/llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.mir
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.mir
@@ -10,8 +10,8 @@ machineFunctionInfo:
body: |
bb.0:
liveins: $sgpr0_sgpr1
- $sgpr4 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+ $sgpr4 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
S_WAITCNT_VSCNT undef $sgpr_null, 0
- $vgpr0 = GLOBAL_ATOMIC_ADD_RTN $vgpr0_vgpr1, $vgpr2, 0, 1, 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst 4, addrspace 1)
+ $vgpr0 = GLOBAL_ATOMIC_ADD_RTN $vgpr0_vgpr1, $vgpr2, 0, 1, implicit $exec :: (load store syncscope("agent") seq_cst 4, addrspace 1)
S_CMP_LG_U32 killed $sgpr4, 0, implicit-def $scc
...
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt.mir b/llvm/test/CodeGen/AMDGPU/waitcnt.mir
index a0cc9464d5ae..0b79a092c678 100644
--- a/llvm/test/CodeGen/AMDGPU/waitcnt.mir
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt.mir
@@ -87,34 +87,34 @@ name: flat_zero_waitcnt
body: |
bb.0:
successors: %bb.1
- $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.global4)
- $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.global16)
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.global4)
+ $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.global16)
$vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec
S_BRANCH %bb.1
bb.1:
successors: %bb.2
- $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.global16)
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.global16)
$vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec
S_BRANCH %bb.2
bb.2:
successors: %bb.3
- $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.flat4)
- $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.flat16)
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.flat4)
+ $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.flat16)
$vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec
S_BRANCH %bb.3
bb.3:
successors: %bb.4
- $vgpr3 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.flat4)
- $vgpr4 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.global4)
+ $vgpr3 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.flat4)
+ $vgpr4 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.global4)
$vgpr0 = V_MOV_B32_e32 $vgpr3, implicit $exec
S_BRANCH %bb.4
bb.4:
- $vgpr5 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.flat4)
+ $vgpr5 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.flat4)
$vgpr0 = V_MOV_B32_e32 $vgpr5, implicit $exec
S_ENDPGM 0
...
@@ -135,11 +135,11 @@ name: single_fallthrough_successor_no_end_block_wait
body: |
bb.0:
successors: %bb.1
- $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec, implicit $flat_scr
bb.1:
$vgpr3_vgpr4 = V_LSHLREV_B64_e64 4, $vgpr7_vgpr8, implicit $exec
- FLAT_STORE_DWORD $vgpr3_vgpr4, $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD $vgpr3_vgpr4, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -162,16 +162,16 @@ name: single_branch_successor_not_next_block
body: |
bb.0:
successors: %bb.2
- $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec, implicit $flat_scr
S_BRANCH %bb.2
bb.1:
- FLAT_STORE_DWORD $vgpr8_vgpr9, $vgpr10, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD $vgpr8_vgpr9, $vgpr10, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
bb.2:
$vgpr3_vgpr4 = V_LSHLREV_B64_e64 4, $vgpr7_vgpr8, implicit $exec
- FLAT_STORE_DWORD $vgpr3_vgpr4, $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD $vgpr3_vgpr4, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
@@ -186,9 +186,9 @@ machineFunctionInfo:
body: |
bb.0:
liveins: $vgpr1_vgpr2
- $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec, implicit $flat_scr
S_WAITCNT 0
- FLAT_STORE_DWORD $vgpr1_vgpr2, $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD $vgpr1_vgpr2, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
...
@@ -208,12 +208,12 @@ machineFunctionInfo:
body: |
bb.0:
liveins: $vgpr1_vgpr2
- $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec, implicit $flat_scr
BUNDLE {
S_NOP 0
S_NOP 0
}
- FLAT_STORE_DWORD $vgpr1_vgpr2, $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD $vgpr1_vgpr2, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
...
@@ -231,12 +231,12 @@ machineFunctionInfo:
body: |
bb.0:
liveins: $vgpr1_vgpr2
- $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec, implicit $flat_scr
BUNDLE {
S_NOP 0
S_WAITCNT 0
}
- FLAT_STORE_DWORD $vgpr1_vgpr2, $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD $vgpr1_vgpr2, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
...
@@ -245,9 +245,9 @@ body: |
# Def and use inside bundle
# CHECK-LABEL: name: insert_in_bundle{{$}}
# CHECK: BUNDLE implicit-def $vgpr0, implicit $vgpr1_vgpr2 {
-# CHECK-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+# CHECK-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec, implicit $flat_scr
# CHECK-NEXT: S_WAITCNT 112
-# CHECK-NEXT: FLAT_STORE_DWORD $vgpr1_vgpr2, internal $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+# CHECK-NEXT: FLAT_STORE_DWORD $vgpr1_vgpr2, internal $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
# CHECK-NEXT: }
name: insert_in_bundle
@@ -258,8 +258,8 @@ body: |
bb.0:
liveins: $vgpr1_vgpr2
BUNDLE implicit-def $vgpr0, implicit $vgpr1_vgpr2 {
- $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- FLAT_STORE_DWORD $vgpr1_vgpr2, internal $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD $vgpr1_vgpr2, internal $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
}
...
@@ -269,10 +269,10 @@ body: |
# CHECK-LABEL: name: exit_bundle{{$}}
# CHECK: BUNDLE implicit-def $vgpr0, implicit $vgpr1_vgpr2 {
-# CHECK-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+# CHECK-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec, implicit $flat_scr
# CHECK-NEXT: }
# CHECK-NEXT: S_WAITCNT 112
-# CHECK-NEXT: FLAT_STORE_DWORD $vgpr1_vgpr2, $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+# CHECK-NEXT: FLAT_STORE_DWORD $vgpr1_vgpr2, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
name: exit_bundle
tracksRegLiveness: true
@@ -282,10 +282,10 @@ body: |
bb.0:
liveins: $vgpr1_vgpr2
BUNDLE implicit-def $vgpr0, implicit $vgpr1_vgpr2 {
- $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec, implicit $flat_scr
}
- FLAT_STORE_DWORD $vgpr1_vgpr2, $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD $vgpr1_vgpr2, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
...
@@ -295,11 +295,11 @@ body: |
# CHECK-LABEL: name: cross_bundle{{$}}
# CHECK: BUNDLE implicit-def $vgpr0, implicit $vgpr1_vgpr2 {
-# CHECK-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+# CHECK-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec, implicit $flat_scr
# CHECK-NEXT: }
# CHECK-NEXT: S_WAITCNT 112
# CHECK-NEXT: BUNDLE implicit $vgpr0, implicit $vgpr1_vgpr2 {
-# CHECK-NEXT: FLAT_STORE_DWORD $vgpr1_vgpr2, $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+# CHECK-NEXT: FLAT_STORE_DWORD $vgpr1_vgpr2, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
# CHECK-NEXT: }
name: cross_bundle
@@ -310,10 +310,10 @@ body: |
bb.0:
liveins: $vgpr1_vgpr2
BUNDLE implicit-def $vgpr0, implicit $vgpr1_vgpr2 {
- $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec, implicit $flat_scr
}
BUNDLE implicit $vgpr0, implicit $vgpr1_vgpr2 {
- FLAT_STORE_DWORD $vgpr1_vgpr2, $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD $vgpr1_vgpr2, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
}
...
@@ -328,7 +328,7 @@ machineFunctionInfo:
body: |
bb.0:
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4
- $vgpr0 = FLAT_LOAD_USHORT killed $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
- $vgpr1 = FLAT_LOAD_USHORT killed $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0 = FLAT_LOAD_USHORT killed $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr1 = FLAT_LOAD_USHORT killed $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
V_NOP_e32 implicit $exec, implicit $vgpr0_lo16, implicit $vgpr1_lo16
...
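(Likewise for the flat instructions above, annotation only: FLAT_LOAD/FLAT_STORE drop
their individual cache-flag immediates, leaving just offset and cpol before the implicit
operands. A sketch of a glc flat load under the same assumed CPol encoding:

  $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 1, implicit $exec, implicit $flat_scr)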
diff --git a/llvm/test/CodeGen/AMDGPU/wqm.mir b/llvm/test/CodeGen/AMDGPU/wqm.mir
index 0c339ccfcf6a..78ee39de8d99 100644
--- a/llvm/test/CodeGen/AMDGPU/wqm.mir
+++ b/llvm/test/CodeGen/AMDGPU/wqm.mir
@@ -73,7 +73,7 @@ body: |
bb.1:
S_CMP_LT_I32 0, %0:sgpr_32, implicit-def $scc
- %10:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %3:vgpr_32, %13:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ %10:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %3:vgpr_32, %13:sgpr_128, 0, 0, 0, 0, 0, implicit $exec
%12:vgpr_32 = V_ADD_CO_U32_e32 %3:vgpr_32, %3:vgpr_32, implicit-def $vcc, implicit $exec
%5:sgpr_32 = S_CSELECT_B32 %2:sgpr_32, %1:sgpr_32, implicit $scc
%11:vgpr_32 = V_ADD_CO_U32_e32 %5:sgpr_32, %12:vgpr_32, implicit-def $vcc, implicit $exec
@@ -130,14 +130,14 @@ body: |
%6:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
%5:sgpr_128 = COPY %6
%7:sreg_32 = S_MOV_B32 0
- %8:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %6, %7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ %8:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %6, %7, 0, 0, 0, 0, implicit $exec
%16:vgpr_32 = COPY %8.sub1
%11:vgpr_32 = COPY %16
%10:vgpr_32 = V_SET_INACTIVE_B32 %11, undef %12:sreg_32, implicit $exec, implicit-def $scc
%14:vgpr_32 = COPY %7
%13:vgpr_32 = V_MOV_B32_dpp %14, killed %10, 323, 12, 15, 0, implicit $exec
early-clobber %15:vgpr_32 = STRICT_WWM killed %13, implicit $exec
- BUFFER_STORE_DWORD_OFFSET_exact killed %15, %6, %7, 4, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET_exact killed %15, %6, %7, 4, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -162,7 +162,7 @@ body: |
%0:sgpr_32 = COPY $sgpr0
%4:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
%5:sreg_32 = S_MOV_B32 0
- %6:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %4, %5, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ %6:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %4, %5, 0, 0, 0, 0, implicit $exec
%8:sreg_64 = COPY $exec
%9:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
@@ -172,7 +172,7 @@ body: |
early-clobber %13:sreg_32 = STRICT_WWM %9:vgpr_32, implicit $exec
%14:vgpr_32 = COPY %13
- BUFFER_STORE_DWORD_OFFSET_exact killed %14, %4, %5, 4, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET_exact killed %14, %4, %5, 4, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -210,20 +210,20 @@ body: |
undef %7.sub0:vreg_64 = COPY %2:vgpr_32
%7.sub1:vreg_64 = COPY %3:vgpr_32
- %4:vreg_128 = IMAGE_SAMPLE_V4_V2 %7:vreg_64, %100:sgpr_256, %101:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+ %4:vreg_128 = IMAGE_SAMPLE_V4_V2 %7:vreg_64, %100:sgpr_256, %101:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
S_CMP_EQ_U32 %8:sgpr_32, 0, implicit-def $scc
undef %5.sub0:vreg_64 = nsz arcp nofpexcept V_ADD_F32_e64 0, %4.sub0:vreg_128, 0, %3:vgpr_32, 1, 0, implicit $mode, implicit $exec
%5.sub1:vreg_64 = nsz arcp nofpexcept V_MUL_F32_e32 %2, %3, implicit $mode, implicit $exec
%6:vgpr_32 = nsz arcp nofpexcept V_ADD_F32_e64 0, %2:vgpr_32, 0, %3:vgpr_32, 1, 0, implicit $mode, implicit $exec
- %9:vreg_128 = IMAGE_SAMPLE_V4_V2 %5:vreg_64, %100:sgpr_256, %101:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
+ %9:vreg_128 = IMAGE_SAMPLE_V4_V2 %5:vreg_64, %100:sgpr_256, %101:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4)
S_CBRANCH_SCC0 %bb.2, implicit $scc
bb.1:
%10:sreg_32 = S_MOV_B32 0
- BUFFER_STORE_DWORD_OFFSET_exact %6:vgpr_32, %101:sgpr_128, %10:sreg_32, 4, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET_exact %6:vgpr_32, %101:sgpr_128, %10:sreg_32, 4, 0, 0, 0, implicit $exec
S_ENDPGM 0
bb.2:
@@ -251,7 +251,7 @@ body: |
liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0
%0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:vgpr_32 = COPY $vgpr0
- %2:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN %1:vgpr_32, %0:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ %2:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN %1:vgpr_32, %0:sgpr_128, 0, 0, 0, 0, 0, implicit $exec
%2.sub0:vreg_64 = V_SET_INACTIVE_B32 %2.sub0:vreg_64, 0, implicit $exec, implicit-def $scc
%2.sub1:vreg_64 = V_SET_INACTIVE_B32 %2.sub1:vreg_64, 0, implicit $exec, implicit-def $scc
%3:vreg_64 = nnan nsz arcp contract reassoc nofpexcept V_MAX_F64_e64 0, %2:vreg_64, 0, %2:vreg_64, 0, 0, implicit $mode, implicit $exec
@@ -276,7 +276,7 @@ body: |
undef %0.sub0:vreg_64 = V_MOV_B32_e32 -10, implicit $exec
%0.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
%1:sreg_64 = S_GETPC_B64
- %2:sgpr_256 = S_LOAD_DWORDX8_IMM %1:sreg_64, 32, 0, 0
+ %2:sgpr_256 = S_LOAD_DWORDX8_IMM %1:sreg_64, 32, 0
bb.1:
$vcc = V_CMP_LT_U32_e64 4, 4, implicit $exec
@@ -293,7 +293,7 @@ body: |
bb.4:
%3:sgpr_128 = IMPLICIT_DEF
- %4:vreg_128 = IMAGE_SAMPLE_V4_V2 %0:vreg_64, %2:sgpr_256, %3:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
+ %4:vreg_128 = IMAGE_SAMPLE_V4_V2 %0:vreg_64, %2:sgpr_256, %3:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
$vgpr0 = COPY %4.sub0:vreg_128
$vgpr1 = COPY %4.sub1:vreg_128
SI_RETURN_TO_EPILOG $vgpr0, $vgpr1
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/custom-pseudo-source-values.ll b/llvm/test/CodeGen/MIR/AMDGPU/custom-pseudo-source-values.ll
index a0818486af94..c8442e399da8 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/custom-pseudo-source-values.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/custom-pseudo-source-values.ll
@@ -4,8 +4,8 @@
; Test that custom pseudo source values can be round trip serialized through MIR.
; CHECK-LABEL: {{^}}name: shader
-; CHECK: %[[#]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET killed %17, %18, 4, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4, align 1, addrspace 4)
-; CHECK: IMAGE_STORE_V4_V3_nsa_gfx10 killed %[[#]], %[[#]], %[[#]], %[[#]], killed %[[#]], 15, 2, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "ImageResource")
+; CHECK: %[[#]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET killed %17, %18, 4, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4, align 1, addrspace 4)
+; CHECK: IMAGE_STORE_V4_V3_nsa_gfx10 killed %[[#]], %[[#]], %[[#]], %[[#]], killed %[[#]], 15, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "ImageResource")
; CHECK: DS_GWS_BARRIER %[[#]], 63, implicit $m0, implicit $exec :: (load 4 from custom "GWSResource")
define amdgpu_cs void @shader(i32 %arg0, i32 %arg1, <8 x i32> inreg %arg2, <4 x i32> inreg %arg3) {
%bload0 = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %arg3, i32 4, i32 0, i32 0)
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/load-store-opt-dlc.mir b/llvm/test/CodeGen/MIR/AMDGPU/load-store-opt-dlc.mir
index d96659c9dee1..659c4dbef31a 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/load-store-opt-dlc.mir
+++ b/llvm/test/CodeGen/MIR/AMDGPU/load-store-opt-dlc.mir
@@ -32,7 +32,7 @@
}
...
-# CHECK: BUFFER_STORE_DWORDX2_OFFSET killed %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 8 into %ir.out.gep.1, align 4, addrspace 1)
+# CHECK: BUFFER_STORE_DWORDX2_OFFSET killed %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 0, 0, 0, implicit $exec :: (store 8 into %ir.out.gep.1, align 4, addrspace 1)
---
name: test1
liveins:
@@ -48,7 +48,7 @@ body: |
$sgpr3 = S_MOV_B32 61440
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sgpr_64 = S_LOAD_DWORDX2_IMM %1, 36, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, addrspace 4)
+ %1:sgpr_64 = S_LOAD_DWORDX2_IMM %1, 36, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, addrspace 4)
%2:sgpr_32 = COPY $sgpr2
%3:sgpr_32 = COPY $sgpr3
%4:sgpr_128 = REG_SEQUENCE %1, %2, %3
@@ -56,14 +56,14 @@ body: |
%5:vgpr_32 = COPY $vgpr0
%6:vgpr_32 = COPY $vgpr1
- BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
- BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
+ BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
+ BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
S_ENDPGM 0
...
-# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 0, 0, 0, 1, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
-# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 8, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
+# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 4, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
+# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 8, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
---
name: test2
liveins:
@@ -79,7 +79,7 @@ body: |
$sgpr3 = S_MOV_B32 61440
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sgpr_64 = S_LOAD_DWORDX2_IMM %1, 36, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, addrspace 4)
+ %1:sgpr_64 = S_LOAD_DWORDX2_IMM %1, 36, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, addrspace 4)
%2:sgpr_32 = COPY $sgpr2
%3:sgpr_32 = COPY $sgpr3
%4:sgpr_128 = REG_SEQUENCE %1, %2, %3
@@ -87,14 +87,14 @@ body: |
%5:vgpr_32 = COPY $vgpr0
%6:vgpr_32 = COPY $vgpr1
- BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 0, 0, 0, 1, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
- BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
+ BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 4, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
+ BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
S_ENDPGM 0
...
-# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
-# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 8, 0, 0, 0, 1, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
+# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
+# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 8, 4, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
---
name: test3
liveins:
@@ -110,7 +110,7 @@ body: |
$sgpr3 = S_MOV_B32 61440
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sgpr_64 = S_LOAD_DWORDX2_IMM %1, 36, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, addrspace 4)
+ %1:sgpr_64 = S_LOAD_DWORDX2_IMM %1, 36, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, addrspace 4)
%2:sgpr_32 = COPY $sgpr2
%3:sgpr_32 = COPY $sgpr3
%4:sgpr_128 = REG_SEQUENCE %1, %2, %3
@@ -118,13 +118,13 @@ body: |
%5:vgpr_32 = COPY $vgpr0
%6:vgpr_32 = COPY $vgpr1
- BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
- BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 0, 0, 0, 1, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
+ BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
+ BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 4, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
S_ENDPGM 0
...
-# CHECK: BUFFER_STORE_DWORDX2_OFFSET killed %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 0, 0, 0, 1, 0, 0, implicit $exec :: (store 8 into %ir.out.gep.1, align 4, addrspace 1)
+# CHECK: BUFFER_STORE_DWORDX2_OFFSET killed %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 4, 0, 0, implicit $exec :: (store 8 into %ir.out.gep.1, align 4, addrspace 1)
---
name: test4
liveins:
@@ -140,7 +140,7 @@ body: |
$sgpr3 = S_MOV_B32 61440
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sgpr_64 = S_LOAD_DWORDX2_IMM %1, 36, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, addrspace 4)
+ %1:sgpr_64 = S_LOAD_DWORDX2_IMM %1, 36, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, addrspace 4)
%2:sgpr_32 = COPY $sgpr2
%3:sgpr_32 = COPY $sgpr3
%4:sgpr_128 = REG_SEQUENCE %1, %2, %3
@@ -148,8 +148,8 @@ body: |
%5:vgpr_32 = COPY $vgpr0
%6:vgpr_32 = COPY $vgpr1
- BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 0, 0, 0, 1, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
- BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 0, 0, 0, 1, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
+ BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 4, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
+ BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 4, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
S_ENDPGM 0
...
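(The load-store-opt-dlc.mir update above shows the bit packing directly: the old
standalone dlc operand with value 1 becomes cpol = 4, consistent with DLC occupying
bit 2 of the combined mask.)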
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/load-store-opt-scc.mir b/llvm/test/CodeGen/MIR/AMDGPU/load-store-opt-scc.mir
index 9a889083564a..a168cdc7d2fe 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/load-store-opt-scc.mir
+++ b/llvm/test/CodeGen/MIR/AMDGPU/load-store-opt-scc.mir
@@ -32,7 +32,7 @@
}
...
-# CHECK: BUFFER_STORE_DWORDX2_OFFSET killed %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 8 into %ir.out.gep.1, align 4, addrspace 1)
+# CHECK: BUFFER_STORE_DWORDX2_OFFSET killed %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 0, 0, 0, implicit $exec :: (store 8 into %ir.out.gep.1, align 4, addrspace 1)
---
name: test1
liveins:
@@ -48,7 +48,7 @@ body: |
$sgpr3 = S_MOV_B32 61440
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sgpr_64 = S_LOAD_DWORDX2_IMM %1, 36, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, addrspace 4)
+ %1:sgpr_64 = S_LOAD_DWORDX2_IMM %1, 36, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, addrspace 4)
%2:sgpr_32 = COPY $sgpr2
%3:sgpr_32 = COPY $sgpr3
%4:sgpr_128 = REG_SEQUENCE %1, %2, %3
@@ -56,14 +56,14 @@ body: |
%5:vgpr_32 = COPY $vgpr0
%6:vgpr_32 = COPY $vgpr1
- BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
- BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
+ BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
+ BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
S_ENDPGM 0
...
-# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 0, 0, 0, 0, 0, 1, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
-# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 8, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
+# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 16, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
+# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 8, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
---
name: test2
liveins:
@@ -79,7 +79,7 @@ body: |
$sgpr3 = S_MOV_B32 61440
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sgpr_64 = S_LOAD_DWORDX2_IMM %1, 36, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, addrspace 4)
+ %1:sgpr_64 = S_LOAD_DWORDX2_IMM %1, 36, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, addrspace 4)
%2:sgpr_32 = COPY $sgpr2
%3:sgpr_32 = COPY $sgpr3
%4:sgpr_128 = REG_SEQUENCE %1, %2, %3
@@ -87,14 +87,14 @@ body: |
%5:vgpr_32 = COPY $vgpr0
%6:vgpr_32 = COPY $vgpr1
- BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 0, 0, 0, 0, 0, 1, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
- BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
+ BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 16, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
+ BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
S_ENDPGM 0
...
-# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
-# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 8, 0, 0, 0, 0, 0, 1, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
+# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
+# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 8, 16, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
---
name: test3
liveins:
@@ -110,7 +110,7 @@ body: |
$sgpr3 = S_MOV_B32 61440
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sgpr_64 = S_LOAD_DWORDX2_IMM %1, 36, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, addrspace 4)
+ %1:sgpr_64 = S_LOAD_DWORDX2_IMM %1, 36, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, addrspace 4)
%2:sgpr_32 = COPY $sgpr2
%3:sgpr_32 = COPY $sgpr3
%4:sgpr_128 = REG_SEQUENCE %1, %2, %3
@@ -118,13 +118,13 @@ body: |
%5:vgpr_32 = COPY $vgpr0
%6:vgpr_32 = COPY $vgpr1
- BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
- BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 0, 0, 0, 0, 0, 1, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
+ BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
+ BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 16, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
S_ENDPGM 0
...
-# CHECK: BUFFER_STORE_DWORDX2_OFFSET killed %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 0, 0, 0, 0, 0, 1, implicit $exec :: (store 8 into %ir.out.gep.1, align 4, addrspace 1)
+# CHECK: BUFFER_STORE_DWORDX2_OFFSET killed %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 16, 0, 0, implicit $exec :: (store 8 into %ir.out.gep.1, align 4, addrspace 1)
---
name: test4
liveins:
@@ -140,7 +140,7 @@ body: |
$sgpr3 = S_MOV_B32 61440
%0:sgpr_64 = COPY $sgpr0_sgpr1
- %1:sgpr_64 = S_LOAD_DWORDX2_IMM %1, 36, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, addrspace 4)
+ %1:sgpr_64 = S_LOAD_DWORDX2_IMM %1, 36, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, addrspace 4)
%2:sgpr_32 = COPY $sgpr2
%3:sgpr_32 = COPY $sgpr3
%4:sgpr_128 = REG_SEQUENCE %1, %2, %3
@@ -148,8 +148,8 @@ body: |
%5:vgpr_32 = COPY $vgpr0
%6:vgpr_32 = COPY $vgpr1
- BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 0, 0, 0, 0, 0, 1, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
- BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 0, 0, 0, 0, 0, 1, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
+ BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 16, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
+ BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 16, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
S_ENDPGM 0
...
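(Same pattern in load-store-opt-scc.mir: the old trailing scc = 1 operand becomes
cpol = 16, i.e. SCC occupies bit 4 of the combined mask.)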
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/mir-canon-multi.mir b/llvm/test/CodeGen/MIR/AMDGPU/mir-canon-multi.mir
index 24193e93751d..785cd20d3196 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/mir-canon-multi.mir
+++ b/llvm/test/CodeGen/MIR/AMDGPU/mir-canon-multi.mir
@@ -12,14 +12,14 @@ body: |
; CHECK: %bb0_{{[0-9]+}}__1:sreg_32_xm0 = S_MOV_B32 0
; CHECK: %bb0_{{[0-9]+}}__1:vgpr_32 = COPY $vgpr0
; CHECK: %bb0_{{[0-9]+}}__1:sgpr_64 = COPY $sgpr0_sgpr1
- ; CHECK: %bb0_{{[0-9]+}}__1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %bb0_{{[0-9]+}}__1, 9, 0, 0
- ; CHECK: %bb0_{{[0-9]+}}__1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %bb0_{{[0-9]+}}__1, 11, 0, 0
+ ; CHECK: %bb0_{{[0-9]+}}__1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %bb0_{{[0-9]+}}__1, 9, 0
+ ; CHECK: %bb0_{{[0-9]+}}__1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %bb0_{{[0-9]+}}__1, 11, 0
; CHECK: %bb0_{{[0-9]+}}__1:vgpr_32 = COPY %bb0_{{[0-9]+}}__1
; CHECK: %bb0_{{[0-9]+}}__1:vgpr_32 = COPY %bb0_{{[0-9]+}}__1
; CHECK: %bb0_{{[0-9]+}}__2:vgpr_32 = COPY %bb0_{{[0-9]+}}__1
; CHECK: %bb0_{{[0-9]+}}__1:vreg_64 = REG_SEQUENCE %bb0_{{[0-9]+}}__1, %subreg.sub0, %bb0_{{[0-9]+}}__1, %subreg.sub1
; CHECK: %bb0_{{[0-9]+}}__1:sgpr_128 = REG_SEQUENCE %bb0_{{[0-9]+}}__1, %subreg.sub0, %bb0_{{[0-9]+}}__1, %subreg.sub1, %bb0_{{[0-9]+}}__1, %subreg.sub2, %bb0_{{[0-9]+}}__2, %subreg.sub3
- ; CHECK: BUFFER_STORE_DWORD_ADDR64 %bb0_{{[0-9]+}}__1, %bb0_{{[0-9]+}}__1, %bb0_{{[0-9]+}}__1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ ; CHECK: BUFFER_STORE_DWORD_ADDR64 %bb0_{{[0-9]+}}__1, %bb0_{{[0-9]+}}__1, %bb0_{{[0-9]+}}__1, 0, 0, 0, 0, 0, implicit $exec
; CHECK: S_ENDPGM 0
%10:sreg_32_xm0 = S_MOV_B32 61440
%11:sreg_32_xm0 = S_MOV_B32 0
@@ -29,13 +29,13 @@ body: |
%0:sgpr_64 = COPY $sgpr0_sgpr1
%vreg123_1:vgpr_32 = COPY %11
%27:vreg_64 = REG_SEQUENCE %vreg123_0, %subreg.sub0, %vreg123_1, %subreg.sub1
- %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 9, 0, 0
+ %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 9, 0
%vreg123_2:vgpr_32 = COPY %4
- %5:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 11, 0, 0
+ %5:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 11, 0
%vreg123_3:vgpr_32 = COPY %5
%16:sgpr_128 = REG_SEQUENCE killed %vreg123_0, %subreg.sub0, %vreg123_1, %subreg.sub1, %vreg123_2, %subreg.sub2, %vreg123_3, %subreg.sub3
- BUFFER_STORE_DWORD_ADDR64 %vreg123_1, %27, killed %16, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_ADDR64 %vreg123_1, %27, killed %16, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/mircanon-memoperands.mir b/llvm/test/CodeGen/MIR/AMDGPU/mircanon-memoperands.mir
index 31f7d7b1b6a4..01828eb4f652 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/mircanon-memoperands.mir
+++ b/llvm/test/CodeGen/MIR/AMDGPU/mircanon-memoperands.mir
@@ -33,11 +33,11 @@ body: |
; CHECK-NEXT: %bb0_{{[0-9]+}}__1:sreg_64_xexec = S_LOAD_DWORDX2_IMM
%0 = COPY $sgpr4_sgpr5
- %1 = S_LOAD_DWORDX2_IMM %0, 0, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
- %2 = S_LOAD_DWORDX2_IMM %0, 0, 0, 0 :: ( dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
- %3 = S_LOAD_DWORDX2_IMM %0, 0, 0, 0 :: ( invariant load 8 from `i64 addrspace(4)* undef`)
- %4 = S_LOAD_DWORDX2_IMM %0, 0, 0, 0 :: ( load 8 from `i64 addrspace(4)* undef`)
- %5 = S_LOAD_DWORDX2_IMM %0, 0, 0, 0 :: ( load 8 from `i64 addrspace(2)* undef`)
- %6 = S_LOAD_DWORDX2_IMM %0, 0, 0, 0 :: ( load 8 from `i64 addrspace(1)* undef`)
+ %1 = S_LOAD_DWORDX2_IMM %0, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %2 = S_LOAD_DWORDX2_IMM %0, 0, 0 :: ( dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ %3 = S_LOAD_DWORDX2_IMM %0, 0, 0 :: ( invariant load 8 from `i64 addrspace(4)* undef`)
+ %4 = S_LOAD_DWORDX2_IMM %0, 0, 0 :: ( load 8 from `i64 addrspace(4)* undef`)
+ %5 = S_LOAD_DWORDX2_IMM %0, 0, 0 :: ( load 8 from `i64 addrspace(2)* undef`)
+ %6 = S_LOAD_DWORDX2_IMM %0, 0, 0 :: ( load 8 from `i64 addrspace(1)* undef`)
...
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/parse-order-reserved-regs.mir b/llvm/test/CodeGen/MIR/AMDGPU/parse-order-reserved-regs.mir
index 8a34e84d2d1a..be154815bbb9 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/parse-order-reserved-regs.mir
+++ b/llvm/test/CodeGen/MIR/AMDGPU/parse-order-reserved-regs.mir
@@ -12,7 +12,7 @@
# CHECK: isEntryFunction: true
# CHECK: scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
# CHECK: frameOffsetReg: '$sgpr50'
-# CHECK: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr50, 4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
+# CHECK: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr50, 4, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
name: reserve_correct_register
tracksRegLiveness: true
machineFunctionInfo:
@@ -24,6 +24,6 @@ stack:
body: |
bb.0:
- renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr50, 4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
+ renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr50, 4, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/stack-id-assert.mir b/llvm/test/CodeGen/MIR/AMDGPU/stack-id-assert.mir
index d161a59bc42f..7b2ef70da2d6 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/stack-id-assert.mir
+++ b/llvm/test/CodeGen/MIR/AMDGPU/stack-id-assert.mir
@@ -51,7 +51,7 @@ body: |
ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @foo + 4, target-flags(amdgpu-gotprel32-hi) @foo + 12, implicit-def dead $scc
- renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0, 0
+ renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0
SI_SPILL_S64_SAVE killed renamable $sgpr30_sgpr31, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, @foo, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3
renamable $sgpr30_sgpr31 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/syncscopes.mir b/llvm/test/CodeGen/MIR/AMDGPU/syncscopes.mir
index 47ecb1552789..44155cd765e1 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/syncscopes.mir
+++ b/llvm/test/CodeGen/MIR/AMDGPU/syncscopes.mir
@@ -42,9 +42,9 @@
!0 = !{i32 1}
# GCN-LABEL: name: syncscopes
-# GCN: FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("agent") seq_cst 4 into %ir.agent_out, addrspace 4)
-# GCN: FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("workgroup") seq_cst 4 into %ir.workgroup_out, addrspace 4)
-# GCN: FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("wavefront") seq_cst 4 into %ir.wavefront_out, addrspace 4)
+# GCN: FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("agent") seq_cst 4 into %ir.agent_out, addrspace 4)
+# GCN: FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("workgroup") seq_cst 4 into %ir.workgroup_out, addrspace 4)
+# GCN: FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("wavefront") seq_cst 4 into %ir.wavefront_out, addrspace 4)
...
---
name: syncscopes
@@ -74,27 +74,27 @@ body: |
liveins: $sgpr4_sgpr5
S_WAITCNT 0
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr4_sgpr5, 8, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
- $sgpr6 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 0, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
- $sgpr2_sgpr3 = S_LOAD_DWORDX2_IMM $sgpr4_sgpr5, 24, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
- $sgpr7 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 16, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
- $sgpr8 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 32, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr4_sgpr5, 8, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ $sgpr6 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+ $sgpr2_sgpr3 = S_LOAD_DWORDX2_IMM $sgpr4_sgpr5, 24, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ $sgpr7 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 16, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
+ $sgpr8 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 32, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
S_WAITCNT 127
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
- $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr4_sgpr5, 40, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+ $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr4_sgpr5, 40, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit killed $sgpr0_sgpr1, implicit $sgpr0_sgpr1, implicit $exec
$vgpr2 = V_MOV_B32_e32 killed $sgpr6, implicit $exec, implicit $exec
- FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("agent") seq_cst 4 into %ir.agent_out)
+ FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("agent") seq_cst 4 into %ir.agent_out)
S_WAITCNT 112
$vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr2_sgpr3
$vgpr1 = V_MOV_B32_e32 killed $sgpr3, implicit $exec, implicit killed $sgpr2_sgpr3, implicit $sgpr2_sgpr3, implicit $exec
$vgpr2 = V_MOV_B32_e32 killed $sgpr7, implicit $exec, implicit $exec
- FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("workgroup") seq_cst 4 into %ir.workgroup_out)
+ FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("workgroup") seq_cst 4 into %ir.workgroup_out)
S_WAITCNT 112
$vgpr0 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr4_sgpr5
$vgpr1 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit killed $sgpr4_sgpr5, implicit $sgpr4_sgpr5, implicit $exec
$vgpr2 = V_MOV_B32_e32 killed $sgpr8, implicit $exec, implicit $exec
- FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("wavefront") seq_cst 4 into %ir.wavefront_out)
+ FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("wavefront") seq_cst 4 into %ir.wavefront_out)
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/target-index-operands.mir b/llvm/test/CodeGen/MIR/AMDGPU/target-index-operands.mir
index f6a608ec9b31..f9e5157c6d28 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/target-index-operands.mir
+++ b/llvm/test/CodeGen/MIR/AMDGPU/target-index-operands.mir
@@ -40,19 +40,19 @@ body: |
$sgpr2 = S_ADD_U32 $sgpr2, target-index(amdgpu-constdata-start), implicit-def $scc, implicit-def $scc
$sgpr3 = S_ADDC_U32 $sgpr3, 0, implicit-def $scc, implicit $scc, implicit-def $scc, implicit $scc
$sgpr4_sgpr5 = S_LSHR_B64 $sgpr2_sgpr3, 32, implicit-def dead $scc
- $sgpr6 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 11, 0, 0
+ $sgpr6 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 11, 0
$sgpr7 = S_ASHR_I32 $sgpr6, 31, implicit-def dead $scc
$sgpr6_sgpr7 = S_LSHL_B64 $sgpr6_sgpr7, 2, implicit-def dead $scc
$sgpr2 = S_ADD_U32 $sgpr2, @float_gv, implicit-def $scc
$sgpr3 = S_ADDC_U32 $sgpr4, 0, implicit-def dead $scc, implicit $scc
$sgpr4 = S_ADD_U32 $sgpr2, $sgpr6, implicit-def $scc
$sgpr5 = S_ADDC_U32 $sgpr3, $sgpr7, implicit-def dead $scc, implicit $scc
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 0, 0, 0
- $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 9, 0, 0
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 0, 0
+ $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 9, 0
$sgpr7 = S_MOV_B32 61440
$sgpr6 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
---
@@ -70,18 +70,18 @@ body: |
$sgpr2 = S_ADD_U32 $sgpr2, target-index(amdgpu-constdata-start) + 1, implicit-def $scc, implicit-def $scc
$sgpr3 = S_ADDC_U32 $sgpr3, 0, implicit-def $scc, implicit $scc, implicit-def $scc, implicit $scc
$sgpr4_sgpr5 = S_LSHR_B64 $sgpr2_sgpr3, 32, implicit-def dead $scc
- $sgpr6 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 11, 0, 0
+ $sgpr6 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 11, 0
$sgpr7 = S_ASHR_I32 $sgpr6, 31, implicit-def dead $scc
$sgpr6_sgpr7 = S_LSHL_B64 $sgpr6_sgpr7, 2, implicit-def dead $scc
$sgpr2 = S_ADD_U32 $sgpr2, @float_gv, implicit-def $scc
$sgpr3 = S_ADDC_U32 $sgpr4, 0, implicit-def dead $scc, implicit $scc
$sgpr4 = S_ADD_U32 $sgpr2, $sgpr6, implicit-def $scc
$sgpr5 = S_ADDC_U32 $sgpr3, $sgpr7, implicit-def dead $scc, implicit $scc
- $sgpr2 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 0, 0, 0
- $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 9, 0, 0
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 0, 0
+ $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 9, 0
$sgpr7 = S_MOV_B32 61440
$sgpr6 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
diff --git a/llvm/test/MC/AMDGPU/atomic-fadd-insts.s b/llvm/test/MC/AMDGPU/atomic-fadd-insts.s
index 3b60a0ed2d08..2ea5b34eaf11 100644
--- a/llvm/test/MC/AMDGPU/atomic-fadd-insts.s
+++ b/llvm/test/MC/AMDGPU/atomic-fadd-insts.s
@@ -41,7 +41,7 @@ buffer_atomic_add_f32 v5, off, s[8:11], s3 offset:7
// GFX908: encoding: [0x07,0x00,0x34,0xe1,0x00,0x05,0x02,0x03]
buffer_atomic_add_f32 v5, off, s[8:11], s3 offset:4095 glc
-// GFX908-ERR: error: operands are not valid for this GPU or mode
+// GFX908-ERR: error: instruction must not use glc
buffer_atomic_add_f32 v5, off, s[8:11], s3 offset:4095 slc
// GFX908: encoding: [0xff,0x0f,0x36,0xe1,0x00,0x05,0x02,0x03]
@@ -86,7 +86,7 @@ buffer_atomic_pk_add_f16 v5, off, s[8:11], s3 offset:7
// GFX908: encoding: [0x07,0x00,0x38,0xe1,0x00,0x05,0x02,0x03]
buffer_atomic_pk_add_f16 v5, off, s[8:11], s3 offset:4095 glc
-// GFX908-ERR: error: operands are not valid for this GPU or mode
+// GFX908-ERR: error: instruction must not use glc
buffer_atomic_pk_add_f16 v5, off, s[8:11], s3 offset:4095 slc
// GFX908: encoding: [0xff,0x0f,0x3a,0xe1,0x00,0x05,0x02,0x03]
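(The error-text change above reflects the reworked parser diagnostics: gfx908 provides
only the no-return buffer_atomic_*_f32 forms, so an explicit glc is now rejected with
the targeted "instruction must not use glc" rather than the generic operands error.)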
diff --git a/llvm/test/MC/AMDGPU/cpol-err.s b/llvm/test/MC/AMDGPU/cpol-err.s
new file mode 100644
index 000000000000..1172c8a0e199
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/cpol-err.s
@@ -0,0 +1,46 @@
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck %s --implicit-check-not=error: --strict-whitespace
+
+scratch_load_ubyte v1, v2, off cpol:2
+// CHECK: error: not a valid operand.
+// CHECK-NEXT:{{^}}scratch_load_ubyte v1, v2, off cpol:2
+// CHECK-NEXT:{{^}} ^
+
+scratch_load_ubyte v1, v2, off glc slc dlc
+// CHECK: error: dlc modifier is not supported on this GPU
+// CHECK-NEXT:{{^}}scratch_load_ubyte v1, v2, off glc slc dlc
+// CHECK-NEXT:{{^}} ^
+
+global_atomic_add v[3:4], v5, off slc glc
+// CHECK: error: instruction must not use glc
+// CHECK-NEXT:{{^}}global_atomic_add v[3:4], v5, off slc glc
+// CHECK-NEXT:{{^}} ^
+
+global_atomic_add v0, v[1:2], v2, off glc 1
+// CHECK: error: invalid operand for instruction
+// CHECK-NEXT:{{^}}global_atomic_add v0, v[1:2], v2, off glc 1
+// CHECK-NEXT:{{^}} ^
+
+global_load_dword v3, v[0:1], off slc glc noglc
+// CHECK: error: duplicate cache policy modifier
+// CHECK-NEXT:{{^}}global_load_dword v3, v[0:1], off slc glc noglc
+// CHECK-NEXT:{{^}} ^
+
+global_load_dword v3, v[0:1], off slc glc glc
+// CHECK: error: duplicate cache policy modifier
+// CHECK-NEXT:{{^}}global_load_dword v3, v[0:1], off slc glc glc
+// CHECK-NEXT:{{^}} ^
+
+global_load_dword v3, v[0:1], off slc noglc noglc
+// CHECK: error: duplicate cache policy modifier
+// CHECK-NEXT:{{^}}global_load_dword v3, v[0:1], off slc noglc noglc
+// CHECK-NEXT:{{^}} ^
+
+global_atomic_add v[3:4], v5, off slc noglc glc
+// CHECK: error: duplicate cache policy modifier
+// CHECK-NEXT:{{^}}global_atomic_add v[3:4], v5, off slc noglc glc
+// CHECK-NEXT:{{^}} ^
+
+s_load_dword s1, s[2:3], 0xfc glc slc
+// CHECK: error: invalid cache policy for SMRD instruction
+// CHECK-NEXT:{{^}}s_load_dword s1, s[2:3], 0xfc glc slc
+// CHECK-NEXT:{{^}}^
diff --git a/llvm/test/MC/AMDGPU/flat-gfx10.s b/llvm/test/MC/AMDGPU/flat-gfx10.s
index bb1aed9cbdb4..0e524fd312ac 100644
--- a/llvm/test/MC/AMDGPU/flat-gfx10.s
+++ b/llvm/test/MC/AMDGPU/flat-gfx10.s
@@ -38,10 +38,10 @@ flat_atomic_cmpswap v[1:2], v[3:4] slc
// GFX10: encoding: [0x00,0x00,0xc6,0xdc,0x01,0x03,0x7d,0x00]
flat_atomic_cmpswap v[1:2], v[3:4] offset:2047 glc
-// GFX10-ERR: error: invalid operand for instruction
+// GFX10-ERR: error: instruction must not use glc
flat_atomic_cmpswap v[1:2], v[3:4] glc
-// GFX10-ERR: error: invalid operand for instruction
+// GFX10-ERR: error: instruction must not use glc
flat_atomic_cmpswap v0, v[1:2], v[3:4] offset:2047 glc
// GFX10: encoding: [0xff,0x07,0xc5,0xdc,0x01,0x03,0x7d,0x00]
diff --git a/llvm/test/MC/AMDGPU/flat-gfx9.s b/llvm/test/MC/AMDGPU/flat-gfx9.s
index da1ec062dece..e997c06bee3c 100644
--- a/llvm/test/MC/AMDGPU/flat-gfx9.s
+++ b/llvm/test/MC/AMDGPU/flat-gfx9.s
@@ -53,10 +53,11 @@ flat_atomic_cmpswap v[1:2], v[3:4] slc
// VI: flat_atomic_cmpswap v[1:2], v[3:4] slc ; encoding: [0x00,0x00,0x06,0xdd,0x01,0x03,0x00,0x00]
flat_atomic_cmpswap v[1:2], v[3:4] offset:4095 glc
-// GCNERR: error: invalid operand for instruction
+// GFX9-ERR: error: instruction must not use glc
+// VI-ERR: error: flat offset modifier is not supported on this GPU
flat_atomic_cmpswap v[1:2], v[3:4] glc
-// GCNERR: error: invalid operand for instruction
+// GCNERR: error: instruction must not use glc
flat_atomic_cmpswap v0, v[1:2], v[3:4] offset:4095 glc
// GFX9: flat_atomic_cmpswap v0, v[1:2], v[3:4] offset:4095 glc ; encoding: [0xff,0x0f,0x05,0xdd,0x01,0x03,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx90a_asm_features.s b/llvm/test/MC/AMDGPU/gfx90a_asm_features.s
index 42ee200b6bdb..38fa212175d8 100644
--- a/llvm/test/MC/AMDGPU/gfx90a_asm_features.s
+++ b/llvm/test/MC/AMDGPU/gfx90a_asm_features.s
@@ -324,19 +324,19 @@ v_pk_mov_b32 v[0:1], v[2:3], v[4:5] op_sel:[1,1]
// GFX90A: tbuffer_load_format_xyzw v[4:7], off, s[0:3], 0 scc ; encoding: [0x00,0x80,0x09,0xe8,0x00,0x04,0x20,0x80]
// GFX1010: error: not a valid operand.
-// GFX908: error: failed parsing operand.
+// GFX908: error: scc modifier is not supported on this GPU
tbuffer_load_format_xyzw v[4:7], off, s[0:3], dfmt:1, nfmt:0, 0 scc
// GFX90A: tbuffer_load_format_xyzw v[4:7], off, s[0:3], 0 glc scc ; encoding: [0x00,0xc0,0x09,0xe8,0x00,0x04,0x20,0x80]
// GFX1010: error: not a valid operand.
-// GFX908: error: failed parsing operand.
+// GFX908: error: scc modifier is not supported on this GPU
tbuffer_load_format_xyzw v[4:7], off, s[0:3], dfmt:1, nfmt:0, 0 glc scc
-// NOT-GFX90A: error: failed parsing operand
+// NOT-GFX90A: error: scc modifier is not supported on this GPU
// GFX90A: buffer_load_dword v5, off, s[8:11], s3 offset:4095 scc ; encoding: [0xff,0x8f,0x50,0xe0,0x00,0x05,0x02,0x03]
buffer_load_dword v5, off, s[8:11], s3 offset:4095 scc
-// NOT-GFX90A: error: failed parsing operand
+// NOT-GFX90A: error: scc modifier is not supported on this GPU
// GFX90A: buffer_load_dword v5, off, s[8:11], s3 offset:4095 glc scc ; encoding: [0xff,0xcf,0x50,0xe0,0x00,0x05,0x02,0x03]
buffer_load_dword v5, off, s[8:11], s3 offset:4095 glc scc
@@ -564,11 +564,11 @@ ds_add_rtn_f64 v[4:5], v1, v[2:3] offset:4
// GFX90A: ds_add_rtn_f64 v[4:5], v1, v[2:3] offset:65535 gds ; encoding: [0xff,0xff,0xf9,0xd8,0x01,0x02,0x00,0x04]
ds_add_rtn_f64 v[4:5], v1, v[2:3] offset:65535 gds
-// NOT-GFX90A: error: failed parsing operand
+// NOT-GFX90A: error: scc modifier is not supported on this GPU
// GFX90A: flat_load_dword v0, v[0:1] scc ; encoding: [0x00,0x00,0x50,0xde,0x00,0x00,0x00,0x00]
flat_load_dword v0, v[0:1] scc
-// NOT-GFX90A: error: failed parsing operand
+// NOT-GFX90A: error: scc modifier is not supported on this GPU
// GFX90A: flat_load_dword v0, v[0:1] glc scc ; encoding: [0x00,0x00,0x51,0xde,0x00,0x00,0x00,0x00]
flat_load_dword v0, v[0:1] glc scc
@@ -680,7 +680,7 @@ global_atomic_min_f64 v[0:1], v[2:3], off
// GFX90A: global_atomic_max_f64 v[0:1], v[2:3], off ; encoding: [0x00,0x80,0x44,0xdd,0x00,0x02,0x7f,0x00]
global_atomic_max_f64 v[0:1], v[2:3], off
-// NOT-GFX90A: error: failed parsing operand
+// NOT-GFX90A: error: scc modifier is not supported on this GPU
// GFX90A: image_load v[0:4], v2, s[0:7] dmask:0xf unorm scc ; encoding: [0x80,0x1f,0x00,0xf0,0x02,0x00,0x00,0x00]
image_load v[0:4], v2, s[0:7] dmask:0xf unorm scc
@@ -973,17 +973,17 @@ v_xor_b32 v6, v29, v27 row_newbcast:15
// GFX90A: buffer_atomic_add_f32 v0, v2, s[4:7], 0 idxen glc ; encoding: [0x00,0x60,0x34,0xe1,0x02,0x00,0x01,0x80]
// GFX1010: error: instruction not supported on this GPU
-// GFX908: error: operands are not valid for this GPU or mode
+// GFX908: error: instruction must not use glc
buffer_atomic_add_f32 v0, v2, s[4:7], 0 idxen glc
// GFX90A: buffer_atomic_add_f32 v0, v2, s[4:7], 0 idxen glc ; encoding: [0x00,0x60,0x34,0xe1,0x02,0x00,0x01,0x80]
// GFX1010: error: instruction not supported on this GPU
-// GFX908: error: operands are not valid for this GPU or mode
+// GFX908: error: instruction must not use glc
buffer_atomic_add_f32 v0, v2, s[4:7], 0 idxen glc
// GFX90A: buffer_atomic_pk_add_f16 v0, v2, s[4:7], 0 idxen glc ; encoding: [0x00,0x60,0x38,0xe1,0x02,0x00,0x01,0x80]
// GFX1010: error: instruction not supported on this GPU
-// GFX908: error: operands are not valid for this GPU or mode
+// GFX908: error: instruction must not use glc
buffer_atomic_pk_add_f16 v0, v2, s[4:7], 0 idxen glc
// GFX90A: global_atomic_add_f32 v0, v[0:1], v2, off glc ; encoding: [0x00,0x80,0x35,0xdd,0x00,0x02,0x7f,0x00]
@@ -1021,7 +1021,7 @@ flat_atomic_max_f64 v[0:1], v[0:1], v[2:3] glc
flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] glc
// GFX90A: global_atomic_add v[2:3], v5, off scc ; encoding: [0x00,0x80,0x08,0xdf,0x02,0x05,0x7f,0x00]
-// NOT-GFX90A: error: failed parsing operand.
+// NOT-GFX90A: error: scc modifier is not supported on this GPU
global_atomic_add v[2:3], v5, off scc
// GFX90A: global_atomic_add_f32 v0, v[0:1], v2, off glc ; encoding: [0x00,0x80,0x35,0xdd,0x00,0x02,0x7f,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx90a_err.s b/llvm/test/MC/AMDGPU/gfx90a_err.s
index 438d36c8eb19..246291961599 100644
--- a/llvm/test/MC/AMDGPU/gfx90a_err.s
+++ b/llvm/test/MC/AMDGPU/gfx90a_err.s
@@ -196,37 +196,37 @@ image_sample_b v[0:3], v[0:1], s[4:11], s[16:19] dmask:0xf
// GFX90A: error: instruction not supported on this GPU
global_atomic_add_f32 v0, v[0:1], v2, off glc scc
-// GFX90A: error: invalid operand for instruction
+// GFX90A: error: instruction must not use scc
global_atomic_add_f32 v[0:1], v2, off scc
-// GFX90A: error: invalid operand for instruction
+// GFX90A: error: instruction must not use scc
global_atomic_add_f32 v0, v2, s[0:1] scc
-// GFX90A: error: invalid operand for instruction
+// GFX90A: error: instruction must not use scc
global_atomic_add_f32 v1, v0, v2, s[0:1] glc scc
-// GFX90A: error: invalid operand for instruction
+// GFX90A: error: instruction must not use scc
global_atomic_pk_add_f16 v0, v[0:1], v2, off glc scc
-// GFX90A: error: invalid operand for instruction
+// GFX90A: error: instruction must not use scc
flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] glc scc
-// GFX90A: error: invalid operand for instruction
+// GFX90A: error: instruction must not use scc
flat_atomic_add_f64 v[0:1], v[2:3] scc
-// GFX90A: error: invalid operand for instruction
+// GFX90A: error: instruction must not use scc
flat_atomic_min_f64 v[0:1], v[2:3] scc
-// GFX90A: error: invalid operand for instruction
+// GFX90A: error: instruction must not use scc
flat_atomic_max_f64 v[0:1], v[2:3] scc
-// GFX90A: error: invalid operand for instruction
+// GFX90A: error: instruction must not use scc
global_atomic_add_f64 v[0:1], v[2:3], off scc
-// GFX90A: error: invalid operand for instruction
+// GFX90A: error: instruction must not use scc
global_atomic_min_f64 v[0:1], v[2:3], off scc
-// GFX90A: error: invalid operand for instruction
+// GFX90A: error: instruction must not use scc
global_atomic_max_f64 v[0:1], v[2:3], off scc
-// GFX90A: error: invalid operand for instruction
+// GFX90A: error: instruction must not use scc
diff --git a/llvm/test/MC/AMDGPU/mubuf-gfx10.s b/llvm/test/MC/AMDGPU/mubuf-gfx10.s
index 78ade9eb4588..1c34a768fc41 100644
--- a/llvm/test/MC/AMDGPU/mubuf-gfx10.s
+++ b/llvm/test/MC/AMDGPU/mubuf-gfx10.s
@@ -4,7 +4,7 @@ buffer_load_sbyte v5, off, s[8:11], s3 glc slc lds
// GFX10: buffer_load_sbyte v5, off, s[8:11], s3 glc slc lds ; encoding: [0x00,0x40,0x25,0xe0,0x00,0x05,0x42,0x03]
buffer_load_sbyte v5, off, s[8:11], s3 glc slc lds dlc
-// GFX10: buffer_load_sbyte v5, off, s[8:11], s3 glc slc lds dlc ; encoding: [0x00,0xc0,0x25,0xe0,0x00,0x05,0x42,0x03]
+// GFX10: buffer_load_sbyte v5, off, s[8:11], s3 glc slc dlc lds ; encoding: [0x00,0xc0,0x25,0xe0,0x00,0x05,0x42,0x03]
buffer_load_sbyte v5, off, s[8:11], s3 glc slc dlc
// GFX10: buffer_load_sbyte v5, off, s[8:11], s3 glc slc dlc ; encoding: [0x00,0xc0,0x24,0xe0,0x00,0x05,0x42,0x03]
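
The mechanical shape of the test churn above falls out of the representation change: every S_LOAD_* drops one trailing immediate and every BUFFER_* drops several, because the separate glc/slc/dlc immediates collapse into one bitmask operand, and the assembler can then accept the textual modifiers in any order while printing them canonically. Below is a minimal standalone C++ sketch of that idea. The bit values mirror the CPol enum this commit adds to SIDefines.h, but treat them as illustrative rather than authoritative; printCPol is a made-up helper, not an LLVM API.

#include <cstdint>
#include <iostream>
#include <string>

namespace CPol {
enum : uint32_t {
  GLC = 1u << 0, // globally coherent
  SLC = 1u << 1, // system level coherent
  DLC = 1u << 2, // device level coherent (gfx10+)
  SCC = 1u << 4, // system cache coherent (gfx90a)
};
} // namespace CPol

// Render a cache-policy bitmask the way a printer would emit the asm
// modifiers: a fixed canonical order, independent of parse order.
std::string printCPol(uint32_t Policy) {
  std::string S;
  if (Policy & CPol::GLC) S += " glc";
  if (Policy & CPol::SLC) S += " slc";
  if (Policy & CPol::DLC) S += " dlc";
  if (Policy & CPol::SCC) S += " scc";
  return S;
}

int main() {
  // Parsing "slc glc" and "glc slc" yields the same operand value, which is
  // why one bitmask operand makes modifier order irrelevant to the parser.
  uint32_t A = CPol::SLC | CPol::GLC;
  uint32_t B = CPol::GLC | CPol::SLC;
  std::cout << "A:" << printCPol(A) << "\n"; // A: glc slc
  std::cout << "B:" << printCPol(B) << "\n"; // B: glc slc
  std::cout << (A == B ? "equal" : "different") << "\n"; // equal
}

A single bitmask also explains the mubuf-gfx10.s change at the end of the diff: "glc slc lds dlc" now round-trips as "glc slc dlc lds", since the printer emits the whole cache policy in one canonical group.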