[llvm] fdaad48 - AMDGPU/GlobalISel: Initial selection of MUBUF addr64 load/store
Galina Kistanova via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 29 12:00:10 PST 2020
Hello Matt,
Your commit broke a test on a couple of our builders:
http://lab.llvm.org:8011/builders/clang-with-lto-ubuntu/builds/15535
http://lab.llvm.org:8011/builders/clang-with-thin-lto-ubuntu
. . .
Failing Tests (1):
LLVM :: CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir
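For reference, the failing test should be reproducible locally by running it
through lit from a build directory, e.g. (paths are illustrative and assume a
standard CMake build):

  ./bin/llvm-lit -v llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir

or by invoking one of the newly added RUN lines by hand, e.g.:

  llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - inst-select-load-global.mir

If it does not reproduce with a default build, the configuration of the
builders above may be relevant.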
Could you please have a look ASAP?
Thanks
Galina
On Mon, Jan 27, 2020 at 7:14 AM Matt Arsenault via llvm-commits <llvm-commits at lists.llvm.org> wrote:
>
> Author: Matt Arsenault
> Date: 2020-01-27T07:13:56-08:00
> New Revision: fdaad485e620de39ea578e02535c6e75e44581ff
>
> URL: https://github.com/llvm/llvm-project/commit/fdaad485e620de39ea578e02535c6e75e44581ff
> DIFF: https://github.com/llvm/llvm-project/commit/fdaad485e620de39ea578e02535c6e75e44581ff.diff
>
> LOG: AMDGPU/GlobalISel: Initial selection of MUBUF addr64 load/store
>
> Fixes the main reason for compile failures on SI, but doesn't really
> try to use the addressing modes yet.
>
> Added:
>
>
> Modified:
> llvm/lib/Target/AMDGPU/AMDGPUGISel.td
> llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
> llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
> llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
> llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll
> llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir
> llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.mir
> llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.mir
> llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.mir
> llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.mir
> llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir
> llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-implicit-def.mir
> llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-inttoptr.mir
> llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir
> llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir
> llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir
> llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir
> llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.dec.ll
> llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll
> llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.append.ll
> llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.consume.ll
> llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll
> llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
>
> Removed:
>
>
>
>
> ################################################################################
> diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
> b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
> index 44b27dde14cc..5cbbc283821f 100644
> --- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
> +++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
> @@ -84,6 +84,10 @@ def gi_ds_1addr_1offset :
> GIComplexOperandMatcher<s32, "selectDS1Addr1Offset">,
> GIComplexPatternEquiv<DS1Addr1Offset>;
>
> +def gi_mubuf_addr64 :
> + GIComplexOperandMatcher<s64, "selectMUBUFAddr64">,
> + GIComplexPatternEquiv<MUBUFAddr64>;
> +
>
> // Separate load nodes are defined to glue m0 initialization in
> // SelectionDAG. The GISel selector can just insert m0 initialization
>
> diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
> b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
> index 2b6308dc1549..eb4c4e0eba01 100644
> --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
> +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
> @@ -1343,6 +1343,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
> SDValue &TFE, SDValue &DLC,
> SDValue &SWZ) const {
> // Subtarget prefers to use flat instruction
> + // FIXME: This should be a pattern predicate and not reach here
> if (Subtarget->useFlatForGlobal())
> return false;
>
> @@ -1438,6 +1439,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
> SDValue Ptr, Offen, Idxen, Addr64;
>
> // addr64 bit was removed for volcanic islands.
> + // FIXME: This should be a pattern predicate and not reach here
> if (!Subtarget->hasAddr64())
> return false;
>
>
> diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
> b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
> index ce47e56da02a..c580e72aefbb 100644
> --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
> +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
> @@ -2552,6 +2552,84 @@ AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
> }};
> }
>
> +static void addZeroImm(MachineInstrBuilder &MIB) {
> + MIB.addImm(0);
> +}
> +
> +/// Return a resource descriptor for use with an arbitrary 64-bit pointer. If \p
> +/// BasePtr is not valid, a null base pointer will be used.
> +static Register buildRSrc(MachineInstr *MI, MachineRegisterInfo &MRI,
> + const SIInstrInfo &TII, Register BasePtr) {
> + Register RSrc2 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
> + Register RSrc3 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
> + Register RSrcHi = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
> + Register RSrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
> +
> + const DebugLoc &DL = MI->getDebugLoc();
> + MachineBasicBlock *BB = MI->getParent();
> +
> + // TODO: Try to use a real pointer if available.
> + BuildMI(*BB, MI, DL, TII.get(AMDGPU::S_MOV_B32), RSrc2)
> + .addImm(0);
> + BuildMI(*BB, MI, DL, TII.get(AMDGPU::S_MOV_B32), RSrc3)
> + .addImm(TII.getDefaultRsrcDataFormat() >> 32);
> +
> + // Build the half of the subregister with the constants before building the
> + // full 128-bit register. If we are building multiple resource descriptors,
> + // this will allow CSEing of the 2-component register.
> + BuildMI(*BB, MI, DL, TII.get(AMDGPU::REG_SEQUENCE), RSrcHi)
> + .addReg(RSrc2)
> + .addImm(AMDGPU::sub0)
> + .addReg(RSrc3)
> + .addImm(AMDGPU::sub1);
> +
> + Register RSrcLo = BasePtr;
> + if (!BasePtr) {
> + RSrcLo = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
> + BuildMI(*BB, MI, DL, TII.get(AMDGPU::S_MOV_B64), RSrcLo)
> + .addImm(0);
> + }
> +
> + BuildMI(*BB, MI, DL, TII.get(AMDGPU::REG_SEQUENCE), RSrc)
> + .addReg(RSrcLo)
> + .addImm(AMDGPU::sub0_sub1)
> + .addReg(RSrcHi)
> + .addImm(AMDGPU::sub2_sub3);
> +
> + return RSrc;
> +}
> +
> +InstructionSelector::ComplexRendererFns
> +AMDGPUInstructionSelector::selectMUBUFAddr64(MachineOperand &Root) const {
> + // FIXME: Predicates should stop this from reaching here.
> + // addr64 bit was removed for volcanic islands.
> + if (!STI.hasAddr64() || STI.useFlatForGlobal())
> + return {};
> +
> + MachineInstr *MI = MRI->getVRegDef(Root.getReg());
> + Register VAddr = Root.getReg();
> + int64_t Offset = 0;
> +
> + // TODO: Attempt to use addressing modes. We need to look back through regbank
> + // copies to find a 64-bit SGPR base and VGPR offset.
> +
> + // FIXME: Use defaulted operands for trailing 0s and remove from the complex
> + // pattern.
> + return {{
> + [=](MachineInstrBuilder &MIB) { // rsrc
> + MIB.addReg(buildRSrc(MI, *MRI, TII, Register()));
> + },
> + [=](MachineInstrBuilder &MIB) { MIB.addReg(VAddr); }, // vaddr
> + [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }, // soffset
> + addZeroImm, // offset
> + addZeroImm, // glc
> + addZeroImm, // slc
> + addZeroImm, // tfe
> + addZeroImm, // dlc
> + addZeroImm // swz
> + }};
> +}
> +
> void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB,
> const MachineInstr &MI,
> int OpIdx) const {
>
> diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
> b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
> index d7dc8de3677d..94019ddf8ff5 100644
> --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
> +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
> @@ -180,6 +180,9 @@ class AMDGPUInstructionSelector : public InstructionSelector {
> InstructionSelector::ComplexRendererFns
> selectDS1Addr1Offset(MachineOperand &Root) const;
>
> + InstructionSelector::ComplexRendererFns
> + selectMUBUFAddr64(MachineOperand &Root) const;
> +
> void renderTruncImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
> int OpIdx = -1) const;
>
>
> diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll
> b/llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll
> index bf4a3a254c12..eebfbee8a12e 100644
> --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll
> +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll
> @@ -1,5 +1,5 @@
> ; NOTE: Assertions have been autogenerated by
> utils/update_llc_test_checks.py
> -; RUN: llc -global-isel -march=amdgcn -mcpu=hawaii -verify-machineinstrs
> < %s | FileCheck -check-prefix=GCN %s
> +; RUN: llc -global-isel -march=amdgcn -mcpu=hawaii
> -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck
> -check-prefix=GCN %s
>
> ; End to end tests for scalar vs. vector boolean legalization strategies.
>
>
> diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir
> b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir
> index 1afa71ec4a30..e600aa0e1805 100644
> --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir
> +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir
> @@ -1,6 +1,6 @@
> # NOTE: Assertions have been autogenerated by
> utils/update_mir_test_checks.py
> -# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select
> -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE64 %s
> -# RUN: llc -march=amdgcn -mcpu=gfx1010
> -mattr=+wavefrontsize32,-wavefrontsize64 -run-pass=instruction-select
> -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s
> +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii
> -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck
> -check-prefix=WAVE64 %s
> +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010
> -mattr=+wavefrontsize32,-wavefrontsize64 -run-pass=instruction-select
> -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s
>
> ---
>
>
> diff --git
> a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.mir
> b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.mir
> index b69776bb7b39..636b1d2dda69 100644
> --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.mir
> +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.mir
> @@ -1,5 +1,5 @@
> # NOTE: Assertions have been autogenerated by
> utils/update_mir_test_checks.py
> -# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select
> -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX7 %s
> +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii
> -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck
> -check-prefix=GFX7 %s
>
> ---
>
>
> diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.mir
> b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.mir
> index 0b82dd159d97..020e171d3fd5 100644
> --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.mir
> +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.mir
> @@ -1,5 +1,5 @@
> # NOTE: Assertions have been autogenerated by
> utils/update_mir_test_checks.py
> -# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select
> -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX7 %s
> +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii
> -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck
> -check-prefix=GFX7 %s
>
> # FIXME: Ideally this would fail to select with ieee mode enabled.
> ---
>
> diff --git
> a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.mir
> b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.mir
> index b5d9c8851bff..d6ac32e41543 100644
> --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.mir
> +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.mir
> @@ -1,5 +1,5 @@
> # NOTE: Assertions have been autogenerated by
> utils/update_mir_test_checks.py
> -# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select
> -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX7 %s
> +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii
> -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck
> -check-prefix=GFX7 %s
>
> ---
>
>
> diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.mir
> b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.mir
> index 2f7319c57ac4..1f4decb7826a 100644
> --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.mir
> +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.mir
> @@ -1,5 +1,5 @@
> # NOTE: Assertions have been autogenerated by
> utils/update_mir_test_checks.py
> -# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select
> -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX7 %s
> +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii
> -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck
> -check-prefix=GFX7 %s
>
> # FIXME: Ideally this would fail to select with ieee mode enabled.
> ---
>
> diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir
> b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir
> index eb4a5484d1ad..e6736f2d7147 100644
> --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir
> +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir
> @@ -1,5 +1,5 @@
> # NOTE: Assertions have been autogenerated by
> utils/update_mir_test_checks.py
> -# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select
> -verify-machineinstrs -o - %s | FileCheck %s -check-prefix=GCN
> +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii
> -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s
> -check-prefix=GCN
>
> ---
>
>
> diff --git
> a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-implicit-def.mir
> b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-implicit-def.mir
> index 17da10515a36..b4ac02a72f61 100644
> --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-implicit-def.mir
> +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-implicit-def.mir
> @@ -1,5 +1,5 @@
> # NOTE: Assertions have been autogenerated by
> utils/update_mir_test_checks.py
> -# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select
> -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck
> -check-prefixes=GCN %s
> +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii
> -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0
> -o - %s | FileCheck -check-prefixes=GCN %s
> # XUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select
> -verify-machineinstrs -global-isel-abort=2 -pass-remarks-missed='gisel*'
> -o /dev/null %s 2>&1 | FileCheck -check-prefixes=ERR %s
>
> ---
>
> diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-inttoptr.mir
> b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-inttoptr.mir
> index 1920b6b9f3bb..6321080d6bec 100644
> --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-inttoptr.mir
> +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-inttoptr.mir
> @@ -1,4 +1,4 @@
> -# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select
> -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN
> +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii
> -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s
> -check-prefixes=GCN
>
> ---
>
>
> diff --git
> a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir
> b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir
> index 2e1a2ea089eb..ed7007fe5818 100644
> ---
> a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir
> +++
> b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir
> @@ -1,6 +1,7 @@
> # NOTE: Assertions have been autogenerated by
> utils/update_mir_test_checks.py
> # RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select
> -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck
> -check-prefix=GFX6 %s
> # RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select
> -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck
> -check-prefix=GFX7 %s
> +# RUN: llc -march=amdgcn -mcpu=hawaii -mattr=+flat-for-global
> -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o
> - %s | FileCheck -check-prefix=GFX7-FLAT %s
> # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select
> -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck
> -check-prefix=GFX9 %s
>
> ---
> @@ -16,14 +17,29 @@ body: |
>
> ; GFX6-LABEL: name: load_atomic_global_s32_seq_cst
> ; GFX6: liveins: $vgpr0_vgpr1
> - ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> - ; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[COPY]](p1) :: (load
> seq_cst 4, addrspace 1)
> - ; GFX6: $vgpr0 = COPY [[LOAD]](s32)
> + ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
> + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX6: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 =
> BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0,
> implicit $exec :: (load seq_cst 4, addrspace 1)
> + ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
> ; GFX7-LABEL: name: load_atomic_global_s32_seq_cst
> ; GFX7: liveins: $vgpr0_vgpr1
> + ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
> ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> - ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD
> [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst
> 4, addrspace 1)
> - ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
> + ; GFX7: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 =
> BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0,
> implicit $exec :: (load seq_cst 4, addrspace 1)
> + ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
> + ; GFX7-FLAT-LABEL: name: load_atomic_global_s32_seq_cst
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD
> [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst
> 4, addrspace 1)
> + ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
> ; GFX9-LABEL: name: load_atomic_global_s32_seq_cst
> ; GFX9: liveins: $vgpr0_vgpr1
> ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> @@ -56,6 +72,11 @@ body: |
> ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) ::
> (load seq_cst 4, addrspace 1)
> ; GFX7: $vgpr0 = COPY [[LOAD]](<2 x s16>)
> + ; GFX7-FLAT-LABEL: name: load_atomic_global_v2s16_seq_cst
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD
> [[COPY]](p1) :: (load seq_cst 4, addrspace 1)
> + ; GFX7-FLAT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
> ; GFX9-LABEL: name: load_atomic_global_v2s16_seq_cst
> ; GFX9: liveins: $vgpr0_vgpr1
> ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> @@ -88,6 +109,11 @@ body: |
> ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load
> seq_cst 4, addrspace 1)
> ; GFX7: $vgpr0 = COPY [[LOAD]](p3)
> + ; GFX7-FLAT-LABEL: name: load_atomic_global_p3_seq_cst
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) ::
> (load seq_cst 4, addrspace 1)
> + ; GFX7-FLAT: $vgpr0 = COPY [[LOAD]](p3)
> ; GFX9-LABEL: name: load_atomic_global_p3_seq_cst
> ; GFX9: liveins: $vgpr0_vgpr1
> ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> @@ -112,14 +138,29 @@ body: |
>
> ; GFX6-LABEL: name: load_atomic_global_s64_seq_cst
> ; GFX6: liveins: $vgpr0_vgpr1
> - ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> - ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load
> seq_cst 8, addrspace 1)
> - ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
> + ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
> + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX6: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 =
> BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0,
> 0, implicit $exec :: (load seq_cst 8, addrspace 1)
> + ; GFX6: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
> ; GFX7-LABEL: name: load_atomic_global_s64_seq_cst
> ; GFX7: liveins: $vgpr0_vgpr1
> + ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
> ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> - ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2
> [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst
> 8, addrspace 1)
> - ; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
> + ; GFX7: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 =
> BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0,
> 0, implicit $exec :: (load seq_cst 8, addrspace 1)
> + ; GFX7: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
> + ; GFX7-FLAT-LABEL: name: load_atomic_global_s64_seq_cst
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 =
> FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
> :: (load seq_cst 8, addrspace 1)
> + ; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
> ; GFX9-LABEL: name: load_atomic_global_s64_seq_cst
> ; GFX9: liveins: $vgpr0_vgpr1
> ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> @@ -152,6 +193,11 @@ body: |
> ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) ::
> (load seq_cst 8, addrspace 1)
> ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
> + ; GFX7-FLAT-LABEL: name: load_atomic_global_v2s32_seq_cst
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD
> [[COPY]](p1) :: (load seq_cst 8, addrspace 1)
> + ; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
> ; GFX9-LABEL: name: load_atomic_global_v2s32_seq_cst
> ; GFX9: liveins: $vgpr0_vgpr1
> ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> @@ -184,6 +230,11 @@ body: |
> ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) ::
> (load seq_cst 8, addrspace 1)
> ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
> + ; GFX7-FLAT-LABEL: name: load_atomic_global_v4s16_seq_cst
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD
> [[COPY]](p1) :: (load seq_cst 8, addrspace 1)
> + ; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
> ; GFX9-LABEL: name: load_atomic_global_v4s16_seq_cst
> ; GFX9: liveins: $vgpr0_vgpr1
> ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> @@ -216,6 +267,11 @@ body: |
> ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load
> seq_cst 8, addrspace 1)
> ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
> + ; GFX7-FLAT-LABEL: name: load_atomic_global_p1_seq_cst
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) ::
> (load seq_cst 8, addrspace 1)
> + ; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
> ; GFX9-LABEL: name: load_atomic_global_p1_seq_cst
> ; GFX9: liveins: $vgpr0_vgpr1
> ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> @@ -248,6 +304,11 @@ body: |
> ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p1) :: (load
> seq_cst 8, addrspace 1)
> ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p0)
> + ; GFX7-FLAT-LABEL: name: load_atomic_global_p0_seq_cst
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p1) ::
> (load seq_cst 8, addrspace 1)
> + ; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[LOAD]](p0)
> ; GFX9-LABEL: name: load_atomic_global_p0_seq_cst
> ; GFX9: liveins: $vgpr0_vgpr1
> ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> @@ -272,26 +333,59 @@ body: |
>
> ; GFX6-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048
> ; GFX6: liveins: $vgpr0_vgpr1
> - ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> - ; GFX6: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 -2048
> - ; GFX6: [[GEP:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
> - ; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[GEP]](p1) :: (load
> seq_cst 4, addrspace 1)
> - ; GFX6: $vgpr0 = COPY [[LOAD]](s32)
> + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32
> 4294965248, implicit $exec
> + ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1,
> implicit $exec
> + ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
> + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> + ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> + ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> + ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> + ; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32,
> [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]],
> [[COPY2]], 0, implicit $exec
> + ; GFX6: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
> + ; GFX6: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 =
> BUFFER_LOAD_DWORD_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0,
> 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
> + ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
> ; GFX7-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048
> ; GFX7: liveins: $vgpr0_vgpr1
> ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32
> 4294965248, implicit $exec
> ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1,
> implicit $exec
> ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
> ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32,
> [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]],
> [[COPY2]], 0, implicit $exec
> - ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
> - ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD
> [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load
> seq_cst 4, addrspace 1)
> - ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
> + ; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
> + ; GFX7: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 =
> BUFFER_LOAD_DWORD_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0,
> 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
> + ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
> + ; GFX7-FLAT-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32
> 4294965248, implicit $exec
> + ; GFX7-FLAT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1,
> implicit $exec
> + ; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> + ; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> + ; GFX7-FLAT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> + ; GFX7-FLAT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> + ; GFX7-FLAT: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32,
> [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]],
> [[COPY2]], 0, implicit $exec
> + ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
> + ; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD
> [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load
> seq_cst 4, addrspace 1)
> + ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
> ; GFX9-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048
> ; GFX9: liveins: $vgpr0_vgpr1
> ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> @@ -328,26 +422,59 @@ body: |
>
> ; GFX6-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095
> ; GFX6: liveins: $vgpr0_vgpr1
> - ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> - ; GFX6: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 4095
> - ; GFX6: [[GEP:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
> - ; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[GEP]](p1) :: (load
> seq_cst 4, addrspace 1)
> - ; GFX6: $vgpr0 = COPY [[LOAD]](s32)
> + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095,
> implicit $exec
> + ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0,
> implicit $exec
> + ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
> + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> + ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> + ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> + ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> + ; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32,
> [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]],
> [[COPY2]], 0, implicit $exec
> + ; GFX6: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
> + ; GFX6: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 =
> BUFFER_LOAD_DWORD_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0,
> 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
> + ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
> ; GFX7-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095
> ; GFX7: liveins: $vgpr0_vgpr1
> ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095,
> implicit $exec
> ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0,
> implicit $exec
> ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
> ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32,
> [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]],
> [[COPY2]], 0, implicit $exec
> - ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
> - ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD
> [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load
> seq_cst 4, addrspace 1)
> - ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
> + ; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
> + ; GFX7: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 =
> BUFFER_LOAD_DWORD_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0,
> 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
> + ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
> + ; GFX7-FLAT-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095,
> implicit $exec
> + ; GFX7-FLAT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0,
> implicit $exec
> + ; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> + ; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> + ; GFX7-FLAT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> + ; GFX7-FLAT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> + ; GFX7-FLAT: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32,
> [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]],
> [[COPY2]], 0, implicit $exec
> + ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
> + ; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD
> [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load
> seq_cst 4, addrspace 1)
> + ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
> ; GFX9-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095
> ; GFX9: liveins: $vgpr0_vgpr1
> ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
>
> diff --git
> a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir
> b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir
> index 2c070d65565d..3bbc8f0e016d 100644
> --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir
> +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir
> @@ -1,5 +1,7 @@
> # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
> +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s
> # RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
> +# RUN: llc -march=amdgcn -mcpu=hawaii -mattr=+flat-for-global -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7-FLAT %s
> # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s
> # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
> # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s
> @@ -18,11 +20,31 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1
>
> + ; GFX6-LABEL: name: load_global_s32_from_4
> + ; GFX6: liveins: $vgpr0_vgpr1
> + ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
> + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX6: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 =
> BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0,
> implicit $exec :: (load 4, addrspace 1)
> + ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
> ; GFX7-LABEL: name: load_global_s32_from_4
> ; GFX7: liveins: $vgpr0_vgpr1
> + ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
> ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> - ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD
> [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4,
> addrspace 1)
> - ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
> + ; GFX7: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 =
> BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0,
> implicit $exec :: (load 4, addrspace 1)
> + ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
> + ; GFX7-FLAT-LABEL: name: load_global_s32_from_4
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD
> [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4,
> addrspace 1)
> + ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
> ; GFX8-LABEL: name: load_global_s32_from_4
> ; GFX8: liveins: $vgpr0_vgpr1
> ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> @@ -56,11 +78,31 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1
>
> + ; GFX6-LABEL: name: load_global_s32_from_2
> + ; GFX6: liveins: $vgpr0_vgpr1
> + ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
> + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX6: [[BUFFER_LOAD_USHORT_ADDR64_:%[0-9]+]]:vgpr_32 =
> BUFFER_LOAD_USHORT_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0,
> implicit $exec :: (load 2, addrspace 1)
> + ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_ADDR64_]]
> ; GFX7-LABEL: name: load_global_s32_from_2
> ; GFX7: liveins: $vgpr0_vgpr1
> + ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
> ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> - ; GFX7: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT
> [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 2,
> addrspace 1)
> - ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_USHORT]]
> + ; GFX7: [[BUFFER_LOAD_USHORT_ADDR64_:%[0-9]+]]:vgpr_32 =
> BUFFER_LOAD_USHORT_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0,
> implicit $exec :: (load 2, addrspace 1)
> + ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_ADDR64_]]
> + ; GFX7-FLAT-LABEL: name: load_global_s32_from_2
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT
> [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 2,
> addrspace 1)
> + ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_USHORT]]
> ; GFX8-LABEL: name: load_global_s32_from_2
> ; GFX8: liveins: $vgpr0_vgpr1
> ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> @@ -94,11 +136,31 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1
>
> + ; GFX6-LABEL: name: load_global_s32_from_1
> + ; GFX6: liveins: $vgpr0_vgpr1
> + ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
> + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 =
> BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0,
> implicit $exec :: (load 1, addrspace 1)
> + ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
> ; GFX7-LABEL: name: load_global_s32_from_1
> ; GFX7: liveins: $vgpr0_vgpr1
> + ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
> ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> - ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE
> [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1,
> addrspace 1)
> - ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
> + ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 =
> BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0,
> implicit $exec :: (load 1, addrspace 1)
> + ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
> + ; GFX7-FLAT-LABEL: name: load_global_s32_from_1
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE
> [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1,
> addrspace 1)
> + ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
> ; GFX8-LABEL: name: load_global_s32_from_1
> ; GFX8: liveins: $vgpr0_vgpr1
> ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> @@ -132,11 +194,31 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1
>
> + ; GFX6-LABEL: name: load_global_v2s32
> + ; GFX6: liveins: $vgpr0_vgpr1
> + ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
> + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX6: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 =
> BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0,
> 0, implicit $exec :: (load 8, addrspace 1)
> + ; GFX6: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
> ; GFX7-LABEL: name: load_global_v2s32
> ; GFX7: liveins: $vgpr0_vgpr1
> + ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
> ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> - ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2
> [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8,
> addrspace 1)
> - ; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
> + ; GFX7: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 =
> BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0,
> 0, implicit $exec :: (load 8, addrspace 1)
> + ; GFX7: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
> + ; GFX7-FLAT-LABEL: name: load_global_v2s32
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 =
> FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
> :: (load 8, addrspace 1)
> + ; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
> ; GFX8-LABEL: name: load_global_v2s32
> ; GFX8: liveins: $vgpr0_vgpr1
> ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> @@ -170,11 +252,31 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1
>
> + ; GFX6-LABEL: name: load_global_v3s32
> + ; GFX6: liveins: $vgpr0_vgpr1
> + ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
> + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX6: [[BUFFER_LOAD_DWORDX3_ADDR64_:%[0-9]+]]:vreg_96 =
> BUFFER_LOAD_DWORDX3_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0,
> 0, implicit $exec :: (load 12, align 4, addrspace 1)
> + ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[BUFFER_LOAD_DWORDX3_ADDR64_]]
> ; GFX7-LABEL: name: load_global_v3s32
> ; GFX7: liveins: $vgpr0_vgpr1
> + ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
> ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> - ; GFX7: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3
> [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align
> 4, addrspace 1)
> - ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]]
> + ; GFX7: [[BUFFER_LOAD_DWORDX3_ADDR64_:%[0-9]+]]:vreg_96 =
> BUFFER_LOAD_DWORDX3_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0,
> 0, implicit $exec :: (load 12, align 4, addrspace 1)
> + ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[BUFFER_LOAD_DWORDX3_ADDR64_]]
> + ; GFX7-FLAT-LABEL: name: load_global_v3s32
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 =
> FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
> :: (load 12, align 4, addrspace 1)
> + ; GFX7-FLAT: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]]
> ; GFX8-LABEL: name: load_global_v3s32
> ; GFX8: liveins: $vgpr0_vgpr1
> ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> @@ -208,11 +310,31 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1
>
> + ; GFX6-LABEL: name: load_global_v4s32
> + ; GFX6: liveins: $vgpr0_vgpr1
> + ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
> + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX6: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 =
> BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0,
> 0, implicit $exec :: (load 16, align 4, addrspace 1)
> + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY
> [[BUFFER_LOAD_DWORDX4_ADDR64_]]
> ; GFX7-LABEL: name: load_global_v4s32
> ; GFX7: liveins: $vgpr0_vgpr1
> + ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
> ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> - ; GFX7: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4
> [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align
> 4, addrspace 1)
> - ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
> + ; GFX7: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 =
> BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0,
> 0, implicit $exec :: (load 16, align 4, addrspace 1)
> + ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY
> [[BUFFER_LOAD_DWORDX4_ADDR64_]]
> + ; GFX7-FLAT-LABEL: name: load_global_v4s32
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 =
> FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
> :: (load 16, align 4, addrspace 1)
> + ; GFX7-FLAT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
> ; GFX8-LABEL: name: load_global_v4s32
> ; GFX8: liveins: $vgpr0_vgpr1
> ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> @@ -246,11 +368,21 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1
>
> + ; GFX6-LABEL: name: load_global_s64
> + ; GFX6: liveins: $vgpr0_vgpr1
> + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load
> 8, addrspace 1)
> + ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
> ; GFX7-LABEL: name: load_global_s64
> ; GFX7: liveins: $vgpr0_vgpr1
> ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2
> [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8,
> addrspace 1)
> ; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
> + ; GFX7-FLAT-LABEL: name: load_global_s64
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 =
> FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
> :: (load 8, addrspace 1)
> + ; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
> ; GFX8-LABEL: name: load_global_s64
> ; GFX8: liveins: $vgpr0_vgpr1
> ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> @@ -284,11 +416,21 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1
>
> + ; GFX6-LABEL: name: load_global_v2s64
> + ; GFX6: liveins: $vgpr0_vgpr1
> + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX6: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p1) ::
> (load 16, align 4, addrspace 1)
> + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
> ; GFX7-LABEL: name: load_global_v2s64
> ; GFX7: liveins: $vgpr0_vgpr1
> ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> ; GFX7: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4
> [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align
> 4, addrspace 1)
> ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
> + ; GFX7-FLAT-LABEL: name: load_global_v2s64
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 =
> FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
> :: (load 16, align 4, addrspace 1)
> + ; GFX7-FLAT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
> ; GFX8-LABEL: name: load_global_v2s64
> ; GFX8: liveins: $vgpr0_vgpr1
> ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> @@ -322,11 +464,21 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1
>
> + ; GFX6-LABEL: name: load_global_v2p1
> + ; GFX6: liveins: $vgpr0_vgpr1
> + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX6: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) ::
> (load 16, align 4, addrspace 1)
> + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
> ; GFX7-LABEL: name: load_global_v2p1
> ; GFX7: liveins: $vgpr0_vgpr1
> ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) ::
> (load 16, align 4, addrspace 1)
> ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
> + ; GFX7-FLAT-LABEL: name: load_global_v2p1
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD
> [[COPY]](p1) :: (load 16, align 4, addrspace 1)
> + ; GFX7-FLAT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
> ; GFX8-LABEL: name: load_global_v2p1
> ; GFX8: liveins: $vgpr0_vgpr1
> ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> @@ -359,11 +511,21 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1
>
> + ; GFX6-LABEL: name: load_global_s96
> + ; GFX6: liveins: $vgpr0_vgpr1
> + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX6: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load
> 12, align 4, addrspace 1)
> + ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96)
> ; GFX7-LABEL: name: load_global_s96
> ; GFX7: liveins: $vgpr0_vgpr1
> ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> ; GFX7: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load
> 12, align 4, addrspace 1)
> ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96)
> + ; GFX7-FLAT-LABEL: name: load_global_s96
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) ::
> (load 12, align 4, addrspace 1)
> + ; GFX7-FLAT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96)
> ; GFX8-LABEL: name: load_global_s96
> ; GFX8: liveins: $vgpr0_vgpr1
> ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> @@ -396,11 +558,21 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1
>
> + ; GFX6-LABEL: name: load_global_s128
> + ; GFX6: liveins: $vgpr0_vgpr1
> + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX6: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) ::
> (load 16, align 4, addrspace 1)
> + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
> ; GFX7-LABEL: name: load_global_s128
> ; GFX7: liveins: $vgpr0_vgpr1
> ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) ::
> (load 16, align 4, addrspace 1)
> ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
> + ; GFX7-FLAT-LABEL: name: load_global_s128
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) ::
> (load 16, align 4, addrspace 1)
> + ; GFX7-FLAT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
> ; GFX8-LABEL: name: load_global_s128
> ; GFX8: liveins: $vgpr0_vgpr1
> ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> @@ -433,11 +605,21 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1
>
> + ; GFX6-LABEL: name: load_global_p3_from_4
> + ; GFX6: liveins: $vgpr0_vgpr1
> + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load
> 4, addrspace 1)
> + ; GFX6: $vgpr0 = COPY [[LOAD]](p3)
> ; GFX7-LABEL: name: load_global_p3_from_4
> ; GFX7: liveins: $vgpr0_vgpr1
> ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD
> [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4,
> addrspace 1)
> ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
> + ; GFX7-FLAT-LABEL: name: load_global_p3_from_4
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD
> [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4,
> addrspace 1)
> + ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
> ; GFX8-LABEL: name: load_global_p3_from_4
> ; GFX8: liveins: $vgpr0_vgpr1
> ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> @@ -471,11 +653,21 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1
>
> + ; GFX6-LABEL: name: load_global_p1_from_8
> + ; GFX6: liveins: $vgpr0_vgpr1
> + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load
> 8, addrspace 1)
> + ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
> ; GFX7-LABEL: name: load_global_p1_from_8
> ; GFX7: liveins: $vgpr0_vgpr1
> ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2
> [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8,
> addrspace 1)
> ; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
> + ; GFX7-FLAT-LABEL: name: load_global_p1_from_8
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 =
> FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
> :: (load 8, addrspace 1)
> + ; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
> ; GFX8-LABEL: name: load_global_p1_from_8
> ; GFX8: liveins: $vgpr0_vgpr1
> ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> @@ -509,11 +701,21 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1
>
> + ; GFX6-LABEL: name: load_global_p999_from_8
> + ; GFX6: liveins: $vgpr0_vgpr1
> + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load
> 8, addrspace 1)
> + ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
> ; GFX7-LABEL: name: load_global_p999_from_8
> ; GFX7: liveins: $vgpr0_vgpr1
> ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load
> 8, addrspace 1)
> ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
> + ; GFX7-FLAT-LABEL: name: load_global_p999_from_8
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) ::
> (load 8, addrspace 1)
> + ; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
> ; GFX8-LABEL: name: load_global_p999_from_8
> ; GFX8: liveins: $vgpr0_vgpr1
> ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> @@ -546,11 +748,21 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1
>
> + ; GFX6-LABEL: name: load_global_v2p3
> + ; GFX6: liveins: $vgpr0_vgpr1
> + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) ::
> (load 8, addrspace 1)
> + ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
> ; GFX7-LABEL: name: load_global_v2p3
> ; GFX7: liveins: $vgpr0_vgpr1
> ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) ::
> (load 8, addrspace 1)
> ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
> + ; GFX7-FLAT-LABEL: name: load_global_v2p3
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1)
> :: (load 8, addrspace 1)
> + ; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
> ; GFX8-LABEL: name: load_global_v2p3
> ; GFX8: liveins: $vgpr0_vgpr1
> ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> @@ -583,11 +795,21 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1
>
> + ; GFX6-LABEL: name: load_global_v2s16
> + ; GFX6: liveins: $vgpr0_vgpr1
> + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) ::
> (load 4, addrspace 1)
> + ; GFX6: $vgpr0 = COPY [[LOAD]](<2 x s16>)
> ; GFX7-LABEL: name: load_global_v2s16
> ; GFX7: liveins: $vgpr0_vgpr1
> ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD
> [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4,
> addrspace 1)
> ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
> + ; GFX7-FLAT-LABEL: name: load_global_v2s16
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD
> [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4,
> addrspace 1)
> + ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
> ; GFX8-LABEL: name: load_global_v2s16
> ; GFX8: liveins: $vgpr0_vgpr1
> ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> @@ -621,11 +843,21 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1
>
> + ; GFX6-LABEL: name: load_global_v4s16
> + ; GFX6: liveins: $vgpr0_vgpr1
> + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) ::
> (load 8, addrspace 1)
> + ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
> ; GFX7-LABEL: name: load_global_v4s16
> ; GFX7: liveins: $vgpr0_vgpr1
> ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2
> [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8,
> addrspace 1)
> ; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
> + ; GFX7-FLAT-LABEL: name: load_global_v4s16
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 =
> FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
> :: (load 8, addrspace 1)
> + ; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
> ; GFX8-LABEL: name: load_global_v4s16
> ; GFX8: liveins: $vgpr0_vgpr1
> ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> @@ -659,11 +891,21 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1
>
> + ; GFX6-LABEL: name: load_global_v6s16
> + ; GFX6: liveins: $vgpr0_vgpr1
> + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX6: [[LOAD:%[0-9]+]]:vgpr(<6 x s16>) = G_LOAD [[COPY]](p1) ::
> (load 12, align 4, addrspace 1)
> + ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>)
> ; GFX7-LABEL: name: load_global_v6s16
> ; GFX7: liveins: $vgpr0_vgpr1
> ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> ; GFX7: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) ::
> (load 12, align 4, addrspace 1)
> ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>)
> + ; GFX7-FLAT-LABEL: name: load_global_v6s16
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD
> [[COPY]](p1) :: (load 12, align 4, addrspace 1)
> + ; GFX7-FLAT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>)
> ; GFX8-LABEL: name: load_global_v6s16
> ; GFX8: liveins: $vgpr0_vgpr1
> ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> @@ -696,11 +938,21 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1
>
> + ; GFX6-LABEL: name: load_global_v8s16
> + ; GFX6: liveins: $vgpr0_vgpr1
> + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX6: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) ::
> (load 16, align 4, addrspace 1)
> + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
> ; GFX7-LABEL: name: load_global_v8s16
> ; GFX7: liveins: $vgpr0_vgpr1
> ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) ::
> (load 16, align 4, addrspace 1)
> ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
> + ; GFX7-FLAT-LABEL: name: load_global_v8s16
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD
> [[COPY]](p1) :: (load 16, align 4, addrspace 1)
> + ; GFX7-FLAT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
> ; GFX8-LABEL: name: load_global_v8s16
> ; GFX8: liveins: $vgpr0_vgpr1
> ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> @@ -737,21 +989,61 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1
>
> + ; GFX6-LABEL: name: load_global_s32_from_1_gep_2047
> + ; GFX6: liveins: $vgpr0_vgpr1
> + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047,
> implicit $exec
> + ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0,
> implicit $exec
> + ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
> + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> + ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> + ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> + ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> + ; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32,
> [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]],
> [[COPY2]], 0, implicit $exec
> + ; GFX6: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
> + ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 =
> BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0,
> 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
> + ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
> ; GFX7-LABEL: name: load_global_s32_from_1_gep_2047
> ; GFX7: liveins: $vgpr0_vgpr1
> ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047,
> implicit $exec
> ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0,
> implicit $exec
> ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
> ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32,
> [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]],
> [[COPY2]], 0, implicit $exec
> - ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
> - ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE
> [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load
> 1, addrspace 1)
> - ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
> + ; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
> + ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 =
> BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0,
> 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
> + ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
> + ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_2047
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047,
> implicit $exec
> + ; GFX7-FLAT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0,
> implicit $exec
> + ; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> + ; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> + ; GFX7-FLAT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> + ; GFX7-FLAT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> + ; GFX7-FLAT: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32,
> [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]],
> [[COPY2]], 0, implicit $exec
> + ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
> + ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE
> [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load
> 1, addrspace 1)
> + ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
> ; GFX8-LABEL: name: load_global_s32_from_1_gep_2047
> ; GFX8: liveins: $vgpr0_vgpr1
> ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> @@ -797,21 +1089,61 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1
>
> + ; GFX6-LABEL: name: load_global_s32_from_1_gep_2048
> + ; GFX6: liveins: $vgpr0_vgpr1
> + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048,
> implicit $exec
> + ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0,
> implicit $exec
> + ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
> + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> + ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> + ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> + ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> + ; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32,
> [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]],
> [[COPY2]], 0, implicit $exec
> + ; GFX6: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
> + ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 =
> BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0,
> 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
> + ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
> ; GFX7-LABEL: name: load_global_s32_from_1_gep_2048
> ; GFX7: liveins: $vgpr0_vgpr1
> ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048,
> implicit $exec
> ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0,
> implicit $exec
> ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
> ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32,
> [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]],
> [[COPY2]], 0, implicit $exec
> - ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
> - ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE
> [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load
> 1, addrspace 1)
> - ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
> + ; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
> + ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 =
> BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0,
> 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
> + ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
> + ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_2048
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048,
> implicit $exec
> + ; GFX7-FLAT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0,
> implicit $exec
> + ; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> + ; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> + ; GFX7-FLAT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> + ; GFX7-FLAT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> + ; GFX7-FLAT: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32,
> [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]],
> [[COPY2]], 0, implicit $exec
> + ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
> + ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE
> [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load
> 1, addrspace 1)
> + ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
> ; GFX8-LABEL: name: load_global_s32_from_1_gep_2048
> ; GFX8: liveins: $vgpr0_vgpr1
> ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> @@ -867,21 +1199,61 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1
>
> + ; GFX6-LABEL: name: load_global_s32_from_1_gep_m2047
> + ; GFX6: liveins: $vgpr0_vgpr1
> + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32
> 4294965249, implicit $exec
> + ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1,
> implicit $exec
> + ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
> + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> + ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> + ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> + ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> + ; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32,
> [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]],
> [[COPY2]], 0, implicit $exec
> + ; GFX6: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
> + ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 =
> BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0,
> 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
> + ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
> ; GFX7-LABEL: name: load_global_s32_from_1_gep_m2047
> ; GFX7: liveins: $vgpr0_vgpr1
> ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32
> 4294965249, implicit $exec
> ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1,
> implicit $exec
> ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
> ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32,
> [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]],
> [[COPY2]], 0, implicit $exec
> - ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
> - ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE
> [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load
> 1, addrspace 1)
> - ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
> + ; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
> + ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 =
> BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0,
> 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
> + ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
> + ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m2047
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32
> 4294965249, implicit $exec
> + ; GFX7-FLAT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1,
> implicit $exec
> + ; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> + ; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> + ; GFX7-FLAT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> + ; GFX7-FLAT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> + ; GFX7-FLAT: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32,
> [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]],
> [[COPY2]], 0, implicit $exec
> + ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
> + ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE
> [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load
> 1, addrspace 1)
> + ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
> ; GFX8-LABEL: name: load_global_s32_from_1_gep_m2047
> ; GFX8: liveins: $vgpr0_vgpr1
> ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> @@ -927,21 +1299,61 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1
>
> + ; GFX6-LABEL: name: load_global_s32_from_1_gep_m2048
> + ; GFX6: liveins: $vgpr0_vgpr1
> + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32
> 4294965248, implicit $exec
> + ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1,
> implicit $exec
> + ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
> + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> + ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> + ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> + ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> + ; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32,
> [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]],
> [[COPY2]], 0, implicit $exec
> + ; GFX6: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
> + ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 =
> BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0,
> 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
> + ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
> ; GFX7-LABEL: name: load_global_s32_from_1_gep_m2048
> ; GFX7: liveins: $vgpr0_vgpr1
> ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32
> 4294965248, implicit $exec
> ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1,
> implicit $exec
> ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
> ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32,
> [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]],
> [[COPY2]], 0, implicit $exec
> - ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
> - ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE
> [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load
> 1, addrspace 1)
> - ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
> + ; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
> + ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 =
> BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0,
> 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
> + ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
> + ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m2048
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32
> 4294965248, implicit $exec
> + ; GFX7-FLAT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1,
> implicit $exec
> + ; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> + ; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> + ; GFX7-FLAT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> + ; GFX7-FLAT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> + ; GFX7-FLAT: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32,
> [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]],
> [[COPY2]], 0, implicit $exec
> + ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
> + ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE
> [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load
> 1, addrspace 1)
> + ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
> ; GFX8-LABEL: name: load_global_s32_from_1_gep_m2048
> ; GFX8: liveins: $vgpr0_vgpr1
> ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> @@ -987,21 +1399,61 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1
>
> + ; GFX6-LABEL: name: load_global_s32_from_1_gep_4095
> + ; GFX6: liveins: $vgpr0_vgpr1
> + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095,
> implicit $exec
> + ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0,
> implicit $exec
> + ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
> + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> + ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> + ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> + ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> + ; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32,
> [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]],
> [[COPY2]], 0, implicit $exec
> + ; GFX6: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
> + ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 =
> BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0,
> 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
> + ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
> ; GFX7-LABEL: name: load_global_s32_from_1_gep_4095
> ; GFX7: liveins: $vgpr0_vgpr1
> ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095,
> implicit $exec
> ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0,
> implicit $exec
> ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
> ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32,
> [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]],
> [[COPY2]], 0, implicit $exec
> - ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
> - ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE
> [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load
> 1, addrspace 1)
> - ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
> + ; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
> + ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 =
> BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0,
> 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
> + ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
> + ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_4095
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095,
> implicit $exec
> + ; GFX7-FLAT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0,
> implicit $exec
> + ; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> + ; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> + ; GFX7-FLAT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> + ; GFX7-FLAT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> + ; GFX7-FLAT: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32,
> [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]],
> [[COPY2]], 0, implicit $exec
> + ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
> + ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE
> [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load
> 1, addrspace 1)
> + ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
> ; GFX8-LABEL: name: load_global_s32_from_1_gep_4095
> ; GFX8: liveins: $vgpr0_vgpr1
> ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> @@ -1057,21 +1509,61 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1
>
> + ; GFX6-LABEL: name: load_global_s32_from_1_gep_4096
> + ; GFX6: liveins: $vgpr0_vgpr1
> + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096,
> implicit $exec
> + ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0,
> implicit $exec
> + ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
> + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> + ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> + ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> + ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> + ; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32,
> [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]],
> [[COPY2]], 0, implicit $exec
> + ; GFX6: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
> + ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 =
> BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0,
> 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
> + ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
> ; GFX7-LABEL: name: load_global_s32_from_1_gep_4096
> ; GFX7: liveins: $vgpr0_vgpr1
> ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096,
> implicit $exec
> ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0,
> implicit $exec
> ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
> ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32,
> [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]],
> [[COPY2]], 0, implicit $exec
> - ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
> - ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE
> [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load
> 1, addrspace 1)
> - ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
> + ; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
> + ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 =
> BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0,
> 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
> + ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
> + ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_4096
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096,
> implicit $exec
> + ; GFX7-FLAT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0,
> implicit $exec
> + ; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> + ; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> + ; GFX7-FLAT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> + ; GFX7-FLAT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> + ; GFX7-FLAT: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32,
> [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]],
> [[COPY2]], 0, implicit $exec
> + ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
> + ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE
> [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load
> 1, addrspace 1)
> + ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
> ; GFX8-LABEL: name: load_global_s32_from_1_gep_4096
> ; GFX8: liveins: $vgpr0_vgpr1
> ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> @@ -1137,21 +1629,61 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1
>
> + ; GFX6-LABEL: name: load_global_s32_from_1_gep_m4095
> + ; GFX6: liveins: $vgpr0_vgpr1
> + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32
> 4294963201, implicit $exec
> + ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1,
> implicit $exec
> + ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
> + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> + ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> + ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> + ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> + ; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32,
> [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]],
> [[COPY2]], 0, implicit $exec
> + ; GFX6: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
> + ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 =
> BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0,
> 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
> + ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
> ; GFX7-LABEL: name: load_global_s32_from_1_gep_m4095
> ; GFX7: liveins: $vgpr0_vgpr1
> ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32
> 4294963201, implicit $exec
> ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1,
> implicit $exec
> ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
> ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32,
> [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]],
> [[COPY2]], 0, implicit $exec
> - ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
> - ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE
> [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load
> 1, addrspace 1)
> - ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
> + ; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
> + ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 =
> BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0,
> 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
> + ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
> + ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m4095
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32
> 4294963201, implicit $exec
> + ; GFX7-FLAT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1,
> implicit $exec
> + ; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> + ; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> + ; GFX7-FLAT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> + ; GFX7-FLAT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> + ; GFX7-FLAT: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32,
> [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]],
> [[COPY2]], 0, implicit $exec
> + ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
> + ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE
> [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load
> 1, addrspace 1)
> + ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
> ; GFX8-LABEL: name: load_global_s32_from_1_gep_m4095
> ; GFX8: liveins: $vgpr0_vgpr1
> ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> @@ -1207,21 +1739,61 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1
>
> + ; GFX6-LABEL: name: load_global_s32_from_1_gep_m4096
> + ; GFX6: liveins: $vgpr0_vgpr1
> + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32
> 4294963200, implicit $exec
> + ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1,
> implicit $exec
> + ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
> + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> + ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> + ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> + ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> + ; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32,
> [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]],
> [[COPY2]], 0, implicit $exec
> + ; GFX6: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
> + ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 =
> BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0,
> 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
> + ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
> ; GFX7-LABEL: name: load_global_s32_from_1_gep_m4096
> ; GFX7: liveins: $vgpr0_vgpr1
> ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32
> 4294963200, implicit $exec
> ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1,
> implicit $exec
> ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
> ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32,
> [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]],
> [[COPY2]], 0, implicit $exec
> - ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
> - ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE
> [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load
> 1, addrspace 1)
> - ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
> + ; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
> + ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 =
> BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0,
> 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
> + ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
> + ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m4096
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32
> 4294963200, implicit $exec
> + ; GFX7-FLAT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1,
> implicit $exec
> + ; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> + ; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> + ; GFX7-FLAT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> + ; GFX7-FLAT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> + ; GFX7-FLAT: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32,
> [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]],
> [[COPY2]], 0, implicit $exec
> + ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
> + ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE
> [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load
> 1, addrspace 1)
> + ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
> ; GFX8-LABEL: name: load_global_s32_from_1_gep_m4096
> ; GFX8: liveins: $vgpr0_vgpr1
> ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> @@ -1277,21 +1849,61 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1
>
> + ; GFX6-LABEL: name: load_global_s32_from_1_gep_8191
> + ; GFX6: liveins: $vgpr0_vgpr1
> + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191,
> implicit $exec
> + ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0,
> implicit $exec
> + ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
> + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> + ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> + ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> + ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> + ; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32,
> [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]],
> [[COPY2]], 0, implicit $exec
> + ; GFX6: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
> + ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 =
> BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0,
> 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
> + ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
> ; GFX7-LABEL: name: load_global_s32_from_1_gep_8191
> ; GFX7: liveins: $vgpr0_vgpr1
> ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191,
> implicit $exec
> ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0,
> implicit $exec
> ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
> ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32,
> [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]],
> [[COPY2]], 0, implicit $exec
> - ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
> - ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE
> [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load
> 1, addrspace 1)
> - ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
> + ; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
> + ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 =
> BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0,
> 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
> + ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
> + ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_8191
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191,
> implicit $exec
> + ; GFX7-FLAT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0,
> implicit $exec
> + ; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> + ; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> + ; GFX7-FLAT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> + ; GFX7-FLAT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> + ; GFX7-FLAT: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32,
> [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]],
> [[COPY2]], 0, implicit $exec
> + ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
> + ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE
> [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load
> 1, addrspace 1)
> + ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
> ; GFX8-LABEL: name: load_global_s32_from_1_gep_8191
> ; GFX8: liveins: $vgpr0_vgpr1
> ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> @@ -1357,21 +1969,61 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1
>
> + ; GFX6-LABEL: name: load_global_s32_from_1_gep_8192
> + ; GFX6: liveins: $vgpr0_vgpr1
> + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192,
> implicit $exec
> + ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0,
> implicit $exec
> + ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
> + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> + ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> + ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> + ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> + ; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32,
> [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]],
> [[COPY2]], 0, implicit $exec
> + ; GFX6: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
> + ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 =
> BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0,
> 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
> + ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
> ; GFX7-LABEL: name: load_global_s32_from_1_gep_8192
> ; GFX7: liveins: $vgpr0_vgpr1
> ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192,
> implicit $exec
> ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0,
> implicit $exec
> ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
> ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32,
> [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]],
> [[COPY2]], 0, implicit $exec
> - ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
> - ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE
> [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load
> 1, addrspace 1)
> - ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
> + ; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
> + ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 =
> BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0,
> 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
> + ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
> + ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_8192
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192,
> implicit $exec
> + ; GFX7-FLAT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0,
> implicit $exec
> + ; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> + ; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> + ; GFX7-FLAT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> + ; GFX7-FLAT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> + ; GFX7-FLAT: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32,
> [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]],
> [[COPY2]], 0, implicit $exec
> + ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
> + ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE
> [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load
> 1, addrspace 1)
> + ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
> ; GFX8-LABEL: name: load_global_s32_from_1_gep_8192
> ; GFX8: liveins: $vgpr0_vgpr1
> ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> @@ -1437,21 +2089,61 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1
>
> + ; GFX6-LABEL: name: load_global_s32_from_1_gep_m8191
> + ; GFX6: liveins: $vgpr0_vgpr1
> + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32
> 4294959105, implicit $exec
> + ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1,
> implicit $exec
> + ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
> + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> + ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> + ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> + ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> + ; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32,
> [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]],
> [[COPY2]], 0, implicit $exec
> + ; GFX6: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
> + ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 =
> BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0,
> 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
> + ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
> ; GFX7-LABEL: name: load_global_s32_from_1_gep_m8191
> ; GFX7: liveins: $vgpr0_vgpr1
> ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32
> 4294959105, implicit $exec
> ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1,
> implicit $exec
> ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
> ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32,
> [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]],
> [[COPY2]], 0, implicit $exec
> - ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
> - ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE
> [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load
> 1, addrspace 1)
> - ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
> + ; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
> + ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 =
> BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0,
> 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
> + ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
> + ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m8191
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32
> 4294959105, implicit $exec
> + ; GFX7-FLAT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1,
> implicit $exec
> + ; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> + ; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> + ; GFX7-FLAT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> + ; GFX7-FLAT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> + ; GFX7-FLAT: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32,
> [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]],
> [[COPY2]], 0, implicit $exec
> + ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
> + ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE
> [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load
> 1, addrspace 1)
> + ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
> ; GFX8-LABEL: name: load_global_s32_from_1_gep_m8191
> ; GFX8: liveins: $vgpr0_vgpr1
> ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> @@ -1517,21 +2209,61 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1
>
> + ; GFX6-LABEL: name: load_global_s32_from_1_gep_m8192
> + ; GFX6: liveins: $vgpr0_vgpr1
> + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32
> 4294959104, implicit $exec
> + ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1,
> implicit $exec
> + ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
> + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> + ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> + ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> + ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> + ; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32,
> [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]],
> [[COPY2]], 0, implicit $exec
> + ; GFX6: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
> + ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 =
> BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0,
> 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
> + ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
> ; GFX7-LABEL: name: load_global_s32_from_1_gep_m8192
> ; GFX7: liveins: $vgpr0_vgpr1
> ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32
> 4294959104, implicit $exec
> ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1,
> implicit $exec
> ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
> ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32,
> [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]],
> [[COPY2]], 0, implicit $exec
> - ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
> - ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE
> [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load
> 1, addrspace 1)
> - ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
> + ; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
> + ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 =
> BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0,
> 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
> + ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
> + ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m8192
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32
> 4294959104, implicit $exec
> + ; GFX7-FLAT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1,
> implicit $exec
> + ; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> + ; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> + ; GFX7-FLAT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> + ; GFX7-FLAT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> + ; GFX7-FLAT: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32,
> [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]],
> [[COPY2]], 0, implicit $exec
> + ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
> + ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE
> [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load
> 1, addrspace 1)
> + ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
> ; GFX8-LABEL: name: load_global_s32_from_1_gep_m8192
> ; GFX8: liveins: $vgpr0_vgpr1
> ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
>
> diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir
> b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir
> index 3e0de2289aaa..e68fda19d493 100644
> --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir
> +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir
> @@ -1,6 +1,6 @@
> # NOTE: Assertions have been autogenerated by
> utils/update_mir_test_checks.py
> -# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select
> -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE64 %s
> -# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select
> -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE32 %s
> +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii
> -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck
> -check-prefix=WAVE64 %s
> +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010
> -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck
> -check-prefix=WAVE32 %s
>
> ---
>
>
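
(Side note on the RUN-line update just above: the only change is moving from -march=amdgcn to the explicit -mtriple=amdgcn-amd-amdhsa triple. A minimal way to reproduce the WAVE64 run by hand, assuming llc and FileCheck from a fresh build are on PATH and the command is issued from an llvm-project checkout, is roughly:

  # Hand-expanded version of the first RUN line, with %s substituted
  # by the test path; normally llvm-lit does this substitution.
  llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -run-pass=instruction-select \
      -verify-machineinstrs -o - \
      llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir \
    | FileCheck -check-prefix=WAVE64 \
      llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir

Driving the whole file through llvm-lit remains the usual route; the command above is only a sketch for quick local inspection.)
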
> diff --git
> a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir
> b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir
> index 8486edd6c744..04735c7c5ac9 100644
> --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir
> +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir
> @@ -1,5 +1,7 @@
> # NOTE: Assertions have been autogenerated by
> utils/update_mir_test_checks.py
> +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select
> -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck
> -check-prefix=GFX6 %s
> # RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select
> -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck
> -check-prefix=GFX7 %s
> +# RUN: llc -march=amdgcn -mcpu=hawaii -mattr=+flat-for-global
> -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o
> - %s | FileCheck -check-prefix=GFX7-FLAT %s
> # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select
> -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck
> -check-prefix=GFX8 %s
> # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select
> -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck
> -check-prefix=GFX9 %s
> # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select
> -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck
> -check-prefix=GFX10 %s
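
(For exercising just the new GFX7-FLAT configuration by hand: it is the same hawaii run with flat-for-global forced on via -mattr=+flat-for-global. Assuming llc and FileCheck are on PATH and the working directory is an llvm-project checkout, that RUN line expands to roughly:

  # Hand-expanded GFX7-FLAT RUN line; %s replaced by the test path.
  llc -march=amdgcn -mcpu=hawaii -mattr=+flat-for-global \
      -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 \
      -o - llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir \
    | FileCheck -check-prefix=GFX7-FLAT \
      llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir

Again, this is just the manual expansion for local debugging; llvm-lit runs all five configurations in one go.)
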
> @@ -15,11 +17,31 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1, $vgpr2
>
> + ; GFX6-LABEL: name: store_global_s32_to_4
> + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
> + ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
> + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
> + ; GFX6: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]],
> [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4,
> addrspace 1)
> ; GFX7-LABEL: name: store_global_s32_to_4
> ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
> + ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
> ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
> - ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit
> $exec, implicit $flat_scr :: (store 4, addrspace 1)
> + ; GFX7: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]],
> [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4,
> addrspace 1)
> + ; GFX7-FLAT-LABEL: name: store_global_s32_to_4
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
> + ; GFX7-FLAT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0,
> implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
> ; GFX8-LABEL: name: store_global_s32_to_4
> ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
> ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> @@ -52,11 +74,31 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1, $vgpr2
>
> + ; GFX6-LABEL: name: store_global_s32_to_2
> + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
> + ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
> + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
> + ; GFX6: BUFFER_STORE_SHORT_ADDR64 [[COPY1]], [[COPY]],
> [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 2,
> addrspace 1)
> ; GFX7-LABEL: name: store_global_s32_to_2
> ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
> + ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
> ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
> - ; GFX7: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit
> $exec, implicit $flat_scr :: (store 2, addrspace 1)
> + ; GFX7: BUFFER_STORE_SHORT_ADDR64 [[COPY1]], [[COPY]],
> [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 2,
> addrspace 1)
> + ; GFX7-FLAT-LABEL: name: store_global_s32_to_2
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
> + ; GFX7-FLAT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, 0, 0,
> implicit $exec, implicit $flat_scr :: (store 2, addrspace 1)
> ; GFX8-LABEL: name: store_global_s32_to_2
> ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
> ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> @@ -89,11 +131,31 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1, $vgpr2
>
> + ; GFX6-LABEL: name: store_global_s32_to_1
> + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
> + ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
> + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
> + ; GFX6: BUFFER_STORE_BYTE_ADDR64 [[COPY1]], [[COPY]],
> [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1,
> addrspace 1)
> ; GFX7-LABEL: name: store_global_s32_to_1
> ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
> + ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
> ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
> - ; GFX7: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit
> $exec, implicit $flat_scr :: (store 1, addrspace 1)
> + ; GFX7: BUFFER_STORE_BYTE_ADDR64 [[COPY1]], [[COPY]],
> [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1,
> addrspace 1)
> + ; GFX7-FLAT-LABEL: name: store_global_s32_to_1
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
> + ; GFX7-FLAT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, 0, 0,
> implicit $exec, implicit $flat_scr :: (store 1, addrspace 1)
> ; GFX8-LABEL: name: store_global_s32_to_1
> ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
> ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> @@ -127,11 +189,21 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
>
> + ; GFX6-LABEL: name: store_global_s64
> + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
> + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
> + ; GFX6: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store 8, addrspace 1)
> ; GFX7-LABEL: name: store_global_s64
> ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
> ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
> ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit
> $exec, implicit $flat_scr :: (store 8, addrspace 1)
> + ; GFX7-FLAT-LABEL: name: store_global_s64
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
> + ; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0,
> implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
> ; GFX8-LABEL: name: store_global_s64
> ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
> ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> @@ -164,11 +236,21 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
>
> + ; GFX6-LABEL: name: store_global_s96
> + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
> + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s96) = COPY $vgpr2_vgpr3_vgpr4
> + ; GFX6: G_STORE [[COPY1]](s96), [[COPY]](p1) :: (store 12, align 16,
> addrspace 1)
> ; GFX7-LABEL: name: store_global_s96
> ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
> ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> ; GFX7: [[COPY1:%[0-9]+]]:vgpr(s96) = COPY $vgpr2_vgpr3_vgpr4
> ; GFX7: G_STORE [[COPY1]](s96), [[COPY]](p1) :: (store 12, align 16,
> addrspace 1)
> + ; GFX7-FLAT-LABEL: name: store_global_s96
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr(s96) = COPY $vgpr2_vgpr3_vgpr4
> + ; GFX7-FLAT: G_STORE [[COPY1]](s96), [[COPY]](p1) :: (store 12, align
> 16, addrspace 1)
> ; GFX8-LABEL: name: store_global_s96
> ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
> ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> @@ -200,11 +282,21 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
>
> + ; GFX6-LABEL: name: store_global_s128
> + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
> + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
> + ; GFX6: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store 16, addrspace
> 1)
> ; GFX7-LABEL: name: store_global_s128
> ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
> ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> ; GFX7: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
> ; GFX7: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store 16, addrspace
> 1)
> + ; GFX7-FLAT-LABEL: name: store_global_s128
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY
> $vgpr2_vgpr3_vgpr4_vgpr5
> + ; GFX7-FLAT: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store 16,
> addrspace 1)
> ; GFX8-LABEL: name: store_global_s128
> ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
> ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> @@ -237,11 +329,31 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
>
> + ; GFX6-LABEL: name: store_global_v2s32
> + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
> + ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
> + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX6: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
> + ; GFX6: BUFFER_STORE_DWORDX2_ADDR64 [[COPY1]], [[COPY]],
> [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 8,
> addrspace 1)
> ; GFX7-LABEL: name: store_global_v2s32
> ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
> + ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
> ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
> - ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit
> $exec, implicit $flat_scr :: (store 8, addrspace 1)
> + ; GFX7: BUFFER_STORE_DWORDX2_ADDR64 [[COPY1]], [[COPY]],
> [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 8,
> addrspace 1)
> + ; GFX7-FLAT-LABEL: name: store_global_v2s32
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
> + ; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0,
> implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
> ; GFX8-LABEL: name: store_global_v2s32
> ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
> ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> @@ -274,11 +386,31 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
>
> + ; GFX6-LABEL: name: store_global_v3s32
> + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
> + ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
> + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX6: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
> + ; GFX6: BUFFER_STORE_DWORDX3_ADDR64 [[COPY1]], [[COPY]],
> [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 12, align
> 16, addrspace 1)
> ; GFX7-LABEL: name: store_global_v3s32
> ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
> + ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
> ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> ; GFX7: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
> - ; GFX7: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit
> $exec, implicit $flat_scr :: (store 12, align 16, addrspace 1)
> + ; GFX7: BUFFER_STORE_DWORDX3_ADDR64 [[COPY1]], [[COPY]],
> [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 12, align
> 16, addrspace 1)
> + ; GFX7-FLAT-LABEL: name: store_global_v3s32
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
> + ; GFX7-FLAT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0,
> implicit $exec, implicit $flat_scr :: (store 12, align 16, addrspace 1)
> ; GFX8-LABEL: name: store_global_v3s32
> ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
> ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> @@ -311,11 +443,31 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
>
> + ; GFX6-LABEL: name: store_global_v4s32
> + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
> + ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
> + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX6: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
> + ; GFX6: BUFFER_STORE_DWORDX4_ADDR64 [[COPY1]], [[COPY]],
> [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16,
> addrspace 1)
> ; GFX7-LABEL: name: store_global_v4s32
> ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
> + ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
> ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> ; GFX7: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
> - ; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit
> $exec, implicit $flat_scr :: (store 16, addrspace 1)
> + ; GFX7: BUFFER_STORE_DWORDX4_ADDR64 [[COPY1]], [[COPY]],
> [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16,
> addrspace 1)
> + ; GFX7-FLAT-LABEL: name: store_global_v4s32
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_128 = COPY
> $vgpr2_vgpr3_vgpr4_vgpr5
> + ; GFX7-FLAT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0,
> implicit $exec, implicit $flat_scr :: (store 16, addrspace 1)
> ; GFX8-LABEL: name: store_global_v4s32
> ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
> ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> @@ -349,11 +501,21 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1, $vgpr2
>
> + ; GFX6-LABEL: name: store_global_v2s16
> + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
> + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
> + ; GFX6: G_STORE [[COPY1]](<2 x s16>), [[COPY]](p1) :: (store 4,
> addrspace 1)
> ; GFX7-LABEL: name: store_global_v2s16
> ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
> ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
> ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit
> $exec, implicit $flat_scr :: (store 4, addrspace 1)
> + ; GFX7-FLAT-LABEL: name: store_global_v2s16
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
> + ; GFX7-FLAT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0,
> implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
> ; GFX8-LABEL: name: store_global_v2s16
> ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
> ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> @@ -387,11 +549,21 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
>
> + ; GFX6-LABEL: name: store_global_v4s16
> + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
> + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3
> + ; GFX6: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store 8,
> addrspace 1)
> ; GFX7-LABEL: name: store_global_v4s16
> ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
> ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
> ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit
> $exec, implicit $flat_scr :: (store 8, addrspace 1)
> + ; GFX7-FLAT-LABEL: name: store_global_v4s16
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
> + ; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0,
> implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
> ; GFX8-LABEL: name: store_global_v4s16
> ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
> ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> @@ -425,11 +597,21 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
>
> + ; GFX6-LABEL: name: store_global_v6s16
> + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
> + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(<6 x s16>) = COPY $vgpr2_vgpr3_vgpr4
> + ; GFX6: G_STORE [[COPY1]](<6 x s16>), [[COPY]](p1) :: (store 12,
> align 16, addrspace 1)
> ; GFX7-LABEL: name: store_global_v6s16
> ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
> ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> ; GFX7: [[COPY1:%[0-9]+]]:vgpr(<6 x s16>) = COPY $vgpr2_vgpr3_vgpr4
> ; GFX7: G_STORE [[COPY1]](<6 x s16>), [[COPY]](p1) :: (store 12,
> align 16, addrspace 1)
> + ; GFX7-FLAT-LABEL: name: store_global_v6s16
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr(<6 x s16>) = COPY
> $vgpr2_vgpr3_vgpr4
> + ; GFX7-FLAT: G_STORE [[COPY1]](<6 x s16>), [[COPY]](p1) :: (store 12,
> align 16, addrspace 1)
> ; GFX8-LABEL: name: store_global_v6s16
> ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
> ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> @@ -461,11 +643,21 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
>
> + ; GFX6-LABEL: name: store_global_v8s16
> + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
> + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(<8 x s16>) = COPY
> $vgpr2_vgpr3_vgpr4_vgpr5
> + ; GFX6: G_STORE [[COPY1]](<8 x s16>), [[COPY]](p1) :: (store 16,
> addrspace 1)
> ; GFX7-LABEL: name: store_global_v8s16
> ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
> ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> ; GFX7: [[COPY1:%[0-9]+]]:vgpr(<8 x s16>) = COPY
> $vgpr2_vgpr3_vgpr4_vgpr5
> ; GFX7: G_STORE [[COPY1]](<8 x s16>), [[COPY]](p1) :: (store 16,
> addrspace 1)
> + ; GFX7-FLAT-LABEL: name: store_global_v8s16
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr(<8 x s16>) = COPY
> $vgpr2_vgpr3_vgpr4_vgpr5
> + ; GFX7-FLAT: G_STORE [[COPY1]](<8 x s16>), [[COPY]](p1) :: (store 16,
> addrspace 1)
> ; GFX8-LABEL: name: store_global_v8s16
> ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
> ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> @@ -498,11 +690,21 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
>
> + ; GFX6-LABEL: name: store_global_v2s64
> + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
> + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(<2 x s64>) = COPY
> $vgpr2_vgpr3_vgpr4_vgpr5
> + ; GFX6: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store 16,
> addrspace 1)
> ; GFX7-LABEL: name: store_global_v2s64
> ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
> ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> ; GFX7: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
> ; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit
> $exec, implicit $flat_scr :: (store 16, addrspace 1)
> + ; GFX7-FLAT-LABEL: name: store_global_v2s64
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_128 = COPY
> $vgpr2_vgpr3_vgpr4_vgpr5
> + ; GFX7-FLAT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0,
> implicit $exec, implicit $flat_scr :: (store 16, addrspace 1)
> ; GFX8-LABEL: name: store_global_v2s64
> ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
> ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> @@ -536,11 +738,21 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
>
> + ; GFX6-LABEL: name: store_global_p1
> + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
> + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY $vgpr2_vgpr3
> + ; GFX6: G_STORE [[COPY1]](p1), [[COPY]](p1) :: (store 8, addrspace 1)
> ; GFX7-LABEL: name: store_global_p1
> ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
> ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
> ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit
> $exec, implicit $flat_scr :: (store 8, addrspace 1)
> + ; GFX7-FLAT-LABEL: name: store_global_p1
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
> + ; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0,
> implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
> ; GFX8-LABEL: name: store_global_p1
> ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
> ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> @@ -574,11 +786,21 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
>
> + ; GFX6-LABEL: name: store_global_v2p1
> + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
> + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(<2 x p1>) = COPY
> $vgpr2_vgpr3_vgpr4_vgpr5
> + ; GFX6: G_STORE [[COPY1]](<2 x p1>), [[COPY]](p1) :: (store 16,
> addrspace 1)
> ; GFX7-LABEL: name: store_global_v2p1
> ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
> ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> ; GFX7: [[COPY1:%[0-9]+]]:vgpr(<2 x p1>) = COPY
> $vgpr2_vgpr3_vgpr4_vgpr5
> ; GFX7: G_STORE [[COPY1]](<2 x p1>), [[COPY]](p1) :: (store 16,
> addrspace 1)
> + ; GFX7-FLAT-LABEL: name: store_global_v2p1
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr(<2 x p1>) = COPY
> $vgpr2_vgpr3_vgpr4_vgpr5
> + ; GFX7-FLAT: G_STORE [[COPY1]](<2 x p1>), [[COPY]](p1) :: (store 16,
> addrspace 1)
> ; GFX8-LABEL: name: store_global_v2p1
> ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
> ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> @@ -611,11 +833,21 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1, $vgpr2
>
> + ; GFX6-LABEL: name: store_global_p3
> + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
> + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
> + ; GFX6: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store 4, addrspace 1)
> ; GFX7-LABEL: name: store_global_p3
> ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
> ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
> ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit
> $exec, implicit $flat_scr :: (store 4, addrspace 1)
> + ; GFX7-FLAT-LABEL: name: store_global_p3
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
> + ; GFX7-FLAT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0,
> implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
> ; GFX8-LABEL: name: store_global_p3
> ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
> ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> @@ -649,11 +881,21 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
>
> + ; GFX6-LABEL: name: store_global_v2p3
> + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
> + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(<2 x p3>) = COPY $vgpr2_vgpr3
> + ; GFX6: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store 8,
> addrspace 1)
> ; GFX7-LABEL: name: store_global_v2p3
> ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
> ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> ; GFX7: [[COPY1:%[0-9]+]]:vgpr(<2 x p3>) = COPY $vgpr2_vgpr3
> ; GFX7: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store 8,
> addrspace 1)
> + ; GFX7-FLAT-LABEL: name: store_global_v2p3
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr(<2 x p3>) = COPY $vgpr2_vgpr3
> + ; GFX7-FLAT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store 8,
> addrspace 1)
> ; GFX8-LABEL: name: store_global_v2p3
> ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
> ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> @@ -685,11 +927,21 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1, $vgpr2
>
> + ; GFX6-LABEL: name: store_atomic_global_s32
> + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
> + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
> + ; GFX6: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store monotonic 4,
> addrspace 1)
> ; GFX7-LABEL: name: store_atomic_global_s32
> ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
> ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> ; GFX7: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
> ; GFX7: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store monotonic 4,
> addrspace 1)
> + ; GFX7-FLAT-LABEL: name: store_atomic_global_s32
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
> + ; GFX7-FLAT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store monotonic
> 4, addrspace 1)
> ; GFX8-LABEL: name: store_atomic_global_s32
> ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
> ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> @@ -722,11 +974,21 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
>
> + ; GFX6-LABEL: name: store_atomic_global_s64
> + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
> + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
> + ; GFX6: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store monotonic 8,
> addrspace 1)
> ; GFX7-LABEL: name: store_atomic_global_s64
> ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
> ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> ; GFX7: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
> ; GFX7: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store monotonic 8,
> addrspace 1)
> + ; GFX7-FLAT-LABEL: name: store_atomic_global_s64
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
> + ; GFX7-FLAT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store monotonic
> 8, addrspace 1)
> ; GFX8-LABEL: name: store_atomic_global_s64
> ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
> ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
> @@ -759,6 +1021,26 @@ body: |
> bb.0:
> liveins: $vgpr0_vgpr1, $vgpr2
>
> + ; GFX6-LABEL: name: store_global_s32_gep_2047
> + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
> + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
> + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047,
> implicit $exec
> + ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0,
> implicit $exec
> + ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE
> [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE
> [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
> + ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> + ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> + ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> + ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> + ; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32,
> [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]],
> [[COPY3]], 0, implicit $exec
> + ; GFX6: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64
> [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE
> [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
> + ; GFX6: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[REG_SEQUENCE3]],
> [[REG_SEQUENCE2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4,
> addrspace 1)
> ; GFX7-LABEL: name: store_global_s32_gep_2047
> ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
> ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> @@ -766,14 +1048,34 @@ body: |
> ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec
> ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
> ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
> + ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
> + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
> + ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
> + ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
> ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
> - ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
> - ; GFX7: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
> + ; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
> + ; GFX7: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
> + ; GFX7-FLAT-LABEL: name: store_global_s32_gep_2047
> + ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2
> + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
> + ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
> + ; GFX7-FLAT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec
> + ; GFX7-FLAT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
> + ; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
> + ; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
> + ; GFX7-FLAT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
> + ; GFX7-FLAT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
> + ; GFX7-FLAT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
> + ; GFX7-FLAT: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
> + ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
> + ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
> + ; GFX7-FLAT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
> ; GFX8-LABEL: name: store_global_s32_gep_2047
> ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
> ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
>
> diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.dec.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.dec.ll
> index b1e389053da2..4945f7a338b6 100644
> --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.dec.ll
> +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.dec.ll
> @@ -1,6 +1,6 @@
> ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
> -; RUN: llc -global-isel -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
> -; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s
> +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=CI %s
> +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI %s
>
> ; FIXME: Merge with other test. DS offset folding doesn't work due to
> ; register bank copies, and no return optimization is missing.
> @@ -19,30 +19,30 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1
> define amdgpu_kernel void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out,
> i32 addrspace(3)* %ptr) #0 {
> ; CI-LABEL: lds_atomic_dec_ret_i32:
> ; CI: ; %bb.0:
> -; CI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9
> -; CI-NEXT: s_load_dword s0, s[0:1], 0xb
> +; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> +; CI-NEXT: s_load_dword s2, s[4:5], 0x2
> ; CI-NEXT: v_mov_b32_e32 v0, 42
> ; CI-NEXT: s_mov_b32 m0, -1
> ; CI-NEXT: s_waitcnt lgkmcnt(0)
> -; CI-NEXT: v_mov_b32_e32 v1, s0
> +; CI-NEXT: v_mov_b32_e32 v1, s2
> ; CI-NEXT: ds_dec_rtn_u32 v2, v1, v0
> -; CI-NEXT: v_mov_b32_e32 v0, s2
> -; CI-NEXT: v_mov_b32_e32 v1, s3
> +; CI-NEXT: v_mov_b32_e32 v0, s0
> +; CI-NEXT: v_mov_b32_e32 v1, s1
> ; CI-NEXT: s_waitcnt lgkmcnt(0)
> ; CI-NEXT: flat_store_dword v[0:1], v2
> ; CI-NEXT: s_endpgm
> ;
> ; VI-LABEL: lds_atomic_dec_ret_i32:
> ; VI: ; %bb.0:
> -; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
> -; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
> +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> +; VI-NEXT: s_load_dword s2, s[4:5], 0x8
> ; VI-NEXT: v_mov_b32_e32 v0, 42
> ; VI-NEXT: s_mov_b32 m0, -1
> ; VI-NEXT: s_waitcnt lgkmcnt(0)
> -; VI-NEXT: v_mov_b32_e32 v1, s0
> +; VI-NEXT: v_mov_b32_e32 v1, s2
> ; VI-NEXT: ds_dec_rtn_u32 v2, v1, v0
> -; VI-NEXT: v_mov_b32_e32 v0, s2
> -; VI-NEXT: v_mov_b32_e32 v1, s3
> +; VI-NEXT: v_mov_b32_e32 v0, s0
> +; VI-NEXT: v_mov_b32_e32 v1, s1
> ; VI-NEXT: s_waitcnt lgkmcnt(0)
> ; VI-NEXT: flat_store_dword v[0:1], v2
> ; VI-NEXT: s_endpgm
> @@ -67,32 +67,32 @@ define amdgpu_kernel void @lds_atomic_dec_ret_i32(i32
> addrspace(1)* %out, i32 ad
> define amdgpu_kernel void @lds_atomic_dec_ret_i32_offset(i32
> addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
> ; CI-LABEL: lds_atomic_dec_ret_i32_offset:
> ; CI: ; %bb.0:
> -; CI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9
> -; CI-NEXT: s_load_dword s0, s[0:1], 0xb
> +; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> +; CI-NEXT: s_load_dword s2, s[4:5], 0x2
> ; CI-NEXT: v_mov_b32_e32 v0, 42
> ; CI-NEXT: s_mov_b32 m0, -1
> ; CI-NEXT: s_waitcnt lgkmcnt(0)
> -; CI-NEXT: s_add_u32 s0, s0, 16
> -; CI-NEXT: v_mov_b32_e32 v1, s0
> +; CI-NEXT: s_add_u32 s2, s2, 16
> +; CI-NEXT: v_mov_b32_e32 v1, s2
> ; CI-NEXT: ds_dec_rtn_u32 v2, v1, v0
> -; CI-NEXT: v_mov_b32_e32 v0, s2
> -; CI-NEXT: v_mov_b32_e32 v1, s3
> +; CI-NEXT: v_mov_b32_e32 v0, s0
> +; CI-NEXT: v_mov_b32_e32 v1, s1
> ; CI-NEXT: s_waitcnt lgkmcnt(0)
> ; CI-NEXT: flat_store_dword v[0:1], v2
> ; CI-NEXT: s_endpgm
> ;
> ; VI-LABEL: lds_atomic_dec_ret_i32_offset:
> ; VI: ; %bb.0:
> -; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
> -; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
> +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> +; VI-NEXT: s_load_dword s2, s[4:5], 0x8
> ; VI-NEXT: v_mov_b32_e32 v0, 42
> ; VI-NEXT: s_mov_b32 m0, -1
> ; VI-NEXT: s_waitcnt lgkmcnt(0)
> -; VI-NEXT: s_add_u32 s0, s0, 16
> -; VI-NEXT: v_mov_b32_e32 v1, s0
> +; VI-NEXT: s_add_u32 s2, s2, 16
> +; VI-NEXT: v_mov_b32_e32 v1, s2
> ; VI-NEXT: ds_dec_rtn_u32 v2, v1, v0
> -; VI-NEXT: v_mov_b32_e32 v0, s2
> -; VI-NEXT: v_mov_b32_e32 v1, s3
> +; VI-NEXT: v_mov_b32_e32 v0, s0
> +; VI-NEXT: v_mov_b32_e32 v1, s1
> ; VI-NEXT: s_waitcnt lgkmcnt(0)
> ; VI-NEXT: flat_store_dword v[0:1], v2
> ; VI-NEXT: s_endpgm
> @@ -117,9 +117,26 @@ define amdgpu_kernel void
> @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out,
> }
>
> define amdgpu_kernel void @lds_atomic_dec_noret_i32(i32 addrspace(3)*
> %ptr) nounwind {
> +; GCN-LABEL: lds_atomic_dec_noret_i32:
> +; GCN: ; %bb.0:
> +; GCN-NEXT: s_load_dword s0, s[4:5], 0x0
> +; GCN-NEXT: v_mov_b32_e32 v0, 42
> +; GCN-NEXT: s_mov_b32 m0, -1
> +; GCN-NEXT: s_waitcnt lgkmcnt(0)
> +; GCN-NEXT: v_mov_b32_e32 v1, s0
> +; GCN-NEXT: ds_dec_rtn_u32 v0, v1, v0
> +; GCN-NEXT: s_endpgm
> +; GFX9-LABEL: lds_atomic_dec_noret_i32:
> +; GFX9: ; %bb.0:
> +; GFX9-NEXT: s_load_dword s0, s[0:1], 0x24
> +; GFX9-NEXT: v_mov_b32_e32 v1, 42
> +; GFX9-NEXT: s_waitcnt lgkmcnt(0)
> +; GFX9-NEXT: v_mov_b32_e32 v0, s0
> +; GFX9-NEXT: ds_dec_rtn_u32 v0, v0, v1
> +; GFX9-NEXT: s_endpgm
> ; CI-LABEL: lds_atomic_dec_noret_i32:
> ; CI: ; %bb.0:
> -; CI-NEXT: s_load_dword s0, s[0:1], 0x9
> +; CI-NEXT: s_load_dword s0, s[4:5], 0x0
> ; CI-NEXT: v_mov_b32_e32 v0, 42
> ; CI-NEXT: s_mov_b32 m0, -1
> ; CI-NEXT: s_waitcnt lgkmcnt(0)
> @@ -129,21 +146,13 @@ define amdgpu_kernel void
> @lds_atomic_dec_noret_i32(i32 addrspace(3)* %ptr) noun
> ;
> ; VI-LABEL: lds_atomic_dec_noret_i32:
> ; VI: ; %bb.0:
> -; VI-NEXT: s_load_dword s0, s[0:1], 0x24
> +; VI-NEXT: s_load_dword s0, s[4:5], 0x0
> ; VI-NEXT: v_mov_b32_e32 v0, 42
> ; VI-NEXT: s_mov_b32 m0, -1
> ; VI-NEXT: s_waitcnt lgkmcnt(0)
> ; VI-NEXT: v_mov_b32_e32 v1, s0
> ; VI-NEXT: ds_dec_rtn_u32 v0, v1, v0
> ; VI-NEXT: s_endpgm
> -; GFX9-LABEL: lds_atomic_dec_noret_i32:
> -; GFX9: ; %bb.0:
> -; GFX9-NEXT: s_load_dword s0, s[0:1], 0x24
> -; GFX9-NEXT: v_mov_b32_e32 v1, 42
> -; GFX9-NEXT: s_waitcnt lgkmcnt(0)
> -; GFX9-NEXT: v_mov_b32_e32 v0, s0
> -; GFX9-NEXT: ds_dec_rtn_u32 v0, v0, v1
> -; GFX9-NEXT: s_endpgm
> %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)*
> %ptr, i32 42, i32 0, i32 0, i1 false)
> ret void
> }
> @@ -151,7 +160,7 @@ define amdgpu_kernel void
> @lds_atomic_dec_noret_i32(i32 addrspace(3)* %ptr) noun
> define amdgpu_kernel void @lds_atomic_dec_noret_i32_offset(i32
> addrspace(3)* %ptr) nounwind {
> ; CI-LABEL: lds_atomic_dec_noret_i32_offset:
> ; CI: ; %bb.0:
> -; CI-NEXT: s_load_dword s0, s[0:1], 0x9
> +; CI-NEXT: s_load_dword s0, s[4:5], 0x0
> ; CI-NEXT: v_mov_b32_e32 v0, 42
> ; CI-NEXT: s_mov_b32 m0, -1
> ; CI-NEXT: s_waitcnt lgkmcnt(0)
> @@ -162,7 +171,7 @@ define amdgpu_kernel void
> @lds_atomic_dec_noret_i32_offset(i32 addrspace(3)* %pt
> ;
> ; VI-LABEL: lds_atomic_dec_noret_i32_offset:
> ; VI: ; %bb.0:
> -; VI-NEXT: s_load_dword s0, s[0:1], 0x24
> +; VI-NEXT: s_load_dword s0, s[4:5], 0x0
> ; VI-NEXT: v_mov_b32_e32 v0, 42
> ; VI-NEXT: s_mov_b32 m0, -1
> ; VI-NEXT: s_waitcnt lgkmcnt(0)
> @@ -187,7 +196,7 @@ define amdgpu_kernel void
> @lds_atomic_dec_noret_i32_offset(i32 addrspace(3)* %pt
> define amdgpu_kernel void @global_atomic_dec_ret_i32(i32 addrspace(1)*
> %out, i32 addrspace(1)* %ptr) #0 {
> ; CI-LABEL: global_atomic_dec_ret_i32:
> ; CI: ; %bb.0:
> -; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
> +; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; CI-NEXT: v_mov_b32_e32 v2, 42
> ; CI-NEXT: s_waitcnt lgkmcnt(0)
> ; CI-NEXT: v_mov_b32_e32 v0, s2
> @@ -201,7 +210,7 @@ define amdgpu_kernel void
> @global_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32
> ;
> ; VI-LABEL: global_atomic_dec_ret_i32:
> ; VI: ; %bb.0:
> -; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
> +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; VI-NEXT: v_mov_b32_e32 v2, 42
> ; VI-NEXT: s_waitcnt lgkmcnt(0)
> ; VI-NEXT: v_mov_b32_e32 v0, s2
> @@ -233,7 +242,7 @@ define amdgpu_kernel void
> @global_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32
> define amdgpu_kernel void @global_atomic_dec_ret_i32_offset(i32
> addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
> ; CI-LABEL: global_atomic_dec_ret_i32_offset:
> ; CI: ; %bb.0:
> -; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
> +; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; CI-NEXT: v_mov_b32_e32 v2, 42
> ; CI-NEXT: s_waitcnt lgkmcnt(0)
> ; CI-NEXT: s_add_u32 s2, s2, 16
> @@ -249,7 +258,7 @@ define amdgpu_kernel void
> @global_atomic_dec_ret_i32_offset(i32 addrspace(1)* %o
> ;
> ; VI-LABEL: global_atomic_dec_ret_i32_offset:
> ; VI: ; %bb.0:
> -; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
> +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; VI-NEXT: v_mov_b32_e32 v2, 42
> ; VI-NEXT: s_waitcnt lgkmcnt(0)
> ; VI-NEXT: s_add_u32 s2, s2, 16
> @@ -286,7 +295,7 @@ define amdgpu_kernel void
> @global_atomic_dec_ret_i32_offset(i32 addrspace(1)* %o
> define amdgpu_kernel void @global_atomic_dec_noret_i32(i32 addrspace(1)*
> %ptr) nounwind {
> ; CI-LABEL: global_atomic_dec_noret_i32:
> ; CI: ; %bb.0:
> -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
> +; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> ; CI-NEXT: v_mov_b32_e32 v2, 42
> ; CI-NEXT: s_waitcnt lgkmcnt(0)
> ; CI-NEXT: v_mov_b32_e32 v0, s0
> @@ -296,7 +305,7 @@ define amdgpu_kernel void
> @global_atomic_dec_noret_i32(i32 addrspace(1)* %ptr) n
> ;
> ; VI-LABEL: global_atomic_dec_noret_i32:
> ; VI: ; %bb.0:
> -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
> +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> ; VI-NEXT: v_mov_b32_e32 v2, 42
> ; VI-NEXT: s_waitcnt lgkmcnt(0)
> ; VI-NEXT: v_mov_b32_e32 v0, s0
> @@ -319,7 +328,7 @@ define amdgpu_kernel void
> @global_atomic_dec_noret_i32(i32 addrspace(1)* %ptr) n
> define amdgpu_kernel void @global_atomic_dec_noret_i32_offset(i32
> addrspace(1)* %ptr) nounwind {
> ; CI-LABEL: global_atomic_dec_noret_i32_offset:
> ; CI: ; %bb.0:
> -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
> +; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> ; CI-NEXT: v_mov_b32_e32 v2, 42
> ; CI-NEXT: s_waitcnt lgkmcnt(0)
> ; CI-NEXT: s_add_u32 s0, s0, 16
> @@ -331,7 +340,7 @@ define amdgpu_kernel void
> @global_atomic_dec_noret_i32_offset(i32 addrspace(1)*
> ;
> ; VI-LABEL: global_atomic_dec_noret_i32_offset:
> ; VI: ; %bb.0:
> -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
> +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> ; VI-NEXT: v_mov_b32_e32 v2, 42
> ; VI-NEXT: s_waitcnt lgkmcnt(0)
> ; VI-NEXT: s_add_u32 s0, s0, 16
> @@ -362,7 +371,7 @@ define amdgpu_kernel void
> @global_atomic_dec_ret_i32_offset_addr64(i32 addrspace
> ; CI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
> ; CI-NEXT: v_mul_lo_u32 v2, 0, v0
> ; CI-NEXT: v_mul_lo_u32 v1, 4, v1
> -; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
> +; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; CI-NEXT: v_mul_hi_u32 v3, 4, v0
> ; CI-NEXT: v_mul_lo_u32 v4, 4, v0
> ; CI-NEXT: v_add_i32_e32 v1, vcc, v2, v1
> @@ -387,7 +396,7 @@ define amdgpu_kernel void
> @global_atomic_dec_ret_i32_offset_addr64(i32 addrspace
> ; VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
> ; VI-NEXT: v_mul_lo_u32 v2, 0, v0
> ; VI-NEXT: v_mul_lo_u32 v1, 4, v1
> -; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
> +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; VI-NEXT: v_mul_hi_u32 v3, 4, v0
> ; VI-NEXT: v_mul_lo_u32 v4, 4, v0
> ; VI-NEXT: v_add_u32_e32 v1, vcc, v2, v1
> @@ -445,7 +454,7 @@ define amdgpu_kernel void
> @global_atomic_dec_noret_i32_offset_addr64(i32 addrspa
> ; CI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
> ; CI-NEXT: v_mul_lo_u32 v2, 0, v0
> ; CI-NEXT: v_mul_lo_u32 v1, 4, v1
> -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
> +; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> ; CI-NEXT: v_mul_hi_u32 v3, 4, v0
> ; CI-NEXT: v_mul_lo_u32 v0, 4, v0
> ; CI-NEXT: v_add_i32_e32 v1, vcc, v2, v1
> @@ -465,7 +474,7 @@ define amdgpu_kernel void
> @global_atomic_dec_noret_i32_offset_addr64(i32 addrspa
> ; VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
> ; VI-NEXT: v_mul_lo_u32 v2, 0, v0
> ; VI-NEXT: v_mul_lo_u32 v1, 4, v1
> -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
> +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> ; VI-NEXT: v_mul_hi_u32 v3, 4, v0
> ; VI-NEXT: v_mul_lo_u32 v0, 4, v0
> ; VI-NEXT: v_add_u32_e32 v1, vcc, v2, v1
> @@ -508,7 +517,7 @@ define amdgpu_kernel void
> @global_atomic_dec_noret_i32_offset_addr64(i32 addrspa
> define amdgpu_kernel void @flat_atomic_dec_ret_i32(i32* %out, i32* %ptr)
> #0 {
> ; CI-LABEL: flat_atomic_dec_ret_i32:
> ; CI: ; %bb.0:
> -; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
> +; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; CI-NEXT: v_mov_b32_e32 v2, 42
> ; CI-NEXT: s_waitcnt lgkmcnt(0)
> ; CI-NEXT: v_mov_b32_e32 v0, s2
> @@ -522,7 +531,7 @@ define amdgpu_kernel void
> @flat_atomic_dec_ret_i32(i32* %out, i32* %ptr) #0 {
> ;
> ; VI-LABEL: flat_atomic_dec_ret_i32:
> ; VI: ; %bb.0:
> -; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
> +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; VI-NEXT: v_mov_b32_e32 v2, 42
> ; VI-NEXT: s_waitcnt lgkmcnt(0)
> ; VI-NEXT: v_mov_b32_e32 v0, s2
> @@ -554,7 +563,7 @@ define amdgpu_kernel void
> @flat_atomic_dec_ret_i32(i32* %out, i32* %ptr) #0 {
> define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset(i32* %out, i32*
> %ptr) #0 {
> ; CI-LABEL: flat_atomic_dec_ret_i32_offset:
> ; CI: ; %bb.0:
> -; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
> +; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; CI-NEXT: v_mov_b32_e32 v2, 42
> ; CI-NEXT: s_waitcnt lgkmcnt(0)
> ; CI-NEXT: s_add_u32 s2, s2, 16
> @@ -570,7 +579,7 @@ define amdgpu_kernel void
> @flat_atomic_dec_ret_i32_offset(i32* %out, i32* %ptr)
> ;
> ; VI-LABEL: flat_atomic_dec_ret_i32_offset:
> ; VI: ; %bb.0:
> -; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
> +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; VI-NEXT: v_mov_b32_e32 v2, 42
> ; VI-NEXT: s_waitcnt lgkmcnt(0)
> ; VI-NEXT: s_add_u32 s2, s2, 16
> @@ -607,7 +616,7 @@ define amdgpu_kernel void
> @flat_atomic_dec_ret_i32_offset(i32* %out, i32* %ptr)
> define amdgpu_kernel void @flat_atomic_dec_noret_i32(i32* %ptr) nounwind {
> ; CI-LABEL: flat_atomic_dec_noret_i32:
> ; CI: ; %bb.0:
> -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
> +; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> ; CI-NEXT: v_mov_b32_e32 v2, 42
> ; CI-NEXT: s_waitcnt lgkmcnt(0)
> ; CI-NEXT: v_mov_b32_e32 v0, s0
> @@ -617,7 +626,7 @@ define amdgpu_kernel void
> @flat_atomic_dec_noret_i32(i32* %ptr) nounwind {
> ;
> ; VI-LABEL: flat_atomic_dec_noret_i32:
> ; VI: ; %bb.0:
> -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
> +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> ; VI-NEXT: v_mov_b32_e32 v2, 42
> ; VI-NEXT: s_waitcnt lgkmcnt(0)
> ; VI-NEXT: v_mov_b32_e32 v0, s0
> @@ -640,7 +649,7 @@ define amdgpu_kernel void
> @flat_atomic_dec_noret_i32(i32* %ptr) nounwind {
> define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(i32* %ptr)
> nounwind {
> ; CI-LABEL: flat_atomic_dec_noret_i32_offset:
> ; CI: ; %bb.0:
> -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
> +; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> ; CI-NEXT: v_mov_b32_e32 v2, 42
> ; CI-NEXT: s_waitcnt lgkmcnt(0)
> ; CI-NEXT: s_add_u32 s0, s0, 16
> @@ -652,7 +661,7 @@ define amdgpu_kernel void
> @flat_atomic_dec_noret_i32_offset(i32* %ptr) nounwind
> ;
> ; VI-LABEL: flat_atomic_dec_noret_i32_offset:
> ; VI: ; %bb.0:
> -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
> +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> ; VI-NEXT: v_mov_b32_e32 v2, 42
> ; VI-NEXT: s_waitcnt lgkmcnt(0)
> ; VI-NEXT: s_add_u32 s0, s0, 16
> @@ -683,7 +692,7 @@ define amdgpu_kernel void
> @flat_atomic_dec_ret_i32_offset_addr64(i32* %out, i32*
> ; CI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
> ; CI-NEXT: v_mul_lo_u32 v2, 0, v0
> ; CI-NEXT: v_mul_lo_u32 v1, 4, v1
> -; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
> +; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; CI-NEXT: v_mul_hi_u32 v3, 4, v0
> ; CI-NEXT: v_mul_lo_u32 v4, 4, v0
> ; CI-NEXT: v_add_i32_e32 v1, vcc, v2, v1
> @@ -708,7 +717,7 @@ define amdgpu_kernel void
> @flat_atomic_dec_ret_i32_offset_addr64(i32* %out, i32*
> ; VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
> ; VI-NEXT: v_mul_lo_u32 v2, 0, v0
> ; VI-NEXT: v_mul_lo_u32 v1, 4, v1
> -; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
> +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; VI-NEXT: v_mul_hi_u32 v3, 4, v0
> ; VI-NEXT: v_mul_lo_u32 v4, 4, v0
> ; VI-NEXT: v_add_u32_e32 v1, vcc, v2, v1
> @@ -766,7 +775,7 @@ define amdgpu_kernel void
> @flat_atomic_dec_noret_i32_offset_addr64(i32* %ptr) #0
> ; CI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
> ; CI-NEXT: v_mul_lo_u32 v2, 0, v0
> ; CI-NEXT: v_mul_lo_u32 v1, 4, v1
> -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
> +; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> ; CI-NEXT: v_mul_hi_u32 v3, 4, v0
> ; CI-NEXT: v_mul_lo_u32 v0, 4, v0
> ; CI-NEXT: v_add_i32_e32 v1, vcc, v2, v1
> @@ -786,7 +795,7 @@ define amdgpu_kernel void
> @flat_atomic_dec_noret_i32_offset_addr64(i32* %ptr) #0
> ; VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
> ; VI-NEXT: v_mul_lo_u32 v2, 0, v0
> ; VI-NEXT: v_mul_lo_u32 v1, 4, v1
> -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
> +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> ; VI-NEXT: v_mul_hi_u32 v3, 4, v0
> ; VI-NEXT: v_mul_lo_u32 v0, 4, v0
> ; VI-NEXT: v_add_u32_e32 v1, vcc, v2, v1
> @@ -829,7 +838,7 @@ define amdgpu_kernel void
> @flat_atomic_dec_noret_i32_offset_addr64(i32* %ptr) #0
> define amdgpu_kernel void @flat_atomic_dec_ret_i64(i64* %out, i64* %ptr)
> #0 {
> ; CI-LABEL: flat_atomic_dec_ret_i64:
> ; CI: ; %bb.0:
> -; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
> +; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; CI-NEXT: v_mov_b32_e32 v2, 42
> ; CI-NEXT: v_mov_b32_e32 v3, 0
> ; CI-NEXT: s_waitcnt lgkmcnt(0)
> @@ -844,7 +853,7 @@ define amdgpu_kernel void
> @flat_atomic_dec_ret_i64(i64* %out, i64* %ptr) #0 {
> ;
> ; VI-LABEL: flat_atomic_dec_ret_i64:
> ; VI: ; %bb.0:
> -; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
> +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; VI-NEXT: v_mov_b32_e32 v2, 42
> ; VI-NEXT: v_mov_b32_e32 v3, 0
> ; VI-NEXT: s_waitcnt lgkmcnt(0)
> @@ -878,7 +887,7 @@ define amdgpu_kernel void
> @flat_atomic_dec_ret_i64(i64* %out, i64* %ptr) #0 {
> define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset(i64* %out, i64*
> %ptr) #0 {
> ; CI-LABEL: flat_atomic_dec_ret_i64_offset:
> ; CI: ; %bb.0:
> -; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
> +; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; CI-NEXT: v_mov_b32_e32 v2, 42
> ; CI-NEXT: v_mov_b32_e32 v3, 0
> ; CI-NEXT: s_waitcnt lgkmcnt(0)
> @@ -895,7 +904,7 @@ define amdgpu_kernel void
> @flat_atomic_dec_ret_i64_offset(i64* %out, i64* %ptr)
> ;
> ; VI-LABEL: flat_atomic_dec_ret_i64_offset:
> ; VI: ; %bb.0:
> -; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
> +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; VI-NEXT: v_mov_b32_e32 v2, 42
> ; VI-NEXT: v_mov_b32_e32 v3, 0
> ; VI-NEXT: s_waitcnt lgkmcnt(0)
> @@ -934,7 +943,7 @@ define amdgpu_kernel void
> @flat_atomic_dec_ret_i64_offset(i64* %out, i64* %ptr)
> define amdgpu_kernel void @flat_atomic_dec_noret_i64(i64* %ptr) nounwind {
> ; CI-LABEL: flat_atomic_dec_noret_i64:
> ; CI: ; %bb.0:
> -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
> +; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> ; CI-NEXT: v_mov_b32_e32 v2, 42
> ; CI-NEXT: v_mov_b32_e32 v3, 0
> ; CI-NEXT: s_waitcnt lgkmcnt(0)
> @@ -945,7 +954,7 @@ define amdgpu_kernel void
> @flat_atomic_dec_noret_i64(i64* %ptr) nounwind {
> ;
> ; VI-LABEL: flat_atomic_dec_noret_i64:
> ; VI: ; %bb.0:
> -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
> +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> ; VI-NEXT: v_mov_b32_e32 v2, 42
> ; VI-NEXT: v_mov_b32_e32 v3, 0
> ; VI-NEXT: s_waitcnt lgkmcnt(0)
> @@ -970,7 +979,7 @@ define amdgpu_kernel void
> @flat_atomic_dec_noret_i64(i64* %ptr) nounwind {
> define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset(i64* %ptr)
> nounwind {
> ; CI-LABEL: flat_atomic_dec_noret_i64_offset:
> ; CI: ; %bb.0:
> -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
> +; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> ; CI-NEXT: v_mov_b32_e32 v2, 42
> ; CI-NEXT: v_mov_b32_e32 v3, 0
> ; CI-NEXT: s_waitcnt lgkmcnt(0)
> @@ -983,7 +992,7 @@ define amdgpu_kernel void
> @flat_atomic_dec_noret_i64_offset(i64* %ptr) nounwind
> ;
> ; VI-LABEL: flat_atomic_dec_noret_i64_offset:
> ; VI: ; %bb.0:
> -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
> +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> ; VI-NEXT: v_mov_b32_e32 v2, 42
> ; VI-NEXT: v_mov_b32_e32 v3, 0
> ; VI-NEXT: s_waitcnt lgkmcnt(0)
> @@ -1016,7 +1025,7 @@ define amdgpu_kernel void
> @flat_atomic_dec_ret_i64_offset_addr64(i64* %out, i64*
> ; CI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
> ; CI-NEXT: v_mul_lo_u32 v2, 0, v0
> ; CI-NEXT: v_mul_lo_u32 v1, 8, v1
> -; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
> +; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; CI-NEXT: v_mul_hi_u32 v3, 8, v0
> ; CI-NEXT: v_mul_lo_u32 v4, 8, v0
> ; CI-NEXT: v_add_i32_e32 v1, vcc, v2, v1
> @@ -1042,7 +1051,7 @@ define amdgpu_kernel void
> @flat_atomic_dec_ret_i64_offset_addr64(i64* %out, i64*
> ; VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
> ; VI-NEXT: v_mul_lo_u32 v2, 0, v0
> ; VI-NEXT: v_mul_lo_u32 v1, 8, v1
> -; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
> +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; VI-NEXT: v_mul_hi_u32 v3, 8, v0
> ; VI-NEXT: v_mul_lo_u32 v4, 8, v0
> ; VI-NEXT: v_add_u32_e32 v1, vcc, v2, v1
> @@ -1102,7 +1111,7 @@ define amdgpu_kernel void
> @flat_atomic_dec_noret_i64_offset_addr64(i64* %ptr) #0
> ; CI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
> ; CI-NEXT: v_mul_lo_u32 v2, 0, v0
> ; CI-NEXT: v_mul_lo_u32 v1, 8, v1
> -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
> +; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> ; CI-NEXT: v_mul_hi_u32 v3, 8, v0
> ; CI-NEXT: v_mul_lo_u32 v0, 8, v0
> ; CI-NEXT: v_add_i32_e32 v1, vcc, v2, v1
> @@ -1123,7 +1132,7 @@ define amdgpu_kernel void
> @flat_atomic_dec_noret_i64_offset_addr64(i64* %ptr) #0
> ; VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
> ; VI-NEXT: v_mul_lo_u32 v2, 0, v0
> ; VI-NEXT: v_mul_lo_u32 v1, 8, v1
> -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
> +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> ; VI-NEXT: v_mul_hi_u32 v3, 8, v0
> ; VI-NEXT: v_mul_lo_u32 v0, 8, v0
> ; VI-NEXT: v_add_u32_e32 v1, vcc, v2, v1
> @@ -1171,7 +1180,7 @@ define amdgpu_kernel void
> @atomic_dec_shl_base_lds_0(i32 addrspace(1)* %out, i32
> ; CI-LABEL: atomic_dec_shl_base_lds_0:
> ; CI: ; %bb.0:
> ; CI-NEXT: v_mul_lo_u32 v5, 4, v0
> -; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
> +; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; CI-NEXT: v_mov_b32_e32 v6, 9
> ; CI-NEXT: s_mov_b32 m0, -1
> ; CI-NEXT: v_add_i32_e32 v5, vcc, 0, v5
> @@ -1190,7 +1199,7 @@ define amdgpu_kernel void
> @atomic_dec_shl_base_lds_0(i32 addrspace(1)* %out, i32
> ; VI-LABEL: atomic_dec_shl_base_lds_0:
> ; VI: ; %bb.0:
> ; VI-NEXT: v_mul_lo_u32 v5, 4, v0
> -; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
> +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; VI-NEXT: v_mov_b32_e32 v6, 9
> ; VI-NEXT: s_mov_b32 m0, -1
> ; VI-NEXT: v_add_u32_e32 v5, vcc, 0, v5
> @@ -1234,32 +1243,32 @@ define amdgpu_kernel void
> @atomic_dec_shl_base_lds_0(i32 addrspace(1)* %out, i32
> define amdgpu_kernel void @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out,
> i64 addrspace(3)* %ptr) #0 {
> ; CI-LABEL: lds_atomic_dec_ret_i64:
> ; CI: ; %bb.0:
> -; CI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9
> -; CI-NEXT: s_load_dword s0, s[0:1], 0xb
> +; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> +; CI-NEXT: s_load_dword s2, s[4:5], 0x2
> ; CI-NEXT: v_mov_b32_e32 v0, 42
> ; CI-NEXT: v_mov_b32_e32 v1, 0
> ; CI-NEXT: s_mov_b32 m0, -1
> ; CI-NEXT: s_waitcnt lgkmcnt(0)
> -; CI-NEXT: v_mov_b32_e32 v2, s0
> -; CI-NEXT: ds_dec_rtn_u64 v[0:1], v2, v[0:1]
> ; CI-NEXT: v_mov_b32_e32 v2, s2
> -; CI-NEXT: v_mov_b32_e32 v3, s3
> +; CI-NEXT: ds_dec_rtn_u64 v[0:1], v2, v[0:1]
> +; CI-NEXT: v_mov_b32_e32 v3, s1
> +; CI-NEXT: v_mov_b32_e32 v2, s0
> ; CI-NEXT: s_waitcnt lgkmcnt(0)
> ; CI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
> ; CI-NEXT: s_endpgm
> ;
> ; VI-LABEL: lds_atomic_dec_ret_i64:
> ; VI: ; %bb.0:
> -; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
> -; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
> +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> +; VI-NEXT: s_load_dword s2, s[4:5], 0x8
> ; VI-NEXT: v_mov_b32_e32 v0, 42
> ; VI-NEXT: v_mov_b32_e32 v1, 0
> ; VI-NEXT: s_mov_b32 m0, -1
> ; VI-NEXT: s_waitcnt lgkmcnt(0)
> -; VI-NEXT: v_mov_b32_e32 v2, s0
> -; VI-NEXT: ds_dec_rtn_u64 v[0:1], v2, v[0:1]
> ; VI-NEXT: v_mov_b32_e32 v2, s2
> -; VI-NEXT: v_mov_b32_e32 v3, s3
> +; VI-NEXT: ds_dec_rtn_u64 v[0:1], v2, v[0:1]
> +; VI-NEXT: v_mov_b32_e32 v3, s1
> +; VI-NEXT: v_mov_b32_e32 v2, s0
> ; VI-NEXT: s_waitcnt lgkmcnt(0)
> ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
> ; VI-NEXT: s_endpgm
> @@ -1285,34 +1294,34 @@ define amdgpu_kernel void
> @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 ad
> define amdgpu_kernel void @lds_atomic_dec_ret_i64_offset(i64
> addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
> ; CI-LABEL: lds_atomic_dec_ret_i64_offset:
> ; CI: ; %bb.0:
> -; CI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9
> -; CI-NEXT: s_load_dword s0, s[0:1], 0xb
> +; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> +; CI-NEXT: s_load_dword s2, s[4:5], 0x2
> ; CI-NEXT: v_mov_b32_e32 v0, 42
> ; CI-NEXT: v_mov_b32_e32 v1, 0
> ; CI-NEXT: s_mov_b32 m0, -1
> ; CI-NEXT: s_waitcnt lgkmcnt(0)
> -; CI-NEXT: s_add_u32 s0, s0, 32
> -; CI-NEXT: v_mov_b32_e32 v2, s0
> -; CI-NEXT: ds_dec_rtn_u64 v[0:1], v2, v[0:1]
> +; CI-NEXT: s_add_u32 s2, s2, 32
> ; CI-NEXT: v_mov_b32_e32 v2, s2
> -; CI-NEXT: v_mov_b32_e32 v3, s3
> +; CI-NEXT: ds_dec_rtn_u64 v[0:1], v2, v[0:1]
> +; CI-NEXT: v_mov_b32_e32 v3, s1
> +; CI-NEXT: v_mov_b32_e32 v2, s0
> ; CI-NEXT: s_waitcnt lgkmcnt(0)
> ; CI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
> ; CI-NEXT: s_endpgm
> ;
> ; VI-LABEL: lds_atomic_dec_ret_i64_offset:
> ; VI: ; %bb.0:
> -; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
> -; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
> +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> +; VI-NEXT: s_load_dword s2, s[4:5], 0x8
> ; VI-NEXT: v_mov_b32_e32 v0, 42
> ; VI-NEXT: v_mov_b32_e32 v1, 0
> ; VI-NEXT: s_mov_b32 m0, -1
> ; VI-NEXT: s_waitcnt lgkmcnt(0)
> -; VI-NEXT: s_add_u32 s0, s0, 32
> -; VI-NEXT: v_mov_b32_e32 v2, s0
> -; VI-NEXT: ds_dec_rtn_u64 v[0:1], v2, v[0:1]
> +; VI-NEXT: s_add_u32 s2, s2, 32
> ; VI-NEXT: v_mov_b32_e32 v2, s2
> -; VI-NEXT: v_mov_b32_e32 v3, s3
> +; VI-NEXT: ds_dec_rtn_u64 v[0:1], v2, v[0:1]
> +; VI-NEXT: v_mov_b32_e32 v3, s1
> +; VI-NEXT: v_mov_b32_e32 v2, s0
> ; VI-NEXT: s_waitcnt lgkmcnt(0)
> ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
> ; VI-NEXT: s_endpgm
> @@ -1340,7 +1349,7 @@ define amdgpu_kernel void
> @lds_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out,
> define amdgpu_kernel void @lds_atomic_dec_noret_i64(i64 addrspace(3)*
> %ptr) nounwind {
> ; CI-LABEL: lds_atomic_dec_noret_i64:
> ; CI: ; %bb.0:
> -; CI-NEXT: s_load_dword s0, s[0:1], 0x9
> +; CI-NEXT: s_load_dword s0, s[4:5], 0x0
> ; CI-NEXT: v_mov_b32_e32 v0, 42
> ; CI-NEXT: v_mov_b32_e32 v1, 0
> ; CI-NEXT: s_mov_b32 m0, -1
> @@ -1351,7 +1360,7 @@ define amdgpu_kernel void
> @lds_atomic_dec_noret_i64(i64 addrspace(3)* %ptr) noun
> ;
> ; VI-LABEL: lds_atomic_dec_noret_i64:
> ; VI: ; %bb.0:
> -; VI-NEXT: s_load_dword s0, s[0:1], 0x24
> +; VI-NEXT: s_load_dword s0, s[4:5], 0x0
> ; VI-NEXT: v_mov_b32_e32 v0, 42
> ; VI-NEXT: v_mov_b32_e32 v1, 0
> ; VI-NEXT: s_mov_b32 m0, -1
> @@ -1375,7 +1384,7 @@ define amdgpu_kernel void
> @lds_atomic_dec_noret_i64(i64 addrspace(3)* %ptr) noun
> define amdgpu_kernel void @lds_atomic_dec_noret_i64_offset(i64
> addrspace(3)* %ptr) nounwind {
> ; CI-LABEL: lds_atomic_dec_noret_i64_offset:
> ; CI: ; %bb.0:
> -; CI-NEXT: s_load_dword s0, s[0:1], 0x9
> +; CI-NEXT: s_load_dword s0, s[4:5], 0x0
> ; CI-NEXT: v_mov_b32_e32 v0, 42
> ; CI-NEXT: v_mov_b32_e32 v1, 0
> ; CI-NEXT: s_mov_b32 m0, -1
> @@ -1387,7 +1396,7 @@ define amdgpu_kernel void
> @lds_atomic_dec_noret_i64_offset(i64 addrspace(3)* %pt
> ;
> ; VI-LABEL: lds_atomic_dec_noret_i64_offset:
> ; VI: ; %bb.0:
> -; VI-NEXT: s_load_dword s0, s[0:1], 0x24
> +; VI-NEXT: s_load_dword s0, s[4:5], 0x0
> ; VI-NEXT: v_mov_b32_e32 v0, 42
> ; VI-NEXT: v_mov_b32_e32 v1, 0
> ; VI-NEXT: s_mov_b32 m0, -1
> @@ -1414,7 +1423,7 @@ define amdgpu_kernel void
> @lds_atomic_dec_noret_i64_offset(i64 addrspace(3)* %pt
> define amdgpu_kernel void @global_atomic_dec_ret_i64(i64 addrspace(1)*
> %out, i64 addrspace(1)* %ptr) #0 {
> ; CI-LABEL: global_atomic_dec_ret_i64:
> ; CI: ; %bb.0:
> -; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
> +; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; CI-NEXT: v_mov_b32_e32 v2, 42
> ; CI-NEXT: v_mov_b32_e32 v3, 0
> ; CI-NEXT: s_waitcnt lgkmcnt(0)
> @@ -1429,7 +1438,7 @@ define amdgpu_kernel void
> @global_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64
> ;
> ; VI-LABEL: global_atomic_dec_ret_i64:
> ; VI: ; %bb.0:
> -; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
> +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; VI-NEXT: v_mov_b32_e32 v2, 42
> ; VI-NEXT: v_mov_b32_e32 v3, 0
> ; VI-NEXT: s_waitcnt lgkmcnt(0)
> @@ -1463,7 +1472,7 @@ define amdgpu_kernel void
> @global_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64
> define amdgpu_kernel void @global_atomic_dec_ret_i64_offset(i64
> addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
> ; CI-LABEL: global_atomic_dec_ret_i64_offset:
> ; CI: ; %bb.0:
> -; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
> +; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; CI-NEXT: v_mov_b32_e32 v2, 42
> ; CI-NEXT: v_mov_b32_e32 v3, 0
> ; CI-NEXT: s_waitcnt lgkmcnt(0)
> @@ -1480,7 +1489,7 @@ define amdgpu_kernel void
> @global_atomic_dec_ret_i64_offset(i64 addrspace(1)* %o
> ;
> ; VI-LABEL: global_atomic_dec_ret_i64_offset:
> ; VI: ; %bb.0:
> -; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
> +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; VI-NEXT: v_mov_b32_e32 v2, 42
> ; VI-NEXT: v_mov_b32_e32 v3, 0
> ; VI-NEXT: s_waitcnt lgkmcnt(0)
> @@ -1519,7 +1528,7 @@ define amdgpu_kernel void
> @global_atomic_dec_ret_i64_offset(i64 addrspace(1)* %o
> define amdgpu_kernel void @global_atomic_dec_noret_i64(i64 addrspace(1)*
> %ptr) nounwind {
> ; CI-LABEL: global_atomic_dec_noret_i64:
> ; CI: ; %bb.0:
> -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
> +; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> ; CI-NEXT: v_mov_b32_e32 v2, 42
> ; CI-NEXT: v_mov_b32_e32 v3, 0
> ; CI-NEXT: s_waitcnt lgkmcnt(0)
> @@ -1530,7 +1539,7 @@ define amdgpu_kernel void
> @global_atomic_dec_noret_i64(i64 addrspace(1)* %ptr) n
> ;
> ; VI-LABEL: global_atomic_dec_noret_i64:
> ; VI: ; %bb.0:
> -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
> +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> ; VI-NEXT: v_mov_b32_e32 v2, 42
> ; VI-NEXT: v_mov_b32_e32 v3, 0
> ; VI-NEXT: s_waitcnt lgkmcnt(0)
> @@ -1555,7 +1564,7 @@ define amdgpu_kernel void
> @global_atomic_dec_noret_i64(i64 addrspace(1)* %ptr) n
> define amdgpu_kernel void @global_atomic_dec_noret_i64_offset(i64
> addrspace(1)* %ptr) nounwind {
> ; CI-LABEL: global_atomic_dec_noret_i64_offset:
> ; CI: ; %bb.0:
> -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
> +; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> ; CI-NEXT: v_mov_b32_e32 v2, 42
> ; CI-NEXT: v_mov_b32_e32 v3, 0
> ; CI-NEXT: s_waitcnt lgkmcnt(0)
> @@ -1568,7 +1577,7 @@ define amdgpu_kernel void
> @global_atomic_dec_noret_i64_offset(i64 addrspace(1)*
> ;
> ; VI-LABEL: global_atomic_dec_noret_i64_offset:
> ; VI: ; %bb.0:
> -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
> +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> ; VI-NEXT: v_mov_b32_e32 v2, 42
> ; VI-NEXT: v_mov_b32_e32 v3, 0
> ; VI-NEXT: s_waitcnt lgkmcnt(0)
> @@ -1601,7 +1610,7 @@ define amdgpu_kernel void
> @global_atomic_dec_ret_i64_offset_addr64(i64 addrspace
> ; CI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
> ; CI-NEXT: v_mul_lo_u32 v2, 0, v0
> ; CI-NEXT: v_mul_lo_u32 v1, 8, v1
> -; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
> +; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; CI-NEXT: v_mul_hi_u32 v3, 8, v0
> ; CI-NEXT: v_mul_lo_u32 v4, 8, v0
> ; CI-NEXT: v_add_i32_e32 v1, vcc, v2, v1
> @@ -1627,7 +1636,7 @@ define amdgpu_kernel void
> @global_atomic_dec_ret_i64_offset_addr64(i64 addrspace
> ; VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
> ; VI-NEXT: v_mul_lo_u32 v2, 0, v0
> ; VI-NEXT: v_mul_lo_u32 v1, 8, v1
> -; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
> +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; VI-NEXT: v_mul_hi_u32 v3, 8, v0
> ; VI-NEXT: v_mul_lo_u32 v4, 8, v0
> ; VI-NEXT: v_add_u32_e32 v1, vcc, v2, v1
> @@ -1687,7 +1696,7 @@ define amdgpu_kernel void
> @global_atomic_dec_noret_i64_offset_addr64(i64 addrspa
> ; CI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
> ; CI-NEXT: v_mul_lo_u32 v2, 0, v0
> ; CI-NEXT: v_mul_lo_u32 v1, 8, v1
> -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
> +; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> ; CI-NEXT: v_mul_hi_u32 v3, 8, v0
> ; CI-NEXT: v_mul_lo_u32 v0, 8, v0
> ; CI-NEXT: v_add_i32_e32 v1, vcc, v2, v1
> @@ -1708,7 +1717,7 @@ define amdgpu_kernel void
> @global_atomic_dec_noret_i64_offset_addr64(i64 addrspa
> ; VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
> ; VI-NEXT: v_mul_lo_u32 v2, 0, v0
> ; VI-NEXT: v_mul_lo_u32 v1, 8, v1
> -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
> +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> ; VI-NEXT: v_mul_hi_u32 v3, 8, v0
> ; VI-NEXT: v_mul_lo_u32 v0, 8, v0
> ; VI-NEXT: v_add_u32_e32 v1, vcc, v2, v1
> @@ -1756,7 +1765,7 @@ define amdgpu_kernel void
> @atomic_dec_shl_base_lds_0_i64(i64 addrspace(1)* %out,
> ; CI-LABEL: atomic_dec_shl_base_lds_0_i64:
> ; CI: ; %bb.0:
> ; CI-NEXT: v_mul_lo_u32 v7, 8, v0
> -; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
> +; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; CI-NEXT: v_add_i32_e32 v6, vcc, 2, v0
> ; CI-NEXT: v_mov_b32_e32 v0, 9
> ; CI-NEXT: v_add_i32_e32 v7, vcc, 0, v7
> @@ -1776,7 +1785,7 @@ define amdgpu_kernel void
> @atomic_dec_shl_base_lds_0_i64(i64 addrspace(1)* %out,
> ; VI-LABEL: atomic_dec_shl_base_lds_0_i64:
> ; VI: ; %bb.0:
> ; VI-NEXT: v_mul_lo_u32 v7, 8, v0
> -; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
> +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; VI-NEXT: v_add_u32_e32 v6, vcc, 2, v0
> ; VI-NEXT: v_mov_b32_e32 v0, 9
> ; VI-NEXT: v_add_u32_e32 v7, vcc, 0, v7
>
> diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll
> index 454034b0a428..07f45eaf2b8a 100644
> --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll
> +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll
> @@ -1,7 +1,7 @@
> ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
> -; RUN: llc -global-isel -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
> -; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
> -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
> +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
> +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
> +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
>
> ; FIXME: Merge with other test. DS offset folding doesn't work due to
> ; register bank copies, and no return optimization is missing.
> @@ -20,44 +20,44 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1
> define amdgpu_kernel void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out,
> i32 addrspace(3)* %ptr) #0 {
> ; CI-LABEL: lds_atomic_inc_ret_i32:
> ; CI: ; %bb.0:
> -; CI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9
> -; CI-NEXT: s_load_dword s0, s[0:1], 0xb
> +; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> +; CI-NEXT: s_load_dword s2, s[4:5], 0x2
> ; CI-NEXT: v_mov_b32_e32 v0, 42
> ; CI-NEXT: s_mov_b32 m0, -1
> ; CI-NEXT: s_waitcnt lgkmcnt(0)
> -; CI-NEXT: v_mov_b32_e32 v1, s0
> +; CI-NEXT: v_mov_b32_e32 v1, s2
> ; CI-NEXT: ds_inc_rtn_u32 v2, v1, v0
> -; CI-NEXT: v_mov_b32_e32 v0, s2
> -; CI-NEXT: v_mov_b32_e32 v1, s3
> +; CI-NEXT: v_mov_b32_e32 v0, s0
> +; CI-NEXT: v_mov_b32_e32 v1, s1
> ; CI-NEXT: s_waitcnt lgkmcnt(0)
> ; CI-NEXT: flat_store_dword v[0:1], v2
> ; CI-NEXT: s_endpgm
> ;
> ; VI-LABEL: lds_atomic_inc_ret_i32:
> ; VI: ; %bb.0:
> -; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
> -; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
> +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> +; VI-NEXT: s_load_dword s2, s[4:5], 0x8
> ; VI-NEXT: v_mov_b32_e32 v0, 42
> ; VI-NEXT: s_mov_b32 m0, -1
> ; VI-NEXT: s_waitcnt lgkmcnt(0)
> -; VI-NEXT: v_mov_b32_e32 v1, s0
> +; VI-NEXT: v_mov_b32_e32 v1, s2
> ; VI-NEXT: ds_inc_rtn_u32 v2, v1, v0
> -; VI-NEXT: v_mov_b32_e32 v0, s2
> -; VI-NEXT: v_mov_b32_e32 v1, s3
> +; VI-NEXT: v_mov_b32_e32 v0, s0
> +; VI-NEXT: v_mov_b32_e32 v1, s1
> ; VI-NEXT: s_waitcnt lgkmcnt(0)
> ; VI-NEXT: flat_store_dword v[0:1], v2
> ; VI-NEXT: s_endpgm
> ;
> ; GFX9-LABEL: lds_atomic_inc_ret_i32:
> ; GFX9: ; %bb.0:
> -; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
> -; GFX9-NEXT: s_load_dword s0, s[0:1], 0x2c
> +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> +; GFX9-NEXT: s_load_dword s2, s[4:5], 0x8
> ; GFX9-NEXT: v_mov_b32_e32 v1, 42
> ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
> -; GFX9-NEXT: v_mov_b32_e32 v0, s0
> -; GFX9-NEXT: ds_inc_rtn_u32 v2, v0, v1
> ; GFX9-NEXT: v_mov_b32_e32 v0, s2
> -; GFX9-NEXT: v_mov_b32_e32 v1, s3
> +; GFX9-NEXT: ds_inc_rtn_u32 v2, v0, v1
> +; GFX9-NEXT: v_mov_b32_e32 v0, s0
> +; GFX9-NEXT: v_mov_b32_e32 v1, s1
> ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
> ; GFX9-NEXT: global_store_dword v[0:1], v2, off
> ; GFX9-NEXT: s_endpgm
> @@ -71,47 +71,47 @@ define amdgpu_kernel void @lds_atomic_inc_ret_i32(i32
> addrspace(1)* %out, i32 ad
> define amdgpu_kernel void @lds_atomic_inc_ret_i32_offset(i32
> addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
> ; CI-LABEL: lds_atomic_inc_ret_i32_offset:
> ; CI: ; %bb.0:
> -; CI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9
> -; CI-NEXT: s_load_dword s0, s[0:1], 0xb
> +; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> +; CI-NEXT: s_load_dword s2, s[4:5], 0x2
> ; CI-NEXT: v_mov_b32_e32 v0, 42
> ; CI-NEXT: s_mov_b32 m0, -1
> ; CI-NEXT: s_waitcnt lgkmcnt(0)
> -; CI-NEXT: s_add_u32 s0, s0, 16
> -; CI-NEXT: v_mov_b32_e32 v1, s0
> +; CI-NEXT: s_add_u32 s2, s2, 16
> +; CI-NEXT: v_mov_b32_e32 v1, s2
> ; CI-NEXT: ds_inc_rtn_u32 v2, v1, v0
> -; CI-NEXT: v_mov_b32_e32 v0, s2
> -; CI-NEXT: v_mov_b32_e32 v1, s3
> +; CI-NEXT: v_mov_b32_e32 v0, s0
> +; CI-NEXT: v_mov_b32_e32 v1, s1
> ; CI-NEXT: s_waitcnt lgkmcnt(0)
> ; CI-NEXT: flat_store_dword v[0:1], v2
> ; CI-NEXT: s_endpgm
> ;
> ; VI-LABEL: lds_atomic_inc_ret_i32_offset:
> ; VI: ; %bb.0:
> -; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
> -; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
> +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> +; VI-NEXT: s_load_dword s2, s[4:5], 0x8
> ; VI-NEXT: v_mov_b32_e32 v0, 42
> ; VI-NEXT: s_mov_b32 m0, -1
> ; VI-NEXT: s_waitcnt lgkmcnt(0)
> -; VI-NEXT: s_add_u32 s0, s0, 16
> -; VI-NEXT: v_mov_b32_e32 v1, s0
> +; VI-NEXT: s_add_u32 s2, s2, 16
> +; VI-NEXT: v_mov_b32_e32 v1, s2
> ; VI-NEXT: ds_inc_rtn_u32 v2, v1, v0
> -; VI-NEXT: v_mov_b32_e32 v0, s2
> -; VI-NEXT: v_mov_b32_e32 v1, s3
> +; VI-NEXT: v_mov_b32_e32 v0, s0
> +; VI-NEXT: v_mov_b32_e32 v1, s1
> ; VI-NEXT: s_waitcnt lgkmcnt(0)
> ; VI-NEXT: flat_store_dword v[0:1], v2
> ; VI-NEXT: s_endpgm
> ;
> ; GFX9-LABEL: lds_atomic_inc_ret_i32_offset:
> ; GFX9: ; %bb.0:
> -; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
> -; GFX9-NEXT: s_load_dword s0, s[0:1], 0x2c
> +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> +; GFX9-NEXT: s_load_dword s2, s[4:5], 0x8
> ; GFX9-NEXT: v_mov_b32_e32 v1, 42
> ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
> -; GFX9-NEXT: s_add_u32 s0, s0, 16
> -; GFX9-NEXT: v_mov_b32_e32 v0, s0
> -; GFX9-NEXT: ds_inc_rtn_u32 v2, v0, v1
> +; GFX9-NEXT: s_add_u32 s2, s2, 16
> ; GFX9-NEXT: v_mov_b32_e32 v0, s2
> -; GFX9-NEXT: v_mov_b32_e32 v1, s3
> +; GFX9-NEXT: ds_inc_rtn_u32 v2, v0, v1
> +; GFX9-NEXT: v_mov_b32_e32 v0, s0
> +; GFX9-NEXT: v_mov_b32_e32 v1, s1
> ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
> ; GFX9-NEXT: global_store_dword v[0:1], v2, off
> ; GFX9-NEXT: s_endpgm
> @@ -124,7 +124,7 @@ define amdgpu_kernel void
> @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out,
> define amdgpu_kernel void @lds_atomic_inc_noret_i32(i32 addrspace(3)*
> %ptr) nounwind {
> ; CI-LABEL: lds_atomic_inc_noret_i32:
> ; CI: ; %bb.0:
> -; CI-NEXT: s_load_dword s0, s[0:1], 0x9
> +; CI-NEXT: s_load_dword s0, s[4:5], 0x0
> ; CI-NEXT: v_mov_b32_e32 v0, 42
> ; CI-NEXT: s_mov_b32 m0, -1
> ; CI-NEXT: s_waitcnt lgkmcnt(0)
> @@ -134,7 +134,7 @@ define amdgpu_kernel void
> @lds_atomic_inc_noret_i32(i32 addrspace(3)* %ptr) noun
> ;
> ; VI-LABEL: lds_atomic_inc_noret_i32:
> ; VI: ; %bb.0:
> -; VI-NEXT: s_load_dword s0, s[0:1], 0x24
> +; VI-NEXT: s_load_dword s0, s[4:5], 0x0
> ; VI-NEXT: v_mov_b32_e32 v0, 42
> ; VI-NEXT: s_mov_b32 m0, -1
> ; VI-NEXT: s_waitcnt lgkmcnt(0)
> @@ -144,7 +144,7 @@ define amdgpu_kernel void
> @lds_atomic_inc_noret_i32(i32 addrspace(3)* %ptr) noun
> ;
> ; GFX9-LABEL: lds_atomic_inc_noret_i32:
> ; GFX9: ; %bb.0:
> -; GFX9-NEXT: s_load_dword s0, s[0:1], 0x24
> +; GFX9-NEXT: s_load_dword s0, s[4:5], 0x0
> ; GFX9-NEXT: v_mov_b32_e32 v1, 42
> ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
> ; GFX9-NEXT: v_mov_b32_e32 v0, s0
> @@ -157,7 +157,7 @@ define amdgpu_kernel void
> @lds_atomic_inc_noret_i32(i32 addrspace(3)* %ptr) noun
> define amdgpu_kernel void @lds_atomic_inc_noret_i32_offset(i32
> addrspace(3)* %ptr) nounwind {
> ; CI-LABEL: lds_atomic_inc_noret_i32_offset:
> ; CI: ; %bb.0:
> -; CI-NEXT: s_load_dword s0, s[0:1], 0x9
> +; CI-NEXT: s_load_dword s0, s[4:5], 0x0
> ; CI-NEXT: v_mov_b32_e32 v0, 42
> ; CI-NEXT: s_mov_b32 m0, -1
> ; CI-NEXT: s_waitcnt lgkmcnt(0)
> @@ -168,7 +168,7 @@ define amdgpu_kernel void
> @lds_atomic_inc_noret_i32_offset(i32 addrspace(3)* %pt
> ;
> ; VI-LABEL: lds_atomic_inc_noret_i32_offset:
> ; VI: ; %bb.0:
> -; VI-NEXT: s_load_dword s0, s[0:1], 0x24
> +; VI-NEXT: s_load_dword s0, s[4:5], 0x0
> ; VI-NEXT: v_mov_b32_e32 v0, 42
> ; VI-NEXT: s_mov_b32 m0, -1
> ; VI-NEXT: s_waitcnt lgkmcnt(0)
> @@ -179,7 +179,7 @@ define amdgpu_kernel void
> @lds_atomic_inc_noret_i32_offset(i32 addrspace(3)* %pt
> ;
> ; GFX9-LABEL: lds_atomic_inc_noret_i32_offset:
> ; GFX9: ; %bb.0:
> -; GFX9-NEXT: s_load_dword s0, s[0:1], 0x24
> +; GFX9-NEXT: s_load_dword s0, s[4:5], 0x0
> ; GFX9-NEXT: v_mov_b32_e32 v1, 42
> ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
> ; GFX9-NEXT: s_add_u32 s0, s0, 16
> @@ -194,7 +194,7 @@ define amdgpu_kernel void
> @lds_atomic_inc_noret_i32_offset(i32 addrspace(3)* %pt
> define amdgpu_kernel void @global_atomic_inc_ret_i32(i32 addrspace(1)*
> %out, i32 addrspace(1)* %ptr) #0 {
> ; CI-LABEL: global_atomic_inc_ret_i32:
> ; CI: ; %bb.0:
> -; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
> +; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; CI-NEXT: v_mov_b32_e32 v2, 42
> ; CI-NEXT: s_waitcnt lgkmcnt(0)
> ; CI-NEXT: v_mov_b32_e32 v0, s2
> @@ -208,7 +208,7 @@ define amdgpu_kernel void
> @global_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32
> ;
> ; VI-LABEL: global_atomic_inc_ret_i32:
> ; VI: ; %bb.0:
> -; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
> +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; VI-NEXT: v_mov_b32_e32 v2, 42
> ; VI-NEXT: s_waitcnt lgkmcnt(0)
> ; VI-NEXT: v_mov_b32_e32 v0, s2
> @@ -222,7 +222,7 @@ define amdgpu_kernel void
> @global_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32
> ;
> ; GFX9-LABEL: global_atomic_inc_ret_i32:
> ; GFX9: ; %bb.0:
> -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
> +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; GFX9-NEXT: v_mov_b32_e32 v2, 42
> ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
> ; GFX9-NEXT: v_mov_b32_e32 v0, s2
> @@ -241,7 +241,7 @@ define amdgpu_kernel void
> @global_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32
> define amdgpu_kernel void @global_atomic_inc_ret_i32_offset(i32
> addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
> ; CI-LABEL: global_atomic_inc_ret_i32_offset:
> ; CI: ; %bb.0:
> -; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
> +; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; CI-NEXT: v_mov_b32_e32 v2, 42
> ; CI-NEXT: s_waitcnt lgkmcnt(0)
> ; CI-NEXT: s_add_u32 s2, s2, 16
> @@ -257,7 +257,7 @@ define amdgpu_kernel void
> @global_atomic_inc_ret_i32_offset(i32 addrspace(1)* %o
> ;
> ; VI-LABEL: global_atomic_inc_ret_i32_offset:
> ; VI: ; %bb.0:
> -; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
> +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; VI-NEXT: v_mov_b32_e32 v2, 42
> ; VI-NEXT: s_waitcnt lgkmcnt(0)
> ; VI-NEXT: s_add_u32 s2, s2, 16
> @@ -273,7 +273,7 @@ define amdgpu_kernel void
> @global_atomic_inc_ret_i32_offset(i32 addrspace(1)* %o
> ;
> ; GFX9-LABEL: global_atomic_inc_ret_i32_offset:
> ; GFX9: ; %bb.0:
> -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
> +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; GFX9-NEXT: v_mov_b32_e32 v2, 42
> ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
> ; GFX9-NEXT: s_add_u32 s2, s2, 16
> @@ -293,75 +293,31 @@ define amdgpu_kernel void
> @global_atomic_inc_ret_i32_offset(i32 addrspace(1)* %o
> }
>
> define amdgpu_kernel void @global_atomic_inc_noret_i32(i32 addrspace(1)*
> %ptr) nounwind {
> -; CI-LABEL: global_atomic_inc_noret_i32:
> -; CI: ; %bb.0:
> -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
> -; CI-NEXT: v_mov_b32_e32 v2, 42
> -; CI-NEXT: s_waitcnt lgkmcnt(0)
> -; CI-NEXT: v_mov_b32_e32 v0, s0
> -; CI-NEXT: v_mov_b32_e32 v1, s1
> -; CI-NEXT: flat_atomic_inc v0, v[0:1], v2 glc
> -; CI-NEXT: s_endpgm
> -;
> -; VI-LABEL: global_atomic_inc_noret_i32:
> -; VI: ; %bb.0:
> -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
> -; VI-NEXT: v_mov_b32_e32 v2, 42
> -; VI-NEXT: s_waitcnt lgkmcnt(0)
> -; VI-NEXT: v_mov_b32_e32 v0, s0
> -; VI-NEXT: v_mov_b32_e32 v1, s1
> -; VI-NEXT: flat_atomic_inc v0, v[0:1], v2 glc
> -; VI-NEXT: s_endpgm
> -;
> -; GFX9-LABEL: global_atomic_inc_noret_i32:
> -; GFX9: ; %bb.0:
> -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
> -; GFX9-NEXT: v_mov_b32_e32 v2, 42
> -; GFX9-NEXT: s_waitcnt lgkmcnt(0)
> -; GFX9-NEXT: v_mov_b32_e32 v0, s0
> -; GFX9-NEXT: v_mov_b32_e32 v1, s1
> -; GFX9-NEXT: flat_atomic_inc v0, v[0:1], v2 glc
> -; GFX9-NEXT: s_endpgm
> +; GCN-LABEL: global_atomic_inc_noret_i32:
> +; GCN: ; %bb.0:
> +; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> +; GCN-NEXT: v_mov_b32_e32 v2, 42
> +; GCN-NEXT: s_waitcnt lgkmcnt(0)
> +; GCN-NEXT: v_mov_b32_e32 v0, s0
> +; GCN-NEXT: v_mov_b32_e32 v1, s1
> +; GCN-NEXT: flat_atomic_inc v0, v[0:1], v2 glc
> +; GCN-NEXT: s_endpgm
> %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)*
> %ptr, i32 42, i32 0, i32 0, i1 false)
> ret void
> }
>
> define amdgpu_kernel void @global_atomic_inc_noret_i32_offset(i32
> addrspace(1)* %ptr) nounwind {
> -; CI-LABEL: global_atomic_inc_noret_i32_offset:
> -; CI: ; %bb.0:
> -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
> -; CI-NEXT: v_mov_b32_e32 v2, 42
> -; CI-NEXT: s_waitcnt lgkmcnt(0)
> -; CI-NEXT: s_add_u32 s0, s0, 16
> -; CI-NEXT: s_addc_u32 s1, s1, 0
> -; CI-NEXT: v_mov_b32_e32 v0, s0
> -; CI-NEXT: v_mov_b32_e32 v1, s1
> -; CI-NEXT: flat_atomic_inc v0, v[0:1], v2 glc
> -; CI-NEXT: s_endpgm
> -;
> -; VI-LABEL: global_atomic_inc_noret_i32_offset:
> -; VI: ; %bb.0:
> -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
> -; VI-NEXT: v_mov_b32_e32 v2, 42
> -; VI-NEXT: s_waitcnt lgkmcnt(0)
> -; VI-NEXT: s_add_u32 s0, s0, 16
> -; VI-NEXT: s_addc_u32 s1, s1, 0
> -; VI-NEXT: v_mov_b32_e32 v0, s0
> -; VI-NEXT: v_mov_b32_e32 v1, s1
> -; VI-NEXT: flat_atomic_inc v0, v[0:1], v2 glc
> -; VI-NEXT: s_endpgm
> -;
> -; GFX9-LABEL: global_atomic_inc_noret_i32_offset:
> -; GFX9: ; %bb.0:
> -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
> -; GFX9-NEXT: v_mov_b32_e32 v2, 42
> -; GFX9-NEXT: s_waitcnt lgkmcnt(0)
> -; GFX9-NEXT: s_add_u32 s0, s0, 16
> -; GFX9-NEXT: s_addc_u32 s1, s1, 0
> -; GFX9-NEXT: v_mov_b32_e32 v0, s0
> -; GFX9-NEXT: v_mov_b32_e32 v1, s1
> -; GFX9-NEXT: flat_atomic_inc v0, v[0:1], v2 glc
> -; GFX9-NEXT: s_endpgm
> +; GCN-LABEL: global_atomic_inc_noret_i32_offset:
> +; GCN: ; %bb.0:
> +; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> +; GCN-NEXT: v_mov_b32_e32 v2, 42
> +; GCN-NEXT: s_waitcnt lgkmcnt(0)
> +; GCN-NEXT: s_add_u32 s0, s0, 16
> +; GCN-NEXT: s_addc_u32 s1, s1, 0
> +; GCN-NEXT: v_mov_b32_e32 v0, s0
> +; GCN-NEXT: v_mov_b32_e32 v1, s1
> +; GCN-NEXT: flat_atomic_inc v0, v[0:1], v2 glc
> +; GCN-NEXT: s_endpgm
> %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
> %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)*
> %gep, i32 42, i32 0, i32 0, i1 false)
> ret void
> @@ -373,7 +329,7 @@ define amdgpu_kernel void
> @global_atomic_inc_ret_i32_offset_addr64(i32 addrspace
> ; CI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
> ; CI-NEXT: v_mul_lo_u32 v2, 0, v0
> ; CI-NEXT: v_mul_lo_u32 v1, 4, v1
> -; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
> +; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; CI-NEXT: v_mul_hi_u32 v3, 4, v0
> ; CI-NEXT: v_mul_lo_u32 v4, 4, v0
> ; CI-NEXT: v_add_i32_e32 v1, vcc, v2, v1
> @@ -398,7 +354,7 @@ define amdgpu_kernel void
> @global_atomic_inc_ret_i32_offset_addr64(i32 addrspace
> ; VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
> ; VI-NEXT: v_mul_lo_u32 v2, 0, v0
> ; VI-NEXT: v_mul_lo_u32 v1, 4, v1
> -; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
> +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; VI-NEXT: v_mul_hi_u32 v3, 4, v0
> ; VI-NEXT: v_mul_lo_u32 v4, 4, v0
> ; VI-NEXT: v_add_u32_e32 v1, vcc, v2, v1
> @@ -421,7 +377,7 @@ define amdgpu_kernel void
> @global_atomic_inc_ret_i32_offset_addr64(i32 addrspace
> ; GFX9-LABEL: global_atomic_inc_ret_i32_offset_addr64:
> ; GFX9: ; %bb.0:
> ; GFX9-NEXT: v_ashrrev_i32_e32 v1, 31, v0
> -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
> +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; GFX9-NEXT: v_mul_lo_u32 v2, 0, v0
> ; GFX9-NEXT: v_mul_lo_u32 v1, 4, v1
> ; GFX9-NEXT: v_mul_hi_u32 v3, 4, v0
> @@ -456,7 +412,7 @@ define amdgpu_kernel void
> @global_atomic_inc_noret_i32_offset_addr64(i32 addrspa
> ; CI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
> ; CI-NEXT: v_mul_lo_u32 v2, 0, v0
> ; CI-NEXT: v_mul_lo_u32 v1, 4, v1
> -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
> +; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> ; CI-NEXT: v_mul_hi_u32 v3, 4, v0
> ; CI-NEXT: v_mul_lo_u32 v0, 4, v0
> ; CI-NEXT: v_add_i32_e32 v1, vcc, v2, v1
> @@ -476,7 +432,7 @@ define amdgpu_kernel void
> @global_atomic_inc_noret_i32_offset_addr64(i32 addrspa
> ; VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
> ; VI-NEXT: v_mul_lo_u32 v2, 0, v0
> ; VI-NEXT: v_mul_lo_u32 v1, 4, v1
> -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
> +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> ; VI-NEXT: v_mul_hi_u32 v3, 4, v0
> ; VI-NEXT: v_mul_lo_u32 v0, 4, v0
> ; VI-NEXT: v_add_u32_e32 v1, vcc, v2, v1
> @@ -494,7 +450,7 @@ define amdgpu_kernel void
> @global_atomic_inc_noret_i32_offset_addr64(i32 addrspa
> ; GFX9-LABEL: global_atomic_inc_noret_i32_offset_addr64:
> ; GFX9: ; %bb.0:
> ; GFX9-NEXT: v_ashrrev_i32_e32 v1, 31, v0
> -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
> +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> ; GFX9-NEXT: v_mul_lo_u32 v2, 0, v0
> ; GFX9-NEXT: v_mul_hi_u32 v3, 4, v0
> ; GFX9-NEXT: v_mul_lo_u32 v1, 4, v1
> @@ -522,7 +478,7 @@ define amdgpu_kernel void
> @atomic_inc_shl_base_lds_0_i32(i32 addrspace(1)* %out,
> ; CI-LABEL: atomic_inc_shl_base_lds_0_i32:
> ; CI: ; %bb.0:
> ; CI-NEXT: v_mul_lo_u32 v5, 4, v0
> -; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
> +; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; CI-NEXT: v_mov_b32_e32 v6, 9
> ; CI-NEXT: s_mov_b32 m0, -1
> ; CI-NEXT: v_add_i32_e32 v5, vcc, 0, v5
> @@ -541,7 +497,7 @@ define amdgpu_kernel void
> @atomic_inc_shl_base_lds_0_i32(i32 addrspace(1)* %out,
> ; VI-LABEL: atomic_inc_shl_base_lds_0_i32:
> ; VI: ; %bb.0:
> ; VI-NEXT: v_mul_lo_u32 v5, 4, v0
> -; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
> +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; VI-NEXT: v_mov_b32_e32 v6, 9
> ; VI-NEXT: s_mov_b32 m0, -1
> ; VI-NEXT: v_add_u32_e32 v5, vcc, 0, v5
> @@ -560,7 +516,7 @@ define amdgpu_kernel void
> @atomic_inc_shl_base_lds_0_i32(i32 addrspace(1)* %out,
> ; GFX9-LABEL: atomic_inc_shl_base_lds_0_i32:
> ; GFX9: ; %bb.0:
> ; GFX9-NEXT: v_mul_lo_u32 v1, 4, v0
> -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
> +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; GFX9-NEXT: v_add_u32_e32 v3, 2, v0
> ; GFX9-NEXT: v_mov_b32_e32 v2, 9
> ; GFX9-NEXT: v_add_u32_e32 v0, 0, v1
> @@ -586,47 +542,47 @@ define amdgpu_kernel void
> @atomic_inc_shl_base_lds_0_i32(i32 addrspace(1)* %out,
> define amdgpu_kernel void @lds_atomic_inc_ret_i64(i64 addrspace(1)* %out,
> i64 addrspace(3)* %ptr) #0 {
> ; CI-LABEL: lds_atomic_inc_ret_i64:
> ; CI: ; %bb.0:
> -; CI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9
> -; CI-NEXT: s_load_dword s0, s[0:1], 0xb
> +; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> +; CI-NEXT: s_load_dword s2, s[4:5], 0x2
> ; CI-NEXT: v_mov_b32_e32 v0, 42
> ; CI-NEXT: v_mov_b32_e32 v1, 0
> ; CI-NEXT: s_mov_b32 m0, -1
> ; CI-NEXT: s_waitcnt lgkmcnt(0)
> -; CI-NEXT: v_mov_b32_e32 v2, s0
> -; CI-NEXT: ds_inc_rtn_u64 v[0:1], v2, v[0:1]
> ; CI-NEXT: v_mov_b32_e32 v2, s2
> -; CI-NEXT: v_mov_b32_e32 v3, s3
> +; CI-NEXT: ds_inc_rtn_u64 v[0:1], v2, v[0:1]
> +; CI-NEXT: v_mov_b32_e32 v3, s1
> +; CI-NEXT: v_mov_b32_e32 v2, s0
> ; CI-NEXT: s_waitcnt lgkmcnt(0)
> ; CI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
> ; CI-NEXT: s_endpgm
> ;
> ; VI-LABEL: lds_atomic_inc_ret_i64:
> ; VI: ; %bb.0:
> -; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
> -; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
> +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> +; VI-NEXT: s_load_dword s2, s[4:5], 0x8
> ; VI-NEXT: v_mov_b32_e32 v0, 42
> ; VI-NEXT: v_mov_b32_e32 v1, 0
> ; VI-NEXT: s_mov_b32 m0, -1
> ; VI-NEXT: s_waitcnt lgkmcnt(0)
> -; VI-NEXT: v_mov_b32_e32 v2, s0
> -; VI-NEXT: ds_inc_rtn_u64 v[0:1], v2, v[0:1]
> ; VI-NEXT: v_mov_b32_e32 v2, s2
> -; VI-NEXT: v_mov_b32_e32 v3, s3
> +; VI-NEXT: ds_inc_rtn_u64 v[0:1], v2, v[0:1]
> +; VI-NEXT: v_mov_b32_e32 v3, s1
> +; VI-NEXT: v_mov_b32_e32 v2, s0
> ; VI-NEXT: s_waitcnt lgkmcnt(0)
> ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
> ; VI-NEXT: s_endpgm
> ;
> ; GFX9-LABEL: lds_atomic_inc_ret_i64:
> ; GFX9: ; %bb.0:
> -; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
> -; GFX9-NEXT: s_load_dword s0, s[0:1], 0x2c
> +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> +; GFX9-NEXT: s_load_dword s2, s[4:5], 0x8
> ; GFX9-NEXT: v_mov_b32_e32 v0, 42
> ; GFX9-NEXT: v_mov_b32_e32 v1, 0
> ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
> -; GFX9-NEXT: v_mov_b32_e32 v2, s0
> -; GFX9-NEXT: ds_inc_rtn_u64 v[0:1], v2, v[0:1]
> ; GFX9-NEXT: v_mov_b32_e32 v2, s2
> -; GFX9-NEXT: v_mov_b32_e32 v3, s3
> +; GFX9-NEXT: ds_inc_rtn_u64 v[0:1], v2, v[0:1]
> +; GFX9-NEXT: v_mov_b32_e32 v3, s1
> +; GFX9-NEXT: v_mov_b32_e32 v2, s0
> ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
> ; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
> ; GFX9-NEXT: s_endpgm
> @@ -638,50 +594,50 @@ define amdgpu_kernel void
> @lds_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 ad
> define amdgpu_kernel void @lds_atomic_inc_ret_i64_offset(i64
> addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
> ; CI-LABEL: lds_atomic_inc_ret_i64_offset:
> ; CI: ; %bb.0:
> -; CI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9
> -; CI-NEXT: s_load_dword s0, s[0:1], 0xb
> +; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> +; CI-NEXT: s_load_dword s2, s[4:5], 0x2
> ; CI-NEXT: v_mov_b32_e32 v0, 42
> ; CI-NEXT: v_mov_b32_e32 v1, 0
> ; CI-NEXT: s_mov_b32 m0, -1
> ; CI-NEXT: s_waitcnt lgkmcnt(0)
> -; CI-NEXT: s_add_u32 s0, s0, 32
> -; CI-NEXT: v_mov_b32_e32 v2, s0
> -; CI-NEXT: ds_inc_rtn_u64 v[0:1], v2, v[0:1]
> +; CI-NEXT: s_add_u32 s2, s2, 32
> ; CI-NEXT: v_mov_b32_e32 v2, s2
> -; CI-NEXT: v_mov_b32_e32 v3, s3
> +; CI-NEXT: ds_inc_rtn_u64 v[0:1], v2, v[0:1]
> +; CI-NEXT: v_mov_b32_e32 v3, s1
> +; CI-NEXT: v_mov_b32_e32 v2, s0
> ; CI-NEXT: s_waitcnt lgkmcnt(0)
> ; CI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
> ; CI-NEXT: s_endpgm
> ;
> ; VI-LABEL: lds_atomic_inc_ret_i64_offset:
> ; VI: ; %bb.0:
> -; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
> -; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
> +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> +; VI-NEXT: s_load_dword s2, s[4:5], 0x8
> ; VI-NEXT: v_mov_b32_e32 v0, 42
> ; VI-NEXT: v_mov_b32_e32 v1, 0
> ; VI-NEXT: s_mov_b32 m0, -1
> ; VI-NEXT: s_waitcnt lgkmcnt(0)
> -; VI-NEXT: s_add_u32 s0, s0, 32
> -; VI-NEXT: v_mov_b32_e32 v2, s0
> -; VI-NEXT: ds_inc_rtn_u64 v[0:1], v2, v[0:1]
> +; VI-NEXT: s_add_u32 s2, s2, 32
> ; VI-NEXT: v_mov_b32_e32 v2, s2
> -; VI-NEXT: v_mov_b32_e32 v3, s3
> +; VI-NEXT: ds_inc_rtn_u64 v[0:1], v2, v[0:1]
> +; VI-NEXT: v_mov_b32_e32 v3, s1
> +; VI-NEXT: v_mov_b32_e32 v2, s0
> ; VI-NEXT: s_waitcnt lgkmcnt(0)
> ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
> ; VI-NEXT: s_endpgm
> ;
> ; GFX9-LABEL: lds_atomic_inc_ret_i64_offset:
> ; GFX9: ; %bb.0:
> -; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
> -; GFX9-NEXT: s_load_dword s0, s[0:1], 0x2c
> +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> +; GFX9-NEXT: s_load_dword s2, s[4:5], 0x8
> ; GFX9-NEXT: v_mov_b32_e32 v0, 42
> ; GFX9-NEXT: v_mov_b32_e32 v1, 0
> ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
> -; GFX9-NEXT: s_add_u32 s0, s0, 32
> -; GFX9-NEXT: v_mov_b32_e32 v2, s0
> -; GFX9-NEXT: ds_inc_rtn_u64 v[0:1], v2, v[0:1]
> +; GFX9-NEXT: s_add_u32 s2, s2, 32
> ; GFX9-NEXT: v_mov_b32_e32 v2, s2
> -; GFX9-NEXT: v_mov_b32_e32 v3, s3
> +; GFX9-NEXT: ds_inc_rtn_u64 v[0:1], v2, v[0:1]
> +; GFX9-NEXT: v_mov_b32_e32 v3, s1
> +; GFX9-NEXT: v_mov_b32_e32 v2, s0
> ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
> ; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
> ; GFX9-NEXT: s_endpgm
> @@ -694,7 +650,7 @@ define amdgpu_kernel void
> @lds_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out,
> define amdgpu_kernel void @lds_atomic_inc_noret_i64(i64 addrspace(3)*
> %ptr) nounwind {
> ; CI-LABEL: lds_atomic_inc_noret_i64:
> ; CI: ; %bb.0:
> -; CI-NEXT: s_load_dword s0, s[0:1], 0x9
> +; CI-NEXT: s_load_dword s0, s[4:5], 0x0
> ; CI-NEXT: v_mov_b32_e32 v0, 42
> ; CI-NEXT: v_mov_b32_e32 v1, 0
> ; CI-NEXT: s_mov_b32 m0, -1
> @@ -705,7 +661,7 @@ define amdgpu_kernel void
> @lds_atomic_inc_noret_i64(i64 addrspace(3)* %ptr) noun
> ;
> ; VI-LABEL: lds_atomic_inc_noret_i64:
> ; VI: ; %bb.0:
> -; VI-NEXT: s_load_dword s0, s[0:1], 0x24
> +; VI-NEXT: s_load_dword s0, s[4:5], 0x0
> ; VI-NEXT: v_mov_b32_e32 v0, 42
> ; VI-NEXT: v_mov_b32_e32 v1, 0
> ; VI-NEXT: s_mov_b32 m0, -1
> @@ -716,7 +672,7 @@ define amdgpu_kernel void
> @lds_atomic_inc_noret_i64(i64 addrspace(3)* %ptr) noun
> ;
> ; GFX9-LABEL: lds_atomic_inc_noret_i64:
> ; GFX9: ; %bb.0:
> -; GFX9-NEXT: s_load_dword s0, s[0:1], 0x24
> +; GFX9-NEXT: s_load_dword s0, s[4:5], 0x0
> ; GFX9-NEXT: v_mov_b32_e32 v0, 42
> ; GFX9-NEXT: v_mov_b32_e32 v1, 0
> ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
> @@ -730,7 +686,7 @@ define amdgpu_kernel void
> @lds_atomic_inc_noret_i64(i64 addrspace(3)* %ptr) noun
> define amdgpu_kernel void @lds_atomic_inc_noret_i64_offset(i64
> addrspace(3)* %ptr) nounwind {
> ; CI-LABEL: lds_atomic_inc_noret_i64_offset:
> ; CI: ; %bb.0:
> -; CI-NEXT: s_load_dword s0, s[0:1], 0x9
> +; CI-NEXT: s_load_dword s0, s[4:5], 0x0
> ; CI-NEXT: v_mov_b32_e32 v0, 42
> ; CI-NEXT: v_mov_b32_e32 v1, 0
> ; CI-NEXT: s_mov_b32 m0, -1
> @@ -742,7 +698,7 @@ define amdgpu_kernel void
> @lds_atomic_inc_noret_i64_offset(i64 addrspace(3)* %pt
> ;
> ; VI-LABEL: lds_atomic_inc_noret_i64_offset:
> ; VI: ; %bb.0:
> -; VI-NEXT: s_load_dword s0, s[0:1], 0x24
> +; VI-NEXT: s_load_dword s0, s[4:5], 0x0
> ; VI-NEXT: v_mov_b32_e32 v0, 42
> ; VI-NEXT: v_mov_b32_e32 v1, 0
> ; VI-NEXT: s_mov_b32 m0, -1
> @@ -754,7 +710,7 @@ define amdgpu_kernel void
> @lds_atomic_inc_noret_i64_offset(i64 addrspace(3)* %pt
> ;
> ; GFX9-LABEL: lds_atomic_inc_noret_i64_offset:
> ; GFX9: ; %bb.0:
> -; GFX9-NEXT: s_load_dword s0, s[0:1], 0x24
> +; GFX9-NEXT: s_load_dword s0, s[4:5], 0x0
> ; GFX9-NEXT: v_mov_b32_e32 v0, 42
> ; GFX9-NEXT: v_mov_b32_e32 v1, 0
> ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
> @@ -770,7 +726,7 @@ define amdgpu_kernel void
> @lds_atomic_inc_noret_i64_offset(i64 addrspace(3)* %pt
> define amdgpu_kernel void @global_atomic_inc_ret_i64(i64 addrspace(1)*
> %out, i64 addrspace(1)* %ptr) #0 {
> ; CI-LABEL: global_atomic_inc_ret_i64:
> ; CI: ; %bb.0:
> -; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
> +; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; CI-NEXT: v_mov_b32_e32 v2, 42
> ; CI-NEXT: v_mov_b32_e32 v3, 0
> ; CI-NEXT: s_waitcnt lgkmcnt(0)
> @@ -785,7 +741,7 @@ define amdgpu_kernel void
> @global_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64
> ;
> ; VI-LABEL: global_atomic_inc_ret_i64:
> ; VI: ; %bb.0:
> -; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
> +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; VI-NEXT: v_mov_b32_e32 v2, 42
> ; VI-NEXT: v_mov_b32_e32 v3, 0
> ; VI-NEXT: s_waitcnt lgkmcnt(0)
> @@ -800,7 +756,7 @@ define amdgpu_kernel void
> @global_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64
> ;
> ; GFX9-LABEL: global_atomic_inc_ret_i64:
> ; GFX9: ; %bb.0:
> -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
> +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; GFX9-NEXT: v_mov_b32_e32 v2, 42
> ; GFX9-NEXT: v_mov_b32_e32 v3, 0
> ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
> @@ -820,7 +776,7 @@ define amdgpu_kernel void
> @global_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64
> define amdgpu_kernel void @global_atomic_inc_ret_i64_offset(i64
> addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
> ; CI-LABEL: global_atomic_inc_ret_i64_offset:
> ; CI: ; %bb.0:
> -; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
> +; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; CI-NEXT: v_mov_b32_e32 v2, 42
> ; CI-NEXT: v_mov_b32_e32 v3, 0
> ; CI-NEXT: s_waitcnt lgkmcnt(0)
> @@ -837,7 +793,7 @@ define amdgpu_kernel void
> @global_atomic_inc_ret_i64_offset(i64 addrspace(1)* %o
> ;
> ; VI-LABEL: global_atomic_inc_ret_i64_offset:
> ; VI: ; %bb.0:
> -; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
> +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; VI-NEXT: v_mov_b32_e32 v2, 42
> ; VI-NEXT: v_mov_b32_e32 v3, 0
> ; VI-NEXT: s_waitcnt lgkmcnt(0)
> @@ -854,7 +810,7 @@ define amdgpu_kernel void
> @global_atomic_inc_ret_i64_offset(i64 addrspace(1)* %o
> ;
> ; GFX9-LABEL: global_atomic_inc_ret_i64_offset:
> ; GFX9: ; %bb.0:
> -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
> +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; GFX9-NEXT: v_mov_b32_e32 v2, 42
> ; GFX9-NEXT: v_mov_b32_e32 v3, 0
> ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
> @@ -875,81 +831,33 @@ define amdgpu_kernel void
> @global_atomic_inc_ret_i64_offset(i64 addrspace(1)* %o
> }
>
> define amdgpu_kernel void @global_atomic_inc_noret_i64(i64 addrspace(1)*
> %ptr) nounwind {
> -; CI-LABEL: global_atomic_inc_noret_i64:
> -; CI: ; %bb.0:
> -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
> -; CI-NEXT: v_mov_b32_e32 v2, 42
> -; CI-NEXT: v_mov_b32_e32 v3, 0
> -; CI-NEXT: s_waitcnt lgkmcnt(0)
> -; CI-NEXT: v_mov_b32_e32 v0, s0
> -; CI-NEXT: v_mov_b32_e32 v1, s1
> -; CI-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc
> -; CI-NEXT: s_endpgm
> -;
> -; VI-LABEL: global_atomic_inc_noret_i64:
> -; VI: ; %bb.0:
> -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
> -; VI-NEXT: v_mov_b32_e32 v2, 42
> -; VI-NEXT: v_mov_b32_e32 v3, 0
> -; VI-NEXT: s_waitcnt lgkmcnt(0)
> -; VI-NEXT: v_mov_b32_e32 v0, s0
> -; VI-NEXT: v_mov_b32_e32 v1, s1
> -; VI-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc
> -; VI-NEXT: s_endpgm
> -;
> -; GFX9-LABEL: global_atomic_inc_noret_i64:
> -; GFX9: ; %bb.0:
> -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
> -; GFX9-NEXT: v_mov_b32_e32 v2, 42
> -; GFX9-NEXT: v_mov_b32_e32 v3, 0
> -; GFX9-NEXT: s_waitcnt lgkmcnt(0)
> -; GFX9-NEXT: v_mov_b32_e32 v0, s0
> -; GFX9-NEXT: v_mov_b32_e32 v1, s1
> -; GFX9-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc
> -; GFX9-NEXT: s_endpgm
> +; GCN-LABEL: global_atomic_inc_noret_i64:
> +; GCN: ; %bb.0:
> +; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> +; GCN-NEXT: v_mov_b32_e32 v2, 42
> +; GCN-NEXT: v_mov_b32_e32 v3, 0
> +; GCN-NEXT: s_waitcnt lgkmcnt(0)
> +; GCN-NEXT: v_mov_b32_e32 v0, s0
> +; GCN-NEXT: v_mov_b32_e32 v1, s1
> +; GCN-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc
> +; GCN-NEXT: s_endpgm
> %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)*
> %ptr, i64 42, i32 0, i32 0, i1 false)
> ret void
> }
>
> define amdgpu_kernel void @global_atomic_inc_noret_i64_offset(i64
> addrspace(1)* %ptr) nounwind {
> -; CI-LABEL: global_atomic_inc_noret_i64_offset:
> -; CI: ; %bb.0:
> -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
> -; CI-NEXT: v_mov_b32_e32 v2, 42
> -; CI-NEXT: v_mov_b32_e32 v3, 0
> -; CI-NEXT: s_waitcnt lgkmcnt(0)
> -; CI-NEXT: s_add_u32 s0, s0, 32
> -; CI-NEXT: s_addc_u32 s1, s1, 0
> -; CI-NEXT: v_mov_b32_e32 v0, s0
> -; CI-NEXT: v_mov_b32_e32 v1, s1
> -; CI-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc
> -; CI-NEXT: s_endpgm
> -;
> -; VI-LABEL: global_atomic_inc_noret_i64_offset:
> -; VI: ; %bb.0:
> -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
> -; VI-NEXT: v_mov_b32_e32 v2, 42
> -; VI-NEXT: v_mov_b32_e32 v3, 0
> -; VI-NEXT: s_waitcnt lgkmcnt(0)
> -; VI-NEXT: s_add_u32 s0, s0, 32
> -; VI-NEXT: s_addc_u32 s1, s1, 0
> -; VI-NEXT: v_mov_b32_e32 v0, s0
> -; VI-NEXT: v_mov_b32_e32 v1, s1
> -; VI-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc
> -; VI-NEXT: s_endpgm
> -;
> -; GFX9-LABEL: global_atomic_inc_noret_i64_offset:
> -; GFX9: ; %bb.0:
> -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
> -; GFX9-NEXT: v_mov_b32_e32 v2, 42
> -; GFX9-NEXT: v_mov_b32_e32 v3, 0
> -; GFX9-NEXT: s_waitcnt lgkmcnt(0)
> -; GFX9-NEXT: s_add_u32 s0, s0, 32
> -; GFX9-NEXT: s_addc_u32 s1, s1, 0
> -; GFX9-NEXT: v_mov_b32_e32 v0, s0
> -; GFX9-NEXT: v_mov_b32_e32 v1, s1
> -; GFX9-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc
> -; GFX9-NEXT: s_endpgm
> +; GCN-LABEL: global_atomic_inc_noret_i64_offset:
> +; GCN: ; %bb.0:
> +; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> +; GCN-NEXT: v_mov_b32_e32 v2, 42
> +; GCN-NEXT: v_mov_b32_e32 v3, 0
> +; GCN-NEXT: s_waitcnt lgkmcnt(0)
> +; GCN-NEXT: s_add_u32 s0, s0, 32
> +; GCN-NEXT: s_addc_u32 s1, s1, 0
> +; GCN-NEXT: v_mov_b32_e32 v0, s0
> +; GCN-NEXT: v_mov_b32_e32 v1, s1
> +; GCN-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc
> +; GCN-NEXT: s_endpgm
> %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
> %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)*
> %gep, i64 42, i32 0, i32 0, i1 false)
> ret void
> @@ -961,7 +869,7 @@ define amdgpu_kernel void
> @global_atomic_inc_ret_i64_offset_addr64(i64 addrspace
> ; CI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
> ; CI-NEXT: v_mul_lo_u32 v2, 0, v0
> ; CI-NEXT: v_mul_lo_u32 v1, 8, v1
> -; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
> +; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; CI-NEXT: v_mul_hi_u32 v3, 8, v0
> ; CI-NEXT: v_mul_lo_u32 v4, 8, v0
> ; CI-NEXT: v_add_i32_e32 v1, vcc, v2, v1
> @@ -987,7 +895,7 @@ define amdgpu_kernel void
> @global_atomic_inc_ret_i64_offset_addr64(i64 addrspace
> ; VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
> ; VI-NEXT: v_mul_lo_u32 v2, 0, v0
> ; VI-NEXT: v_mul_lo_u32 v1, 8, v1
> -; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
> +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; VI-NEXT: v_mul_hi_u32 v3, 8, v0
> ; VI-NEXT: v_mul_lo_u32 v4, 8, v0
> ; VI-NEXT: v_add_u32_e32 v1, vcc, v2, v1
> @@ -1010,7 +918,7 @@ define amdgpu_kernel void
> @global_atomic_inc_ret_i64_offset_addr64(i64 addrspace
> ;
> ; GFX9-LABEL: global_atomic_inc_ret_i64_offset_addr64:
> ; GFX9: ; %bb.0:
> -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
> +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; GFX9-NEXT: v_ashrrev_i32_e32 v1, 31, v0
> ; GFX9-NEXT: v_mul_lo_u32 v2, 0, v0
> ; GFX9-NEXT: v_mul_lo_u32 v3, 8, v1
> @@ -1047,7 +955,7 @@ define amdgpu_kernel void
> @global_atomic_inc_noret_i64_offset_addr64(i64 addrspa
> ; CI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
> ; CI-NEXT: v_mul_lo_u32 v2, 0, v0
> ; CI-NEXT: v_mul_lo_u32 v1, 8, v1
> -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
> +; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> ; CI-NEXT: v_mul_hi_u32 v3, 8, v0
> ; CI-NEXT: v_mul_lo_u32 v0, 8, v0
> ; CI-NEXT: v_add_i32_e32 v1, vcc, v2, v1
> @@ -1068,7 +976,7 @@ define amdgpu_kernel void
> @global_atomic_inc_noret_i64_offset_addr64(i64 addrspa
> ; VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
> ; VI-NEXT: v_mul_lo_u32 v2, 0, v0
> ; VI-NEXT: v_mul_lo_u32 v1, 8, v1
> -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
> +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> ; VI-NEXT: v_mul_hi_u32 v3, 8, v0
> ; VI-NEXT: v_mul_lo_u32 v0, 8, v0
> ; VI-NEXT: v_add_u32_e32 v1, vcc, v2, v1
> @@ -1086,7 +994,7 @@ define amdgpu_kernel void
> @global_atomic_inc_noret_i64_offset_addr64(i64 addrspa
> ;
> ; GFX9-LABEL: global_atomic_inc_noret_i64_offset_addr64:
> ; GFX9: ; %bb.0:
> -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
> +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> ; GFX9-NEXT: v_ashrrev_i32_e32 v1, 31, v0
> ; GFX9-NEXT: v_mul_lo_u32 v2, 0, v0
> ; GFX9-NEXT: v_mul_lo_u32 v3, 8, v1
> @@ -1111,100 +1019,40 @@ define amdgpu_kernel void
> @global_atomic_inc_noret_i64_offset_addr64(i64 addrspa
> }
>
> define amdgpu_kernel void @flat_atomic_inc_ret_i32(i32* %out, i32* %ptr)
> #0 {
> -; CI-LABEL: flat_atomic_inc_ret_i32:
> -; CI: ; %bb.0:
> -; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
> -; CI-NEXT: v_mov_b32_e32 v2, 42
> -; CI-NEXT: s_waitcnt lgkmcnt(0)
> -; CI-NEXT: v_mov_b32_e32 v0, s2
> -; CI-NEXT: v_mov_b32_e32 v1, s3
> -; CI-NEXT: flat_atomic_inc v2, v[0:1], v2 glc
> -; CI-NEXT: v_mov_b32_e32 v0, s0
> -; CI-NEXT: v_mov_b32_e32 v1, s1
> -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
> -; CI-NEXT: flat_store_dword v[0:1], v2
> -; CI-NEXT: s_endpgm
> -;
> -; VI-LABEL: flat_atomic_inc_ret_i32:
> -; VI: ; %bb.0:
> -; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
> -; VI-NEXT: v_mov_b32_e32 v2, 42
> -; VI-NEXT: s_waitcnt lgkmcnt(0)
> -; VI-NEXT: v_mov_b32_e32 v0, s2
> -; VI-NEXT: v_mov_b32_e32 v1, s3
> -; VI-NEXT: flat_atomic_inc v2, v[0:1], v2 glc
> -; VI-NEXT: v_mov_b32_e32 v0, s0
> -; VI-NEXT: v_mov_b32_e32 v1, s1
> -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
> -; VI-NEXT: flat_store_dword v[0:1], v2
> -; VI-NEXT: s_endpgm
> -;
> -; GFX9-LABEL: flat_atomic_inc_ret_i32:
> -; GFX9: ; %bb.0:
> -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
> -; GFX9-NEXT: v_mov_b32_e32 v2, 42
> -; GFX9-NEXT: s_waitcnt lgkmcnt(0)
> -; GFX9-NEXT: v_mov_b32_e32 v0, s2
> -; GFX9-NEXT: v_mov_b32_e32 v1, s3
> -; GFX9-NEXT: flat_atomic_inc v2, v[0:1], v2 glc
> -; GFX9-NEXT: v_mov_b32_e32 v0, s0
> -; GFX9-NEXT: v_mov_b32_e32 v1, s1
> -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
> -; GFX9-NEXT: flat_store_dword v[0:1], v2
> -; GFX9-NEXT: s_endpgm
> +; GCN-LABEL: flat_atomic_inc_ret_i32:
> +; GCN: ; %bb.0:
> +; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> +; GCN-NEXT: v_mov_b32_e32 v2, 42
> +; GCN-NEXT: s_waitcnt lgkmcnt(0)
> +; GCN-NEXT: v_mov_b32_e32 v0, s2
> +; GCN-NEXT: v_mov_b32_e32 v1, s3
> +; GCN-NEXT: flat_atomic_inc v2, v[0:1], v2 glc
> +; GCN-NEXT: v_mov_b32_e32 v0, s0
> +; GCN-NEXT: v_mov_b32_e32 v1, s1
> +; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
> +; GCN-NEXT: flat_store_dword v[0:1], v2
> +; GCN-NEXT: s_endpgm
> %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %ptr, i32 42,
> i32 0, i32 0, i1 false)
> store i32 %result, i32* %out
> ret void
> }
>
> define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset(i32* %out, i32*
> %ptr) #0 {
> -; CI-LABEL: flat_atomic_inc_ret_i32_offset:
> -; CI: ; %bb.0:
> -; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
> -; CI-NEXT: v_mov_b32_e32 v2, 42
> -; CI-NEXT: s_waitcnt lgkmcnt(0)
> -; CI-NEXT: s_add_u32 s2, s2, 16
> -; CI-NEXT: s_addc_u32 s3, s3, 0
> -; CI-NEXT: v_mov_b32_e32 v0, s2
> -; CI-NEXT: v_mov_b32_e32 v1, s3
> -; CI-NEXT: flat_atomic_inc v2, v[0:1], v2 glc
> -; CI-NEXT: v_mov_b32_e32 v0, s0
> -; CI-NEXT: v_mov_b32_e32 v1, s1
> -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
> -; CI-NEXT: flat_store_dword v[0:1], v2
> -; CI-NEXT: s_endpgm
> -;
> -; VI-LABEL: flat_atomic_inc_ret_i32_offset:
> -; VI: ; %bb.0:
> -; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
> -; VI-NEXT: v_mov_b32_e32 v2, 42
> -; VI-NEXT: s_waitcnt lgkmcnt(0)
> -; VI-NEXT: s_add_u32 s2, s2, 16
> -; VI-NEXT: s_addc_u32 s3, s3, 0
> -; VI-NEXT: v_mov_b32_e32 v0, s2
> -; VI-NEXT: v_mov_b32_e32 v1, s3
> -; VI-NEXT: flat_atomic_inc v2, v[0:1], v2 glc
> -; VI-NEXT: v_mov_b32_e32 v0, s0
> -; VI-NEXT: v_mov_b32_e32 v1, s1
> -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
> -; VI-NEXT: flat_store_dword v[0:1], v2
> -; VI-NEXT: s_endpgm
> -;
> -; GFX9-LABEL: flat_atomic_inc_ret_i32_offset:
> -; GFX9: ; %bb.0:
> -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
> -; GFX9-NEXT: v_mov_b32_e32 v2, 42
> -; GFX9-NEXT: s_waitcnt lgkmcnt(0)
> -; GFX9-NEXT: s_add_u32 s2, s2, 16
> -; GFX9-NEXT: s_addc_u32 s3, s3, 0
> -; GFX9-NEXT: v_mov_b32_e32 v0, s2
> -; GFX9-NEXT: v_mov_b32_e32 v1, s3
> -; GFX9-NEXT: flat_atomic_inc v2, v[0:1], v2 glc
> -; GFX9-NEXT: v_mov_b32_e32 v0, s0
> -; GFX9-NEXT: v_mov_b32_e32 v1, s1
> -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
> -; GFX9-NEXT: flat_store_dword v[0:1], v2
> -; GFX9-NEXT: s_endpgm
> +; GCN-LABEL: flat_atomic_inc_ret_i32_offset:
> +; GCN: ; %bb.0:
> +; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> +; GCN-NEXT: v_mov_b32_e32 v2, 42
> +; GCN-NEXT: s_waitcnt lgkmcnt(0)
> +; GCN-NEXT: s_add_u32 s2, s2, 16
> +; GCN-NEXT: s_addc_u32 s3, s3, 0
> +; GCN-NEXT: v_mov_b32_e32 v0, s2
> +; GCN-NEXT: v_mov_b32_e32 v1, s3
> +; GCN-NEXT: flat_atomic_inc v2, v[0:1], v2 glc
> +; GCN-NEXT: v_mov_b32_e32 v0, s0
> +; GCN-NEXT: v_mov_b32_e32 v1, s1
> +; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
> +; GCN-NEXT: flat_store_dword v[0:1], v2
> +; GCN-NEXT: s_endpgm
> %gep = getelementptr i32, i32* %ptr, i32 4
> %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42,
> i32 0, i32 0, i1 false)
> store i32 %result, i32* %out
> @@ -1212,75 +1060,31 @@ define amdgpu_kernel void
> @flat_atomic_inc_ret_i32_offset(i32* %out, i32* %ptr)
> }
>
> define amdgpu_kernel void @flat_atomic_inc_noret_i32(i32* %ptr) nounwind {
> -; CI-LABEL: flat_atomic_inc_noret_i32:
> -; CI: ; %bb.0:
> -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
> -; CI-NEXT: v_mov_b32_e32 v2, 42
> -; CI-NEXT: s_waitcnt lgkmcnt(0)
> -; CI-NEXT: v_mov_b32_e32 v0, s0
> -; CI-NEXT: v_mov_b32_e32 v1, s1
> -; CI-NEXT: flat_atomic_inc v0, v[0:1], v2 glc
> -; CI-NEXT: s_endpgm
> -;
> -; VI-LABEL: flat_atomic_inc_noret_i32:
> -; VI: ; %bb.0:
> -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
> -; VI-NEXT: v_mov_b32_e32 v2, 42
> -; VI-NEXT: s_waitcnt lgkmcnt(0)
> -; VI-NEXT: v_mov_b32_e32 v0, s0
> -; VI-NEXT: v_mov_b32_e32 v1, s1
> -; VI-NEXT: flat_atomic_inc v0, v[0:1], v2 glc
> -; VI-NEXT: s_endpgm
> -;
> -; GFX9-LABEL: flat_atomic_inc_noret_i32:
> -; GFX9: ; %bb.0:
> -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
> -; GFX9-NEXT: v_mov_b32_e32 v2, 42
> -; GFX9-NEXT: s_waitcnt lgkmcnt(0)
> -; GFX9-NEXT: v_mov_b32_e32 v0, s0
> -; GFX9-NEXT: v_mov_b32_e32 v1, s1
> -; GFX9-NEXT: flat_atomic_inc v0, v[0:1], v2 glc
> -; GFX9-NEXT: s_endpgm
> +; GCN-LABEL: flat_atomic_inc_noret_i32:
> +; GCN: ; %bb.0:
> +; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> +; GCN-NEXT: v_mov_b32_e32 v2, 42
> +; GCN-NEXT: s_waitcnt lgkmcnt(0)
> +; GCN-NEXT: v_mov_b32_e32 v0, s0
> +; GCN-NEXT: v_mov_b32_e32 v1, s1
> +; GCN-NEXT: flat_atomic_inc v0, v[0:1], v2 glc
> +; GCN-NEXT: s_endpgm
> %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %ptr, i32 42,
> i32 0, i32 0, i1 false)
> ret void
> }
>
> define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset(i32* %ptr)
> nounwind {
> -; CI-LABEL: flat_atomic_inc_noret_i32_offset:
> -; CI: ; %bb.0:
> -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
> -; CI-NEXT: v_mov_b32_e32 v2, 42
> -; CI-NEXT: s_waitcnt lgkmcnt(0)
> -; CI-NEXT: s_add_u32 s0, s0, 16
> -; CI-NEXT: s_addc_u32 s1, s1, 0
> -; CI-NEXT: v_mov_b32_e32 v0, s0
> -; CI-NEXT: v_mov_b32_e32 v1, s1
> -; CI-NEXT: flat_atomic_inc v0, v[0:1], v2 glc
> -; CI-NEXT: s_endpgm
> -;
> -; VI-LABEL: flat_atomic_inc_noret_i32_offset:
> -; VI: ; %bb.0:
> -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
> -; VI-NEXT: v_mov_b32_e32 v2, 42
> -; VI-NEXT: s_waitcnt lgkmcnt(0)
> -; VI-NEXT: s_add_u32 s0, s0, 16
> -; VI-NEXT: s_addc_u32 s1, s1, 0
> -; VI-NEXT: v_mov_b32_e32 v0, s0
> -; VI-NEXT: v_mov_b32_e32 v1, s1
> -; VI-NEXT: flat_atomic_inc v0, v[0:1], v2 glc
> -; VI-NEXT: s_endpgm
> -;
> -; GFX9-LABEL: flat_atomic_inc_noret_i32_offset:
> -; GFX9: ; %bb.0:
> -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
> -; GFX9-NEXT: v_mov_b32_e32 v2, 42
> -; GFX9-NEXT: s_waitcnt lgkmcnt(0)
> -; GFX9-NEXT: s_add_u32 s0, s0, 16
> -; GFX9-NEXT: s_addc_u32 s1, s1, 0
> -; GFX9-NEXT: v_mov_b32_e32 v0, s0
> -; GFX9-NEXT: v_mov_b32_e32 v1, s1
> -; GFX9-NEXT: flat_atomic_inc v0, v[0:1], v2 glc
> -; GFX9-NEXT: s_endpgm
> +; GCN-LABEL: flat_atomic_inc_noret_i32_offset:
> +; GCN: ; %bb.0:
> +; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> +; GCN-NEXT: v_mov_b32_e32 v2, 42
> +; GCN-NEXT: s_waitcnt lgkmcnt(0)
> +; GCN-NEXT: s_add_u32 s0, s0, 16
> +; GCN-NEXT: s_addc_u32 s1, s1, 0
> +; GCN-NEXT: v_mov_b32_e32 v0, s0
> +; GCN-NEXT: v_mov_b32_e32 v1, s1
> +; GCN-NEXT: flat_atomic_inc v0, v[0:1], v2 glc
> +; GCN-NEXT: s_endpgm
> %gep = getelementptr i32, i32* %ptr, i32 4
> %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42,
> i32 0, i32 0, i1 false)
> ret void
> @@ -1292,7 +1096,7 @@ define amdgpu_kernel void
> @flat_atomic_inc_ret_i32_offset_addr64(i32* %out, i32*
> ; CI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
> ; CI-NEXT: v_mul_lo_u32 v2, 0, v0
> ; CI-NEXT: v_mul_lo_u32 v1, 4, v1
> -; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
> +; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; CI-NEXT: v_mul_hi_u32 v3, 4, v0
> ; CI-NEXT: v_mul_lo_u32 v4, 4, v0
> ; CI-NEXT: v_add_i32_e32 v1, vcc, v2, v1
> @@ -1317,7 +1121,7 @@ define amdgpu_kernel void
> @flat_atomic_inc_ret_i32_offset_addr64(i32* %out, i32*
> ; VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
> ; VI-NEXT: v_mul_lo_u32 v2, 0, v0
> ; VI-NEXT: v_mul_lo_u32 v1, 4, v1
> -; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
> +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; VI-NEXT: v_mul_hi_u32 v3, 4, v0
> ; VI-NEXT: v_mul_lo_u32 v4, 4, v0
> ; VI-NEXT: v_add_u32_e32 v1, vcc, v2, v1
> @@ -1340,7 +1144,7 @@ define amdgpu_kernel void
> @flat_atomic_inc_ret_i32_offset_addr64(i32* %out, i32*
> ; GFX9-LABEL: flat_atomic_inc_ret_i32_offset_addr64:
> ; GFX9: ; %bb.0:
> ; GFX9-NEXT: v_ashrrev_i32_e32 v1, 31, v0
> -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
> +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; GFX9-NEXT: v_mul_lo_u32 v2, 0, v0
> ; GFX9-NEXT: v_mul_lo_u32 v1, 4, v1
> ; GFX9-NEXT: v_mul_hi_u32 v3, 4, v0
> @@ -1375,7 +1179,7 @@ define amdgpu_kernel void
> @flat_atomic_inc_noret_i32_offset_addr64(i32* %ptr) #0
> ; CI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
> ; CI-NEXT: v_mul_lo_u32 v2, 0, v0
> ; CI-NEXT: v_mul_lo_u32 v1, 4, v1
> -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
> +; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> ; CI-NEXT: v_mul_hi_u32 v3, 4, v0
> ; CI-NEXT: v_mul_lo_u32 v0, 4, v0
> ; CI-NEXT: v_add_i32_e32 v1, vcc, v2, v1
> @@ -1395,7 +1199,7 @@ define amdgpu_kernel void
> @flat_atomic_inc_noret_i32_offset_addr64(i32* %ptr) #0
> ; VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
> ; VI-NEXT: v_mul_lo_u32 v2, 0, v0
> ; VI-NEXT: v_mul_lo_u32 v1, 4, v1
> -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
> +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> ; VI-NEXT: v_mul_hi_u32 v3, 4, v0
> ; VI-NEXT: v_mul_lo_u32 v0, 4, v0
> ; VI-NEXT: v_add_u32_e32 v1, vcc, v2, v1
> @@ -1413,7 +1217,7 @@ define amdgpu_kernel void
> @flat_atomic_inc_noret_i32_offset_addr64(i32* %ptr) #0
> ; GFX9-LABEL: flat_atomic_inc_noret_i32_offset_addr64:
> ; GFX9: ; %bb.0:
> ; GFX9-NEXT: v_ashrrev_i32_e32 v1, 31, v0
> -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
> +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> ; GFX9-NEXT: v_mul_lo_u32 v2, 0, v0
> ; GFX9-NEXT: v_mul_hi_u32 v3, 4, v0
> ; GFX9-NEXT: v_mul_lo_u32 v1, 4, v1
> @@ -1441,7 +1245,7 @@ define amdgpu_kernel void
> @atomic_inc_shl_base_lds_0_i64(i64 addrspace(1)* %out,
> ; CI-LABEL: atomic_inc_shl_base_lds_0_i64:
> ; CI: ; %bb.0:
> ; CI-NEXT: v_mul_lo_u32 v7, 8, v0
> -; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
> +; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; CI-NEXT: v_add_i32_e32 v6, vcc, 2, v0
> ; CI-NEXT: v_mov_b32_e32 v0, 9
> ; CI-NEXT: v_add_i32_e32 v7, vcc, 0, v7
> @@ -1461,7 +1265,7 @@ define amdgpu_kernel void
> @atomic_inc_shl_base_lds_0_i64(i64 addrspace(1)* %out,
> ; VI-LABEL: atomic_inc_shl_base_lds_0_i64:
> ; VI: ; %bb.0:
> ; VI-NEXT: v_mul_lo_u32 v7, 8, v0
> -; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
> +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; VI-NEXT: v_add_u32_e32 v6, vcc, 2, v0
> ; VI-NEXT: v_mov_b32_e32 v0, 9
> ; VI-NEXT: v_add_u32_e32 v7, vcc, 0, v7
> @@ -1481,7 +1285,7 @@ define amdgpu_kernel void
> @atomic_inc_shl_base_lds_0_i64(i64 addrspace(1)* %out,
> ; GFX9-LABEL: atomic_inc_shl_base_lds_0_i64:
> ; GFX9: ; %bb.0:
> ; GFX9-NEXT: v_mul_lo_u32 v3, 8, v0
> -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
> +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; GFX9-NEXT: v_mov_b32_e32 v1, 9
> ; GFX9-NEXT: v_add_u32_e32 v4, 2, v0
> ; GFX9-NEXT: v_add_u32_e32 v0, 0, v3
> @@ -1506,106 +1310,42 @@ define amdgpu_kernel void
> @atomic_inc_shl_base_lds_0_i64(i64 addrspace(1)* %out,
> }
>
> define amdgpu_kernel void @flat_atomic_inc_ret_i64(i64* %out, i64* %ptr)
> #0 {
> -; CI-LABEL: flat_atomic_inc_ret_i64:
> -; CI: ; %bb.0:
> -; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
> -; CI-NEXT: v_mov_b32_e32 v2, 42
> -; CI-NEXT: v_mov_b32_e32 v3, 0
> -; CI-NEXT: s_waitcnt lgkmcnt(0)
> -; CI-NEXT: v_mov_b32_e32 v0, s2
> -; CI-NEXT: v_mov_b32_e32 v1, s3
> -; CI-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc
> -; CI-NEXT: v_mov_b32_e32 v3, s1
> -; CI-NEXT: v_mov_b32_e32 v2, s0
> -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
> -; CI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
> -; CI-NEXT: s_endpgm
> -;
> -; VI-LABEL: flat_atomic_inc_ret_i64:
> -; VI: ; %bb.0:
> -; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
> -; VI-NEXT: v_mov_b32_e32 v2, 42
> -; VI-NEXT: v_mov_b32_e32 v3, 0
> -; VI-NEXT: s_waitcnt lgkmcnt(0)
> -; VI-NEXT: v_mov_b32_e32 v0, s2
> -; VI-NEXT: v_mov_b32_e32 v1, s3
> -; VI-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc
> -; VI-NEXT: v_mov_b32_e32 v3, s1
> -; VI-NEXT: v_mov_b32_e32 v2, s0
> -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
> -; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
> -; VI-NEXT: s_endpgm
> -;
> -; GFX9-LABEL: flat_atomic_inc_ret_i64:
> -; GFX9: ; %bb.0:
> -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
> -; GFX9-NEXT: v_mov_b32_e32 v2, 42
> -; GFX9-NEXT: v_mov_b32_e32 v3, 0
> -; GFX9-NEXT: s_waitcnt lgkmcnt(0)
> -; GFX9-NEXT: v_mov_b32_e32 v0, s2
> -; GFX9-NEXT: v_mov_b32_e32 v1, s3
> -; GFX9-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc
> -; GFX9-NEXT: v_mov_b32_e32 v3, s1
> -; GFX9-NEXT: v_mov_b32_e32 v2, s0
> -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
> -; GFX9-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
> -; GFX9-NEXT: s_endpgm
> +; GCN-LABEL: flat_atomic_inc_ret_i64:
> +; GCN: ; %bb.0:
> +; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> +; GCN-NEXT: v_mov_b32_e32 v2, 42
> +; GCN-NEXT: v_mov_b32_e32 v3, 0
> +; GCN-NEXT: s_waitcnt lgkmcnt(0)
> +; GCN-NEXT: v_mov_b32_e32 v0, s2
> +; GCN-NEXT: v_mov_b32_e32 v1, s3
> +; GCN-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc
> +; GCN-NEXT: v_mov_b32_e32 v3, s1
> +; GCN-NEXT: v_mov_b32_e32 v2, s0
> +; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
> +; GCN-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
> +; GCN-NEXT: s_endpgm
> %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %ptr, i64 42,
> i32 0, i32 0, i1 false)
> store i64 %result, i64* %out
> ret void
> }
>
> define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset(i64* %out, i64*
> %ptr) #0 {
> -; CI-LABEL: flat_atomic_inc_ret_i64_offset:
> -; CI: ; %bb.0:
> -; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
> -; CI-NEXT: v_mov_b32_e32 v2, 42
> -; CI-NEXT: v_mov_b32_e32 v3, 0
> -; CI-NEXT: s_waitcnt lgkmcnt(0)
> -; CI-NEXT: s_add_u32 s2, s2, 32
> -; CI-NEXT: s_addc_u32 s3, s3, 0
> -; CI-NEXT: v_mov_b32_e32 v0, s2
> -; CI-NEXT: v_mov_b32_e32 v1, s3
> -; CI-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc
> -; CI-NEXT: v_mov_b32_e32 v3, s1
> -; CI-NEXT: v_mov_b32_e32 v2, s0
> -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
> -; CI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
> -; CI-NEXT: s_endpgm
> -;
> -; VI-LABEL: flat_atomic_inc_ret_i64_offset:
> -; VI: ; %bb.0:
> -; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
> -; VI-NEXT: v_mov_b32_e32 v2, 42
> -; VI-NEXT: v_mov_b32_e32 v3, 0
> -; VI-NEXT: s_waitcnt lgkmcnt(0)
> -; VI-NEXT: s_add_u32 s2, s2, 32
> -; VI-NEXT: s_addc_u32 s3, s3, 0
> -; VI-NEXT: v_mov_b32_e32 v0, s2
> -; VI-NEXT: v_mov_b32_e32 v1, s3
> -; VI-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc
> -; VI-NEXT: v_mov_b32_e32 v3, s1
> -; VI-NEXT: v_mov_b32_e32 v2, s0
> -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
> -; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
> -; VI-NEXT: s_endpgm
> -;
> -; GFX9-LABEL: flat_atomic_inc_ret_i64_offset:
> -; GFX9: ; %bb.0:
> -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
> -; GFX9-NEXT: v_mov_b32_e32 v2, 42
> -; GFX9-NEXT: v_mov_b32_e32 v3, 0
> -; GFX9-NEXT: s_waitcnt lgkmcnt(0)
> -; GFX9-NEXT: s_add_u32 s2, s2, 32
> -; GFX9-NEXT: s_addc_u32 s3, s3, 0
> -; GFX9-NEXT: v_mov_b32_e32 v0, s2
> -; GFX9-NEXT: v_mov_b32_e32 v1, s3
> -; GFX9-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc
> -; GFX9-NEXT: v_mov_b32_e32 v3, s1
> -; GFX9-NEXT: v_mov_b32_e32 v2, s0
> -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
> -; GFX9-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
> -; GFX9-NEXT: s_endpgm
> +; GCN-LABEL: flat_atomic_inc_ret_i64_offset:
> +; GCN: ; %bb.0:
> +; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> +; GCN-NEXT: v_mov_b32_e32 v2, 42
> +; GCN-NEXT: v_mov_b32_e32 v3, 0
> +; GCN-NEXT: s_waitcnt lgkmcnt(0)
> +; GCN-NEXT: s_add_u32 s2, s2, 32
> +; GCN-NEXT: s_addc_u32 s3, s3, 0
> +; GCN-NEXT: v_mov_b32_e32 v0, s2
> +; GCN-NEXT: v_mov_b32_e32 v1, s3
> +; GCN-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc
> +; GCN-NEXT: v_mov_b32_e32 v3, s1
> +; GCN-NEXT: v_mov_b32_e32 v2, s0
> +; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
> +; GCN-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
> +; GCN-NEXT: s_endpgm
> %gep = getelementptr i64, i64* %ptr, i32 4
> %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42,
> i32 0, i32 0, i1 false)
> store i64 %result, i64* %out
> @@ -1613,81 +1353,33 @@ define amdgpu_kernel void
> @flat_atomic_inc_ret_i64_offset(i64* %out, i64* %ptr)
> }
>
> define amdgpu_kernel void @flat_atomic_inc_noret_i64(i64* %ptr) nounwind {
> -; CI-LABEL: flat_atomic_inc_noret_i64:
> -; CI: ; %bb.0:
> -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
> -; CI-NEXT: v_mov_b32_e32 v2, 42
> -; CI-NEXT: v_mov_b32_e32 v3, 0
> -; CI-NEXT: s_waitcnt lgkmcnt(0)
> -; CI-NEXT: v_mov_b32_e32 v0, s0
> -; CI-NEXT: v_mov_b32_e32 v1, s1
> -; CI-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc
> -; CI-NEXT: s_endpgm
> -;
> -; VI-LABEL: flat_atomic_inc_noret_i64:
> -; VI: ; %bb.0:
> -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
> -; VI-NEXT: v_mov_b32_e32 v2, 42
> -; VI-NEXT: v_mov_b32_e32 v3, 0
> -; VI-NEXT: s_waitcnt lgkmcnt(0)
> -; VI-NEXT: v_mov_b32_e32 v0, s0
> -; VI-NEXT: v_mov_b32_e32 v1, s1
> -; VI-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc
> -; VI-NEXT: s_endpgm
> -;
> -; GFX9-LABEL: flat_atomic_inc_noret_i64:
> -; GFX9: ; %bb.0:
> -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
> -; GFX9-NEXT: v_mov_b32_e32 v2, 42
> -; GFX9-NEXT: v_mov_b32_e32 v3, 0
> -; GFX9-NEXT: s_waitcnt lgkmcnt(0)
> -; GFX9-NEXT: v_mov_b32_e32 v0, s0
> -; GFX9-NEXT: v_mov_b32_e32 v1, s1
> -; GFX9-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc
> -; GFX9-NEXT: s_endpgm
> +; GCN-LABEL: flat_atomic_inc_noret_i64:
> +; GCN: ; %bb.0:
> +; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> +; GCN-NEXT: v_mov_b32_e32 v2, 42
> +; GCN-NEXT: v_mov_b32_e32 v3, 0
> +; GCN-NEXT: s_waitcnt lgkmcnt(0)
> +; GCN-NEXT: v_mov_b32_e32 v0, s0
> +; GCN-NEXT: v_mov_b32_e32 v1, s1
> +; GCN-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc
> +; GCN-NEXT: s_endpgm
> %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %ptr, i64 42,
> i32 0, i32 0, i1 false)
> ret void
> }
>
> define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset(i64* %ptr)
> nounwind {
> -; CI-LABEL: flat_atomic_inc_noret_i64_offset:
> -; CI: ; %bb.0:
> -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
> -; CI-NEXT: v_mov_b32_e32 v2, 42
> -; CI-NEXT: v_mov_b32_e32 v3, 0
> -; CI-NEXT: s_waitcnt lgkmcnt(0)
> -; CI-NEXT: s_add_u32 s0, s0, 32
> -; CI-NEXT: s_addc_u32 s1, s1, 0
> -; CI-NEXT: v_mov_b32_e32 v0, s0
> -; CI-NEXT: v_mov_b32_e32 v1, s1
> -; CI-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc
> -; CI-NEXT: s_endpgm
> -;
> -; VI-LABEL: flat_atomic_inc_noret_i64_offset:
> -; VI: ; %bb.0:
> -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
> -; VI-NEXT: v_mov_b32_e32 v2, 42
> -; VI-NEXT: v_mov_b32_e32 v3, 0
> -; VI-NEXT: s_waitcnt lgkmcnt(0)
> -; VI-NEXT: s_add_u32 s0, s0, 32
> -; VI-NEXT: s_addc_u32 s1, s1, 0
> -; VI-NEXT: v_mov_b32_e32 v0, s0
> -; VI-NEXT: v_mov_b32_e32 v1, s1
> -; VI-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc
> -; VI-NEXT: s_endpgm
> -;
> -; GFX9-LABEL: flat_atomic_inc_noret_i64_offset:
> -; GFX9: ; %bb.0:
> -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
> -; GFX9-NEXT: v_mov_b32_e32 v2, 42
> -; GFX9-NEXT: v_mov_b32_e32 v3, 0
> -; GFX9-NEXT: s_waitcnt lgkmcnt(0)
> -; GFX9-NEXT: s_add_u32 s0, s0, 32
> -; GFX9-NEXT: s_addc_u32 s1, s1, 0
> -; GFX9-NEXT: v_mov_b32_e32 v0, s0
> -; GFX9-NEXT: v_mov_b32_e32 v1, s1
> -; GFX9-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc
> -; GFX9-NEXT: s_endpgm
> +; GCN-LABEL: flat_atomic_inc_noret_i64_offset:
> +; GCN: ; %bb.0:
> +; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> +; GCN-NEXT: v_mov_b32_e32 v2, 42
> +; GCN-NEXT: v_mov_b32_e32 v3, 0
> +; GCN-NEXT: s_waitcnt lgkmcnt(0)
> +; GCN-NEXT: s_add_u32 s0, s0, 32
> +; GCN-NEXT: s_addc_u32 s1, s1, 0
> +; GCN-NEXT: v_mov_b32_e32 v0, s0
> +; GCN-NEXT: v_mov_b32_e32 v1, s1
> +; GCN-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc
> +; GCN-NEXT: s_endpgm
> %gep = getelementptr i64, i64* %ptr, i32 4
> %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42,
> i32 0, i32 0, i1 false)
> ret void
> @@ -1699,7 +1391,7 @@ define amdgpu_kernel void
> @flat_atomic_inc_ret_i64_offset_addr64(i64* %out, i64*
> ; CI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
> ; CI-NEXT: v_mul_lo_u32 v2, 0, v0
> ; CI-NEXT: v_mul_lo_u32 v1, 8, v1
> -; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
> +; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; CI-NEXT: v_mul_hi_u32 v3, 8, v0
> ; CI-NEXT: v_mul_lo_u32 v4, 8, v0
> ; CI-NEXT: v_add_i32_e32 v1, vcc, v2, v1
> @@ -1725,7 +1417,7 @@ define amdgpu_kernel void
> @flat_atomic_inc_ret_i64_offset_addr64(i64* %out, i64*
> ; VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
> ; VI-NEXT: v_mul_lo_u32 v2, 0, v0
> ; VI-NEXT: v_mul_lo_u32 v1, 8, v1
> -; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
> +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; VI-NEXT: v_mul_hi_u32 v3, 8, v0
> ; VI-NEXT: v_mul_lo_u32 v4, 8, v0
> ; VI-NEXT: v_add_u32_e32 v1, vcc, v2, v1
> @@ -1748,7 +1440,7 @@ define amdgpu_kernel void
> @flat_atomic_inc_ret_i64_offset_addr64(i64* %out, i64*
> ;
> ; GFX9-LABEL: flat_atomic_inc_ret_i64_offset_addr64:
> ; GFX9: ; %bb.0:
> -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
> +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> ; GFX9-NEXT: v_ashrrev_i32_e32 v1, 31, v0
> ; GFX9-NEXT: v_mul_lo_u32 v2, 0, v0
> ; GFX9-NEXT: v_mul_lo_u32 v3, 8, v1
> @@ -1785,7 +1477,7 @@ define amdgpu_kernel void
> @flat_atomic_inc_noret_i64_offset_addr64(i64* %ptr) #0
> ; CI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
> ; CI-NEXT: v_mul_lo_u32 v2, 0, v0
> ; CI-NEXT: v_mul_lo_u32 v1, 8, v1
> -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
> +; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> ; CI-NEXT: v_mul_hi_u32 v3, 8, v0
> ; CI-NEXT: v_mul_lo_u32 v0, 8, v0
> ; CI-NEXT: v_add_i32_e32 v1, vcc, v2, v1
> @@ -1806,7 +1498,7 @@ define amdgpu_kernel void
> @flat_atomic_inc_noret_i64_offset_addr64(i64* %ptr) #0
> ; VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
> ; VI-NEXT: v_mul_lo_u32 v2, 0, v0
> ; VI-NEXT: v_mul_lo_u32 v1, 8, v1
> -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
> +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> ; VI-NEXT: v_mul_hi_u32 v3, 8, v0
> ; VI-NEXT: v_mul_lo_u32 v0, 8, v0
> ; VI-NEXT: v_add_u32_e32 v1, vcc, v2, v1
> @@ -1824,7 +1516,7 @@ define amdgpu_kernel void
> @flat_atomic_inc_noret_i64_offset_addr64(i64* %ptr) #0
> ;
> ; GFX9-LABEL: flat_atomic_inc_noret_i64_offset_addr64:
> ; GFX9: ; %bb.0:
> -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
> +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
> ; GFX9-NEXT: v_ashrrev_i32_e32 v1, 31, v0
> ; GFX9-NEXT: v_mul_lo_u32 v2, 0, v0
> ; GFX9-NEXT: v_mul_lo_u32 v3, 8, v1
> @@ -1851,18 +1543,18 @@ define amdgpu_kernel void
> @flat_atomic_inc_noret_i64_offset_addr64(i64* %ptr) #0
> define amdgpu_kernel void @nocse_lds_atomic_inc_ret_i32(i32 addrspace(1)*
> %out0, i32 addrspace(1)* %out1, i32 addrspace(3)* %ptr) #0 {
> ; CI-LABEL: nocse_lds_atomic_inc_ret_i32:
> ; CI: ; %bb.0:
> -; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
> -; CI-NEXT: s_load_dword s0, s[0:1], 0xd
> +; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> +; CI-NEXT: s_load_dword s4, s[4:5], 0x4
> ; CI-NEXT: v_mov_b32_e32 v2, 42
> ; CI-NEXT: s_mov_b32 m0, -1
> ; CI-NEXT: s_waitcnt lgkmcnt(0)
> -; CI-NEXT: v_mov_b32_e32 v0, s6
> -; CI-NEXT: v_mov_b32_e32 v3, s0
> +; CI-NEXT: v_mov_b32_e32 v0, s2
> +; CI-NEXT: v_mov_b32_e32 v3, s4
> ; CI-NEXT: ds_inc_rtn_u32 v4, v3, v2
> ; CI-NEXT: ds_inc_rtn_u32 v5, v3, v2
> -; CI-NEXT: v_mov_b32_e32 v2, s4
> -; CI-NEXT: v_mov_b32_e32 v3, s5
> -; CI-NEXT: v_mov_b32_e32 v1, s7
> +; CI-NEXT: v_mov_b32_e32 v3, s1
> +; CI-NEXT: v_mov_b32_e32 v2, s0
> +; CI-NEXT: v_mov_b32_e32 v1, s3
> ; CI-NEXT: s_waitcnt lgkmcnt(1)
> ; CI-NEXT: flat_store_dword v[2:3], v4
> ; CI-NEXT: s_waitcnt lgkmcnt(1)
> @@ -1871,18 +1563,18 @@ define amdgpu_kernel void
> @nocse_lds_atomic_inc_ret_i32(i32 addrspace(1)* %out0,
> ;
> ; VI-LABEL: nocse_lds_atomic_inc_ret_i32:
> ; VI: ; %bb.0:
> -; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
> -; VI-NEXT: s_load_dword s0, s[0:1], 0x34
> +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> +; VI-NEXT: s_load_dword s4, s[4:5], 0x10
> ; VI-NEXT: v_mov_b32_e32 v2, 42
> ; VI-NEXT: s_mov_b32 m0, -1
> ; VI-NEXT: s_waitcnt lgkmcnt(0)
> -; VI-NEXT: v_mov_b32_e32 v0, s6
> -; VI-NEXT: v_mov_b32_e32 v3, s0
> +; VI-NEXT: v_mov_b32_e32 v0, s2
> +; VI-NEXT: v_mov_b32_e32 v3, s4
> ; VI-NEXT: ds_inc_rtn_u32 v4, v3, v2
> ; VI-NEXT: ds_inc_rtn_u32 v5, v3, v2
> -; VI-NEXT: v_mov_b32_e32 v2, s4
> -; VI-NEXT: v_mov_b32_e32 v3, s5
> -; VI-NEXT: v_mov_b32_e32 v1, s7
> +; VI-NEXT: v_mov_b32_e32 v3, s1
> +; VI-NEXT: v_mov_b32_e32 v2, s0
> +; VI-NEXT: v_mov_b32_e32 v1, s3
> ; VI-NEXT: s_waitcnt lgkmcnt(1)
> ; VI-NEXT: flat_store_dword v[2:3], v4
> ; VI-NEXT: s_waitcnt lgkmcnt(1)
> @@ -1891,17 +1583,17 @@ define amdgpu_kernel void
> @nocse_lds_atomic_inc_ret_i32(i32 addrspace(1)* %out0,
> ;
> ; GFX9-LABEL: nocse_lds_atomic_inc_ret_i32:
> ; GFX9: ; %bb.0:
> -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
> -; GFX9-NEXT: s_load_dword s0, s[0:1], 0x34
> +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
> +; GFX9-NEXT: s_load_dword s4, s[4:5], 0x10
> ; GFX9-NEXT: v_mov_b32_e32 v0, 42
> ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
> -; GFX9-NEXT: v_mov_b32_e32 v2, s6
> -; GFX9-NEXT: v_mov_b32_e32 v1, s0
> +; GFX9-NEXT: v_mov_b32_e32 v2, s2
> +; GFX9-NEXT: v_mov_b32_e32 v1, s4
> ; GFX9-NEXT: ds_inc_rtn_u32 v4, v1, v0
> ; GFX9-NEXT: ds_inc_rtn_u32 v5, v1, v0
> -; GFX9-NEXT: v_mov_b32_e32 v0, s4
> -; GFX9-NEXT: v_mov_b32_e32 v1, s5
> -; GFX9-NEXT: v_mov_b32_e32 v3, s7
> +; GFX9-NEXT: v_mov_b32_e32 v0, s0
> +; GFX9-NEXT: v_mov_b32_e32 v1, s1
> +; GFX9-NEXT: v_mov_b32_e32 v3, s3
> ; GFX9-NEXT: s_waitcnt lgkmcnt(1)
> ; GFX9-NEXT: global_store_dword v[0:1], v4, off
> ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
>
> diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.append.ll
> b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.append.ll
> index 8287a60a069f..1b09c6251912 100644
> --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.append.ll
> +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.append.ll
> @@ -1,4 +1,4 @@
> ; XUN: llc -global-isel -march=amdgcn -mcpu=tahiti -verify-machineinstrs
> < %S/../llvm.amdgcn.ds.append.ll | FileCheck
> -check-prefixes=GCN,SI,NOTGFX9,CIPLUS-GISEL,GCN-GISEL
> %S/../llvm.amdgcn.ds.append.ll
> -; RUN: llc -global-isel -march=amdgcn -mcpu=bonaire -verify-machineinstrs
> < %S/../llvm.amdgcn.ds.append.ll | FileCheck
> -check-prefixes=GCN,CIPLUS,NOTGFX9,CIPLUS-GISEL,GCN-GISEL
> %S/../llvm.amdgcn.ds.append.ll
> -; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs <
> %S/../llvm.amdgcn.ds.append.ll | FileCheck
> -check-prefixes=GCN,CIPLUS,NOTGFX9,CIPLUS-GISEL,GCN-GISEL
> %S/../llvm.amdgcn.ds.append.ll
> +; RUN: llc -global-isel -march=amdgcn -mcpu=bonaire
> -mattr=+flat-for-global -verify-machineinstrs <
> %S/../llvm.amdgcn.ds.append.ll | FileCheck
> -check-prefixes=GCN,CIPLUS,NOTGFX9,CIPLUS-GISEL,GCN-GISEL
> %S/../llvm.amdgcn.ds.append.ll
> +; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -mattr=+flat-for-global
> -verify-machineinstrs < %S/../llvm.amdgcn.ds.append.ll | FileCheck
> -check-prefixes=GCN,CIPLUS,NOTGFX9,CIPLUS-GISEL,GCN-GISEL
> %S/../llvm.amdgcn.ds.append.ll
> ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs
> < %S/../llvm.amdgcn.ds.append.ll | FileCheck
> -check-prefixes=GCN,CIPLUS,GFX9,CIPLUS-GISEL,GCN-GISEL
> %S/../llvm.amdgcn.ds.append.ll
>
> diff --git
> a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.consume.ll
> b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.consume.ll
> index c755c37cad46..7aea170ed1ef 100644
> --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.consume.ll
> +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.consume.ll
> @@ -1,4 +1,4 @@
> ; XUN: llc -global-isel -march=amdgcn -mcpu=tahiti -verify-machineinstrs
> < %S/../llvm.amdgcn.ds.append.ll | FileCheck -check-prefixes=GCN,SI,NOTGFX9
> %S/../llvm.amdgcn.ds.append.ll
> -; RUN: llc -global-isel -march=amdgcn -mcpu=bonaire -verify-machineinstrs
> < %S/../llvm.amdgcn.ds.append.ll | FileCheck
> -check-prefixes=GCN,CIPLUS,NOTGFX9 %S/../llvm.amdgcn.ds.append.ll
> -; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs <
> %S/../llvm.amdgcn.ds.append.ll | FileCheck
> -check-prefixes=GCN,CIPLUS,NOTGFX9 %S/../llvm.amdgcn.ds.append.ll
> +; RUN: llc -global-isel -march=amdgcn -mcpu=bonaire
> -mattr=+flat-for-global -verify-machineinstrs <
> %S/../llvm.amdgcn.ds.append.ll | FileCheck
> -check-prefixes=GCN,CIPLUS,NOTGFX9 %S/../llvm.amdgcn.ds.append.ll
> +; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -mattr=+flat-for-global
> -verify-machineinstrs < %S/../llvm.amdgcn.ds.append.ll | FileCheck
> -check-prefixes=GCN,CIPLUS,NOTGFX9 %S/../llvm.amdgcn.ds.append.ll
> ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs
> < %S/../llvm.amdgcn.ds.append.ll | FileCheck
> -check-prefixes=GCN,CIPLUS,GFX9 %S/../llvm.amdgcn.ds.append.ll
>
> diff --git
> a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll
> b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll
> index 55929202b1c2..f4ede38b26aa 100644
> ---
> a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll
> +++
> b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll
> @@ -1,6 +1,6 @@
> ; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mattr=-code-object-v3
> -mcpu=kaveri -verify-machineinstrs < %s | FileCheck
> -check-prefixes=CO-V2,HSA,ALL %s
> ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii
> -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck
> -check-prefixes=CO-V2,OS-MESA3D,MESA,ALL %s
> -; RUN: llc -global-isel -mtriple=amdgcn-mesa-unknown -mcpu=hawaii
> -verify-machineinstrs < %s | FileCheck -check-prefixes=OS-UNKNOWN,MESA,ALL
> %s
> +; RUN: llc -global-isel -mtriple=amdgcn-mesa-unknown -mcpu=hawaii
> -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck
> -check-prefixes=OS-UNKNOWN,MESA,ALL %s
>
> ; ALL-LABEL: {{^}}test:
> ; CO-V2: enable_sgpr_kernarg_segment_ptr = 1
>
> diff --git
> a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
> b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
> index 946a138bee4f..aed978899565 100644
> --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
> +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
> @@ -1,9 +1,9 @@
> ; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa
> -mattr=-code-object-v3 -mcpu=kaveri -verify-machineinstrs < %s | FileCheck
> -check-prefix=ALL -check-prefix=CO-V2 -check-prefix=CI-HSA %s
> -; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa
> -mattr=-code-object-v3 -mcpu=carrizo -mattr=-flat-for-global
> -verify-machineinstrs < %s | FileCheck -check-prefix=ALL
> -check-prefix=CO-V2 -check-prefix=VI-HSA %s
> -; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=hawaii
> -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=MESA
> -check-prefix=SI-MESA %s
> -; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=tonga
> -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck
> -check-prefix=ALL -check-prefix=MESA -check-prefix=VI-MESA %s
> -; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d
> -mattr=-code-object-v3 -mcpu=hawaii -verify-machineinstrs < %s | FileCheck
> -check-prefixes=ALL,CO-V2,SI-MESA %s
> -; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d
> -mattr=-code-object-v3 -mcpu=tonga -mattr=-flat-for-global
> -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,CO-V2,VI-MESA %s
> +; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa
> -mattr=-code-object-v3 -mcpu=carrizo -verify-machineinstrs < %s | FileCheck
> -check-prefix=ALL -check-prefix=CO-V2 -check-prefix=VI-HSA %s
> +; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=hawaii
> -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck
> -check-prefix=ALL -check-prefix=MESA -check-prefix=SI-MESA %s
> +; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=tonga
> -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck
> -check-prefix=ALL -check-prefix=MESA -check-prefix=VI-MESA %s
> +; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d
> -mattr=+flat-for-global,-code-object-v3 -mcpu=hawaii -verify-machineinstrs
> < %s | FileCheck -check-prefixes=ALL,CO-V2,SI-MESA %s
> +; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d
> -mattr=-code-object-v3 -mcpu=tonga -verify-machineinstrs < %s | FileCheck
> -check-prefixes=ALL,CO-V2,VI-MESA %s
>
> declare i32 @llvm.amdgcn.workitem.id.x() #0
> declare i32 @llvm.amdgcn.workitem.id.y() #0
>
>
>