[llvm] Reapply "[AMDGPU][GlobalISel] Fix load/store of pointer vectors, buffer.*.pN (#110714)" v3 (PR #114443)
Krzysztof Drewniak via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 31 15:19:24 PDT 2024
https://github.com/krzysz00 updated https://github.com/llvm/llvm-project/pull/114443
>From e7f35596ea5ad43beb64f499e4d4deb3f3561a52 Mon Sep 17 00:00:00 2001
From: Krzysztof Drewniak <Krzysztof.Drewniak at amd.com>
Date: Thu, 31 Oct 2024 17:39:29 +0000
Subject: [PATCH] Reapply "[AMDGPU][GlobalISel] Fix load/store of pointer
vectors, buffer.*.pN (#110714)" v3
This reverts commit 8a849a2a567d4e519b246a16936b6e7519936d4b.
It seems I missed a spot when trying to ensure the code in the
instruction selection tests was actually legalized MIR.
---
.../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 61 +-
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 12 +-
llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 2 +-
.../GlobalISel/buffer-load-store-pointers.ll | 301 ++
...st-select-load-global-old-legalization.mir | 3323 +++++++++++++++++
.../GlobalISel/inst-select-load-local.mir | 96 +-
.../GlobalISel/legalize-load-constant.mir | 51 +-
.../AMDGPU/GlobalISel/legalize-load-flat.mir | 146 +-
.../GlobalISel/legalize-load-global.mir | 98 +-
.../AMDGPU/GlobalISel/legalize-load-local.mir | 50 +-
.../GlobalISel/legalize-load-private.mir | 83 +-
.../GlobalISel/legalize-store-global.mir | 84 +-
12 files changed, 4038 insertions(+), 269 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-load-store-pointers.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-old-legalization.mir
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index d7126132356d2c..ffff34417232bb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -494,6 +494,8 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
return false;
const unsigned Size = Ty.getSizeInBits();
+ if (Ty.isPointerVector())
+ return true;
if (Size <= 64)
return false;
// Address space 8 pointers get their own workaround.
@@ -502,9 +504,6 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
if (!Ty.isVector())
return true;
- if (Ty.isPointerVector())
- return true;
-
unsigned EltSize = Ty.getScalarSizeInBits();
return EltSize != 32 && EltSize != 64;
}
@@ -5820,8 +5819,9 @@ Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B,
return Reg;
}
-Register AMDGPULegalizerInfo::fixStoreSourceType(
- MachineIRBuilder &B, Register VData, bool IsFormat) const {
+Register AMDGPULegalizerInfo::fixStoreSourceType(MachineIRBuilder &B,
+ Register VData, LLT MemTy,
+ bool IsFormat) const {
MachineRegisterInfo *MRI = B.getMRI();
LLT Ty = MRI->getType(VData);
@@ -5831,6 +5831,10 @@ Register AMDGPULegalizerInfo::fixStoreSourceType(
if (hasBufferRsrcWorkaround(Ty))
return castBufferRsrcToV4I32(VData, B);
+ if (shouldBitcastLoadStoreType(ST, Ty, MemTy)) {
+ Ty = getBitcastRegisterType(Ty);
+ VData = B.buildBitcast(Ty, VData).getReg(0);
+ }
// Fixup illegal register types for i8 stores.
if (Ty == LLT::scalar(8) || Ty == S16) {
Register AnyExt = B.buildAnyExt(LLT::scalar(32), VData).getReg(0);
@@ -5848,22 +5852,26 @@ Register AMDGPULegalizerInfo::fixStoreSourceType(
}
bool AMDGPULegalizerInfo::legalizeBufferStore(MachineInstr &MI,
- MachineRegisterInfo &MRI,
- MachineIRBuilder &B,
+ LegalizerHelper &Helper,
bool IsTyped,
bool IsFormat) const {
+ MachineIRBuilder &B = Helper.MIRBuilder;
+ MachineRegisterInfo &MRI = *B.getMRI();
+
Register VData = MI.getOperand(1).getReg();
LLT Ty = MRI.getType(VData);
LLT EltTy = Ty.getScalarType();
const bool IsD16 = IsFormat && (EltTy.getSizeInBits() == 16);
const LLT S32 = LLT::scalar(32);
- VData = fixStoreSourceType(B, VData, IsFormat);
- castBufferRsrcArgToV4I32(MI, B, 2);
- Register RSrc = MI.getOperand(2).getReg();
-
MachineMemOperand *MMO = *MI.memoperands_begin();
const int MemSize = MMO->getSize().getValue();
+ LLT MemTy = MMO->getMemoryType();
+
+ VData = fixStoreSourceType(B, VData, MemTy, IsFormat);
+
+ castBufferRsrcArgToV4I32(MI, B, 2);
+ Register RSrc = MI.getOperand(2).getReg();
unsigned ImmOffset;
@@ -5956,10 +5964,13 @@ static void buildBufferLoad(unsigned Opc, Register LoadDstReg, Register RSrc,
}
bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI,
- MachineRegisterInfo &MRI,
- MachineIRBuilder &B,
+ LegalizerHelper &Helper,
bool IsFormat,
bool IsTyped) const {
+ MachineIRBuilder &B = Helper.MIRBuilder;
+ MachineRegisterInfo &MRI = *B.getMRI();
+ GISelChangeObserver &Observer = Helper.Observer;
+
// FIXME: Verifier should enforce 1 MMO for these intrinsics.
MachineMemOperand *MMO = *MI.memoperands_begin();
const LLT MemTy = MMO->getMemoryType();
@@ -6008,9 +6019,21 @@ bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI,
// Make addrspace 8 pointers loads into 4xs32 loads here, so the rest of the
// logic doesn't have to handle that case.
if (hasBufferRsrcWorkaround(Ty)) {
+ Observer.changingInstr(MI);
Ty = castBufferRsrcFromV4I32(MI, B, MRI, 0);
+ Observer.changedInstr(MI);
Dst = MI.getOperand(0).getReg();
+ B.setInsertPt(B.getMBB(), MI);
}
+ if (shouldBitcastLoadStoreType(ST, Ty, MemTy)) {
+ Ty = getBitcastRegisterType(Ty);
+ Observer.changingInstr(MI);
+ Helper.bitcastDst(MI, Ty, 0);
+ Observer.changedInstr(MI);
+ Dst = MI.getOperand(0).getReg();
+ B.setInsertPt(B.getMBB(), MI);
+ }
+
LLT EltTy = Ty.getScalarType();
const bool IsD16 = IsFormat && (EltTy.getSizeInBits() == 16);
const bool Unpacked = ST.hasUnpackedD16VMem();
@@ -7390,17 +7413,17 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
case Intrinsic::amdgcn_raw_ptr_buffer_store:
case Intrinsic::amdgcn_struct_buffer_store:
case Intrinsic::amdgcn_struct_ptr_buffer_store:
- return legalizeBufferStore(MI, MRI, B, false, false);
+ return legalizeBufferStore(MI, Helper, false, false);
case Intrinsic::amdgcn_raw_buffer_store_format:
case Intrinsic::amdgcn_raw_ptr_buffer_store_format:
case Intrinsic::amdgcn_struct_buffer_store_format:
case Intrinsic::amdgcn_struct_ptr_buffer_store_format:
- return legalizeBufferStore(MI, MRI, B, false, true);
+ return legalizeBufferStore(MI, Helper, false, true);
case Intrinsic::amdgcn_raw_tbuffer_store:
case Intrinsic::amdgcn_raw_ptr_tbuffer_store:
case Intrinsic::amdgcn_struct_tbuffer_store:
case Intrinsic::amdgcn_struct_ptr_tbuffer_store:
- return legalizeBufferStore(MI, MRI, B, true, true);
+ return legalizeBufferStore(MI, Helper, true, true);
case Intrinsic::amdgcn_raw_buffer_load:
case Intrinsic::amdgcn_raw_ptr_buffer_load:
case Intrinsic::amdgcn_raw_atomic_buffer_load:
@@ -7409,17 +7432,17 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
case Intrinsic::amdgcn_struct_ptr_buffer_load:
case Intrinsic::amdgcn_struct_atomic_buffer_load:
case Intrinsic::amdgcn_struct_ptr_atomic_buffer_load:
- return legalizeBufferLoad(MI, MRI, B, false, false);
+ return legalizeBufferLoad(MI, Helper, false, false);
case Intrinsic::amdgcn_raw_buffer_load_format:
case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
case Intrinsic::amdgcn_struct_buffer_load_format:
case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
- return legalizeBufferLoad(MI, MRI, B, true, false);
+ return legalizeBufferLoad(MI, Helper, true, false);
case Intrinsic::amdgcn_raw_tbuffer_load:
case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
case Intrinsic::amdgcn_struct_tbuffer_load:
case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
- return legalizeBufferLoad(MI, MRI, B, true, true);
+ return legalizeBufferLoad(MI, Helper, true, true);
case Intrinsic::amdgcn_raw_buffer_atomic_swap:
case Intrinsic::amdgcn_raw_ptr_buffer_atomic_swap:
case Intrinsic::amdgcn_struct_buffer_atomic_swap:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index 84470dc75b60ef..86c15197805d23 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -195,15 +195,13 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI,
Register Reg, bool ImageStore = false) const;
- Register fixStoreSourceType(MachineIRBuilder &B, Register VData,
+ Register fixStoreSourceType(MachineIRBuilder &B, Register VData, LLT MemTy,
bool IsFormat) const;
- bool legalizeBufferStore(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &B, bool IsTyped,
- bool IsFormat) const;
- bool legalizeBufferLoad(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &B, bool IsFormat,
- bool IsTyped) const;
+ bool legalizeBufferStore(MachineInstr &MI, LegalizerHelper &Helper,
+ bool IsTyped, bool IsFormat) const;
+ bool legalizeBufferLoad(MachineInstr &MI, LegalizerHelper &Helper,
+ bool IsFormat, bool IsTyped) const;
bool legalizeBufferAtomic(MachineInstr &MI, MachineIRBuilder &B,
Intrinsic::ID IID) const;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 3556f6a95b521e..852430129251c6 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -585,7 +585,7 @@ class RegisterTypes<list<ValueType> reg_types> {
def Reg16Types : RegisterTypes<[i16, f16, bf16]>;
def Reg32Types : RegisterTypes<[i32, f32, v2i16, v2f16, v2bf16, p2, p3, p5, p6]>;
-def Reg64Types : RegisterTypes<[i64, f64, v2i32, v2f32, p0, v4i16, v4f16, v4bf16]>;
+def Reg64Types : RegisterTypes<[i64, f64, v2i32, v2f32, p0, p1, p4, v4i16, v4f16, v4bf16]>;
def Reg96Types : RegisterTypes<[v3i32, v3f32]>;
def Reg128Types : RegisterTypes<[v4i32, v4f32, v2i64, v2f64, v8i16, v8f16, v8bf16]>;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-load-store-pointers.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-load-store-pointers.ll
new file mode 100644
index 00000000000000..091c9f143ce7ee
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-load-store-pointers.ll
@@ -0,0 +1,301 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck --check-prefix=GFX9 %s
+
+define ptr @buffer_load_p0(ptr addrspace(8) inreg %buf) {
+ ; GFX9-LABEL: name: buffer_load_p0
+ ; GFX9: bb.1 (%ir-block.0):
+ ; GFX9-NEXT: liveins: $sgpr6, $sgpr7, $sgpr16, $sgpr17
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr7
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr16
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr17
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX9-NEXT: [[BUFFER_LOAD_DWORDX2_OFFSET:%[0-9]+]]:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s64) from %ir.buf, align 1, addrspace 8)
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFSET]].sub0
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFSET]].sub1
+ ; GFX9-NEXT: $vgpr0 = COPY [[COPY4]]
+ ; GFX9-NEXT: $vgpr1 = COPY [[COPY5]]
+ ; GFX9-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+ %ret = call ptr @llvm.amdgcn.raw.ptr.buffer.load.p0(ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0)
+ ret ptr %ret
+}
+
+define void @buffer_store_p0(ptr %data, ptr addrspace(8) inreg %buf) {
+ ; GFX9-LABEL: name: buffer_store_p0
+ ; GFX9: bb.1 (%ir-block.0):
+ ; GFX9-NEXT: liveins: $sgpr6, $sgpr7, $sgpr16, $sgpr17, $vgpr0, $vgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr7
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr16
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr17
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
+ ; GFX9-NEXT: BUFFER_STORE_DWORDX2_OFFSET_exact [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s64) into %ir.buf, align 1, addrspace 8)
+ ; GFX9-NEXT: SI_RETURN
+ call void @llvm.amdgcn.raw.ptr.buffer.store.p0(ptr %data, ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0)
+ ret void
+}
+
+define ptr addrspace(1) @buffer_load_p1(ptr addrspace(8) inreg %buf) {
+ ; GFX9-LABEL: name: buffer_load_p1
+ ; GFX9: bb.1 (%ir-block.0):
+ ; GFX9-NEXT: liveins: $sgpr6, $sgpr7, $sgpr16, $sgpr17
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr7
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr16
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr17
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX9-NEXT: [[BUFFER_LOAD_DWORDX2_OFFSET:%[0-9]+]]:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s64) from %ir.buf, align 1, addrspace 8)
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFSET]].sub0
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFSET]].sub1
+ ; GFX9-NEXT: $vgpr0 = COPY [[COPY4]]
+ ; GFX9-NEXT: $vgpr1 = COPY [[COPY5]]
+ ; GFX9-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+ %ret = call ptr addrspace(1) @llvm.amdgcn.raw.ptr.buffer.load.p1(ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0)
+ ret ptr addrspace(1) %ret
+}
+
+define void @buffer_store_p1(ptr addrspace(1) %data, ptr addrspace(8) inreg %buf) {
+ ; GFX9-LABEL: name: buffer_store_p1
+ ; GFX9: bb.1 (%ir-block.0):
+ ; GFX9-NEXT: liveins: $sgpr6, $sgpr7, $sgpr16, $sgpr17, $vgpr0, $vgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr7
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr16
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr17
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
+ ; GFX9-NEXT: BUFFER_STORE_DWORDX2_OFFSET_exact [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s64) into %ir.buf, align 1, addrspace 8)
+ ; GFX9-NEXT: SI_RETURN
+ call void @llvm.amdgcn.raw.ptr.buffer.store.p1(ptr addrspace(1) %data, ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0)
+ ret void
+}
+
+define ptr addrspace(4) @buffer_load_p4(ptr addrspace(8) inreg %buf) {
+ ; GFX9-LABEL: name: buffer_load_p4
+ ; GFX9: bb.1 (%ir-block.0):
+ ; GFX9-NEXT: liveins: $sgpr6, $sgpr7, $sgpr16, $sgpr17
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr7
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr16
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr17
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX9-NEXT: [[BUFFER_LOAD_DWORDX2_OFFSET:%[0-9]+]]:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s64) from %ir.buf, align 1, addrspace 8)
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFSET]].sub0
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFSET]].sub1
+ ; GFX9-NEXT: $vgpr0 = COPY [[COPY4]]
+ ; GFX9-NEXT: $vgpr1 = COPY [[COPY5]]
+ ; GFX9-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+ %ret = call ptr addrspace(4) @llvm.amdgcn.raw.ptr.buffer.load.p4(ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0)
+ ret ptr addrspace(4) %ret
+}
+
+define void @buffer_store_p4(ptr addrspace(4) %data, ptr addrspace(8) inreg %buf) {
+ ; GFX9-LABEL: name: buffer_store_p4
+ ; GFX9: bb.1 (%ir-block.0):
+ ; GFX9-NEXT: liveins: $sgpr6, $sgpr7, $sgpr16, $sgpr17, $vgpr0, $vgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr7
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr16
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr17
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
+ ; GFX9-NEXT: BUFFER_STORE_DWORDX2_OFFSET_exact [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s64) into %ir.buf, align 1, addrspace 8)
+ ; GFX9-NEXT: SI_RETURN
+ call void @llvm.amdgcn.raw.ptr.buffer.store.p4(ptr addrspace(4) %data, ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0)
+ ret void
+}
+
+define ptr addrspace(5) @buffer_load_p5(ptr addrspace(8) inreg %buf) {
+ ; GFX9-LABEL: name: buffer_load_p5
+ ; GFX9: bb.1 (%ir-block.0):
+ ; GFX9-NEXT: liveins: $sgpr6, $sgpr7, $sgpr16, $sgpr17
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr7
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr16
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr17
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.buf, align 1, addrspace 8)
+ ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
+ ; GFX9-NEXT: SI_RETURN implicit $vgpr0
+ %ret = call ptr addrspace(5) @llvm.amdgcn.raw.ptr.buffer.load.p5(ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0)
+ ret ptr addrspace(5) %ret
+}
+
+define void @buffer_store_p5(ptr addrspace(5) %data, ptr addrspace(8) inreg %buf) {
+ ; GFX9-LABEL: name: buffer_store_p5
+ ; GFX9: bb.1 (%ir-block.0):
+ ; GFX9-NEXT: liveins: $sgpr6, $sgpr7, $sgpr16, $sgpr17, $vgpr0
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr7
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr16
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr17
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
+ ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.buf, align 1, addrspace 8)
+ ; GFX9-NEXT: SI_RETURN
+ call void @llvm.amdgcn.raw.ptr.buffer.store.p5(ptr addrspace(5) %data, ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0)
+ ret void
+}
+
+define <2 x ptr addrspace(1)> @buffer_load_v2p1(ptr addrspace(8) inreg %buf) {
+ ; GFX9-LABEL: name: buffer_load_v2p1
+ ; GFX9: bb.1 (%ir-block.0):
+ ; GFX9-NEXT: liveins: $sgpr6, $sgpr7, $sgpr16, $sgpr17
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr7
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr16
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr17
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX9-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128_align2 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s64>) from %ir.buf, align 1, addrspace 8)
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFSET]].sub0
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFSET]].sub1
+ ; GFX9-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFSET]].sub2
+ ; GFX9-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFSET]].sub3
+ ; GFX9-NEXT: $vgpr0 = COPY [[COPY4]]
+ ; GFX9-NEXT: $vgpr1 = COPY [[COPY5]]
+ ; GFX9-NEXT: $vgpr2 = COPY [[COPY6]]
+ ; GFX9-NEXT: $vgpr3 = COPY [[COPY7]]
+ ; GFX9-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ %ret = call <2 x ptr addrspace(1)> @llvm.amdgcn.raw.ptr.buffer.load.v2p1(ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0)
+ ret <2 x ptr addrspace(1)> %ret
+}
+
+define void @buffer_store_v2p5(<2 x ptr addrspace(1)> %data, ptr addrspace(8) inreg %buf) {
+ ; GFX9-LABEL: name: buffer_store_v2p5
+ ; GFX9: bb.1 (%ir-block.0):
+ ; GFX9-NEXT: liveins: $sgpr6, $sgpr7, $sgpr16, $sgpr17, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX9-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr7
+ ; GFX9-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr16
+ ; GFX9-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr17
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX9-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
+ ; GFX9-NEXT: BUFFER_STORE_DWORDX4_OFFSET_exact [[REG_SEQUENCE2]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s64>) into %ir.buf, align 1, addrspace 8)
+ ; GFX9-NEXT: SI_RETURN
+ call void @llvm.amdgcn.raw.ptr.buffer.store.v2p1(<2 x ptr addrspace(1)> %data, ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0)
+ ret void
+}
+
+define <3 x ptr addrspace(5)> @buffer_load_v3p5(ptr addrspace(8) inreg %buf) {
+ ; GFX9-LABEL: name: buffer_load_v3p5
+ ; GFX9: bb.1 (%ir-block.0):
+ ; GFX9-NEXT: liveins: $sgpr6, $sgpr7, $sgpr16, $sgpr17
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr7
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr16
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr17
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX9-NEXT: [[BUFFER_LOAD_DWORDX3_OFFSET:%[0-9]+]]:vreg_96_align2 = BUFFER_LOAD_DWORDX3_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>) from %ir.buf, align 1, addrspace 8)
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFSET]].sub0
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFSET]].sub1
+ ; GFX9-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFSET]].sub2
+ ; GFX9-NEXT: $vgpr0 = COPY [[COPY4]]
+ ; GFX9-NEXT: $vgpr1 = COPY [[COPY5]]
+ ; GFX9-NEXT: $vgpr2 = COPY [[COPY6]]
+ ; GFX9-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+ %ret = call <3 x ptr addrspace(5)> @llvm.amdgcn.raw.ptr.buffer.load.v3p5(ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0)
+ ret <3 x ptr addrspace(5)> %ret
+}
+
+define void @buffer_store_v3p5(<3 x ptr addrspace(5)> %data, ptr addrspace(8) inreg %buf) {
+ ; GFX9-LABEL: name: buffer_store_v3p5
+ ; GFX9: bb.1 (%ir-block.0):
+ ; GFX9-NEXT: liveins: $sgpr6, $sgpr7, $sgpr16, $sgpr17, $vgpr0, $vgpr1, $vgpr2
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr7
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr16
+ ; GFX9-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr17
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[COPY6]], %subreg.sub3
+ ; GFX9-NEXT: BUFFER_STORE_DWORDX3_OFFSET_exact [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>) into %ir.buf, align 1, addrspace 8)
+ ; GFX9-NEXT: SI_RETURN
+ call void @llvm.amdgcn.raw.ptr.buffer.store.v3p5(<3 x ptr addrspace(5)> %data, ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0)
+ ret void
+}
+
+define <4 x ptr addrspace(5)> @buffer_load_v4p5(ptr addrspace(8) inreg %buf) {
+ ; GFX9-LABEL: name: buffer_load_v4p5
+ ; GFX9: bb.1 (%ir-block.0):
+ ; GFX9-NEXT: liveins: $sgpr6, $sgpr7, $sgpr16, $sgpr17
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr7
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr16
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr17
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX9-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128_align2 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>) from %ir.buf, align 1, addrspace 8)
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFSET]].sub0
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFSET]].sub1
+ ; GFX9-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFSET]].sub2
+ ; GFX9-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFSET]].sub3
+ ; GFX9-NEXT: $vgpr0 = COPY [[COPY4]]
+ ; GFX9-NEXT: $vgpr1 = COPY [[COPY5]]
+ ; GFX9-NEXT: $vgpr2 = COPY [[COPY6]]
+ ; GFX9-NEXT: $vgpr3 = COPY [[COPY7]]
+ ; GFX9-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ %ret = call <4 x ptr addrspace(5)> @llvm.amdgcn.raw.ptr.buffer.load.v4p5(ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0)
+ ret <4 x ptr addrspace(5)> %ret
+}
+
+define void @buffer_store_v4p5(<4 x ptr addrspace(5)> %data, ptr addrspace(8) inreg %buf) {
+ ; GFX9-LABEL: name: buffer_store_v4p5
+ ; GFX9: bb.1 (%ir-block.0):
+ ; GFX9-NEXT: liveins: $sgpr6, $sgpr7, $sgpr16, $sgpr17, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr7
+ ; GFX9-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr16
+ ; GFX9-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr17
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
+ ; GFX9-NEXT: BUFFER_STORE_DWORDX4_OFFSET_exact [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>) into %ir.buf, align 1, addrspace 8)
+ ; GFX9-NEXT: SI_RETURN
+ call void @llvm.amdgcn.raw.ptr.buffer.store.v4p5(<4 x ptr addrspace(5)> %data, ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0)
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-old-legalization.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-old-legalization.mir
new file mode 100644
index 00000000000000..864fa2ab438df9
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-old-legalization.mir
@@ -0,0 +1,3323 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -disable-gisel-legality-check -o - %s | FileCheck -check-prefix=GFX6 %s
+# RUN: llc -mtriple=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -disable-gisel-legality-check -o - %s | FileCheck -check-prefix=GFX7 %s
+# RUN: llc -mtriple=amdgcn -mcpu=hawaii -mattr=+flat-for-global -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -disable-gisel-legality-check -o - %s | FileCheck -check-prefix=GFX7-FLAT %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX11 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX12 %s
+
+---
+
+name: load_global_s32_from_4
+
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; GFX6-LABEL: name: load_global_s32_from_4
+ ; GFX6: liveins: $vgpr0_vgpr1
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 1)
+ ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
+ ;
+ ; GFX7-LABEL: name: load_global_s32_from_4
+ ; GFX7: liveins: $vgpr0_vgpr1
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 1)
+ ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
+ ;
+ ; GFX7-FLAT-LABEL: name: load_global_s32_from_4
+ ; GFX7-FLAT: liveins: $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: {{ $}}
+ ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 1)
+ ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
+ ;
+ ; GFX8-LABEL: name: load_global_s32_from_4
+ ; GFX8: liveins: $vgpr0_vgpr1
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 1)
+ ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
+ ;
+ ; GFX9-LABEL: name: load_global_s32_from_4
+ ; GFX9: liveins: $vgpr0_vgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
+ ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
+ ;
+ ; GFX10-LABEL: name: load_global_s32_from_4
+ ; GFX10: liveins: $vgpr0_vgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
+ ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
+ ;
+ ; GFX11-LABEL: name: load_global_s32_from_4
+ ; GFX11: liveins: $vgpr0_vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
+ ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
+ ;
+ ; GFX12-LABEL: name: load_global_s32_from_4
+ ; GFX12: liveins: $vgpr0_vgpr1
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
+ ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
+ %0:vgpr(p1) = COPY $vgpr0_vgpr1
+ %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 1)
+ $vgpr0 = COPY %1
+
+...
+
+---
+
+name: load_global_s32_from_2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; GFX6-LABEL: name: load_global_s32_from_2
+ ; GFX6: liveins: $vgpr0_vgpr1
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX6-NEXT: [[BUFFER_LOAD_USHORT_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (s16), addrspace 1)
+ ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_ADDR64_]]
+ ;
+ ; GFX7-LABEL: name: load_global_s32_from_2
+ ; GFX7: liveins: $vgpr0_vgpr1
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX7-NEXT: [[BUFFER_LOAD_USHORT_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (s16), addrspace 1)
+ ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_ADDR64_]]
+ ;
+ ; GFX7-FLAT-LABEL: name: load_global_s32_from_2
+ ; GFX7-FLAT: liveins: $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: {{ $}}
+ ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16), addrspace 1)
+ ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_USHORT]]
+ ;
+ ; GFX8-LABEL: name: load_global_s32_from_2
+ ; GFX8: liveins: $vgpr0_vgpr1
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX8-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16), addrspace 1)
+ ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_USHORT]]
+ ;
+ ; GFX9-LABEL: name: load_global_s32_from_2
+ ; GFX9: liveins: $vgpr0_vgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9-NEXT: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 1)
+ ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_USHORT]]
+ ;
+ ; GFX10-LABEL: name: load_global_s32_from_2
+ ; GFX10: liveins: $vgpr0_vgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10-NEXT: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 1)
+ ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_USHORT]]
+ ;
+ ; GFX11-LABEL: name: load_global_s32_from_2
+ ; GFX11: liveins: $vgpr0_vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX11-NEXT: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 1)
+ ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_USHORT]]
+ ;
+ ; GFX12-LABEL: name: load_global_s32_from_2
+ ; GFX12: liveins: $vgpr0_vgpr1
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX12-NEXT: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 1)
+ ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_USHORT]]
+ %0:vgpr(p1) = COPY $vgpr0_vgpr1
+ %1:vgpr(s32) = G_LOAD %0 :: (load (s16), align 2, addrspace 1)
+ $vgpr0 = COPY %1
+
+...
+
+---
+
+name: load_global_s32_from_1
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; GFX6-LABEL: name: load_global_s32_from_1
+ ; GFX6: liveins: $vgpr0_vgpr1
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
+ ;
+ ; GFX7-LABEL: name: load_global_s32_from_1
+ ; GFX7: liveins: $vgpr0_vgpr1
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
+ ;
+ ; GFX7-FLAT-LABEL: name: load_global_s32_from_1
+ ; GFX7-FLAT: liveins: $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: {{ $}}
+ ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+ ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+ ;
+ ; GFX8-LABEL: name: load_global_s32_from_1
+ ; GFX8: liveins: $vgpr0_vgpr1
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+ ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+ ;
+ ; GFX9-LABEL: name: load_global_s32_from_1
+ ; GFX9: liveins: $vgpr0_vgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+ ;
+ ; GFX10-LABEL: name: load_global_s32_from_1
+ ; GFX10: liveins: $vgpr0_vgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+ ;
+ ; GFX11-LABEL: name: load_global_s32_from_1
+ ; GFX11: liveins: $vgpr0_vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+ ;
+ ; GFX12-LABEL: name: load_global_s32_from_1
+ ; GFX12: liveins: $vgpr0_vgpr1
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+ %0:vgpr(p1) = COPY $vgpr0_vgpr1
+ %1:vgpr(s32) = G_LOAD %0 :: (load (s8), align 1, addrspace 1)
+ $vgpr0 = COPY %1
+
+...
+
+---
+
+name: load_global_v2s32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; GFX6-LABEL: name: load_global_v2s32
+ ; GFX6: liveins: $vgpr0_vgpr1
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1)
+ ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
+ ;
+ ; GFX7-LABEL: name: load_global_v2s32
+ ; GFX7: liveins: $vgpr0_vgpr1
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1)
+ ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
+ ;
+ ; GFX7-FLAT-LABEL: name: load_global_v2s32
+ ; GFX7-FLAT: liveins: $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: {{ $}}
+ ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>), addrspace 1)
+ ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
+ ;
+ ; GFX8-LABEL: name: load_global_v2s32
+ ; GFX8: liveins: $vgpr0_vgpr1
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>), addrspace 1)
+ ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
+ ;
+ ; GFX9-LABEL: name: load_global_v2s32
+ ; GFX9: liveins: $vgpr0_vgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1)
+ ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
+ ;
+ ; GFX10-LABEL: name: load_global_v2s32
+ ; GFX10: liveins: $vgpr0_vgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1)
+ ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
+ ;
+ ; GFX11-LABEL: name: load_global_v2s32
+ ; GFX11: liveins: $vgpr0_vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1)
+ ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
+ ;
+ ; GFX12-LABEL: name: load_global_v2s32
+ ; GFX12: liveins: $vgpr0_vgpr1
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1)
+ ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
+ %0:vgpr(p1) = COPY $vgpr0_vgpr1
+ %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 1)
+ $vgpr0_vgpr1 = COPY %1
+
+...
+
+---
+
+name: load_global_v4s32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; GFX6-LABEL: name: load_global_v4s32
+ ; GFX6: liveins: $vgpr0_vgpr1
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
+ ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]]
+ ;
+ ; GFX7-LABEL: name: load_global_v4s32
+ ; GFX7: liveins: $vgpr0_vgpr1
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
+ ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]]
+ ;
+ ; GFX7-FLAT-LABEL: name: load_global_v4s32
+ ; GFX7-FLAT: liveins: $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: {{ $}}
+ ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4, addrspace 1)
+ ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
+ ;
+ ; GFX8-LABEL: name: load_global_v4s32
+ ; GFX8: liveins: $vgpr0_vgpr1
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4, addrspace 1)
+ ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
+ ;
+ ; GFX9-LABEL: name: load_global_v4s32
+ ; GFX9: liveins: $vgpr0_vgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
+ ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
+ ;
+ ; GFX10-LABEL: name: load_global_v4s32
+ ; GFX10: liveins: $vgpr0_vgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
+ ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
+ ;
+ ; GFX11-LABEL: name: load_global_v4s32
+ ; GFX11: liveins: $vgpr0_vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
+ ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
+ ;
+ ; GFX12-LABEL: name: load_global_v4s32
+ ; GFX12: liveins: $vgpr0_vgpr1
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
+ ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
+ %0:vgpr(p1) = COPY $vgpr0_vgpr1
+ %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 1)
+ $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+
+...
+
+---
+
+name: load_global_s64
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; GFX6-LABEL: name: load_global_s64
+ ; GFX6: liveins: $vgpr0_vgpr1
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (s64), addrspace 1)
+ ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
+ ;
+ ; GFX7-LABEL: name: load_global_s64
+ ; GFX7: liveins: $vgpr0_vgpr1
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (s64), addrspace 1)
+ ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
+ ;
+ ; GFX7-FLAT-LABEL: name: load_global_s64
+ ; GFX7-FLAT: liveins: $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: {{ $}}
+ ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64), addrspace 1)
+ ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
+ ;
+ ; GFX8-LABEL: name: load_global_s64
+ ; GFX8: liveins: $vgpr0_vgpr1
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64), addrspace 1)
+ ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
+ ;
+ ; GFX9-LABEL: name: load_global_s64
+ ; GFX9: liveins: $vgpr0_vgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
+ ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
+ ;
+ ; GFX10-LABEL: name: load_global_s64
+ ; GFX10: liveins: $vgpr0_vgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
+ ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
+ ;
+ ; GFX11-LABEL: name: load_global_s64
+ ; GFX11: liveins: $vgpr0_vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
+ ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
+ ;
+ ; GFX12-LABEL: name: load_global_s64
+ ; GFX12: liveins: $vgpr0_vgpr1
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
+ ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
+ %0:vgpr(p1) = COPY $vgpr0_vgpr1
+ %1:vgpr(s64) = G_LOAD %0 :: (load (s64), align 8, addrspace 1)
+ $vgpr0_vgpr1 = COPY %1
+
+...
+
+---
+
+name: load_global_v2s64
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; GFX6-LABEL: name: load_global_v2s64
+ ; GFX6: liveins: $vgpr0_vgpr1
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1)
+ ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]]
+ ;
+ ; GFX7-LABEL: name: load_global_v2s64
+ ; GFX7: liveins: $vgpr0_vgpr1
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1)
+ ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]]
+ ;
+ ; GFX7-FLAT-LABEL: name: load_global_v2s64
+ ; GFX7-FLAT: liveins: $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: {{ $}}
+ ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4, addrspace 1)
+ ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
+ ;
+ ; GFX8-LABEL: name: load_global_v2s64
+ ; GFX8: liveins: $vgpr0_vgpr1
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4, addrspace 1)
+ ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
+ ;
+ ; GFX9-LABEL: name: load_global_v2s64
+ ; GFX9: liveins: $vgpr0_vgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1)
+ ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
+ ;
+ ; GFX10-LABEL: name: load_global_v2s64
+ ; GFX10: liveins: $vgpr0_vgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1)
+ ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
+ ;
+ ; GFX11-LABEL: name: load_global_v2s64
+ ; GFX11: liveins: $vgpr0_vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1)
+ ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
+ ;
+ ; GFX12-LABEL: name: load_global_v2s64
+ ; GFX12: liveins: $vgpr0_vgpr1
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1)
+ ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
+ %0:vgpr(p1) = COPY $vgpr0_vgpr1
+ %1:vgpr(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 4, addrspace 1)
+ $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+
+...
+
+---
+
+name: load_global_v2p1
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; GFX6-LABEL: name: load_global_v2p1
+ ; GFX6: liveins: $vgpr0_vgpr1
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1)
+ ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]]
+ ;
+ ; GFX7-LABEL: name: load_global_v2p1
+ ; GFX7: liveins: $vgpr0_vgpr1
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1)
+ ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]]
+ ;
+ ; GFX7-FLAT-LABEL: name: load_global_v2p1
+ ; GFX7-FLAT: liveins: $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: {{ $}}
+ ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4, addrspace 1)
+ ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
+ ;
+ ; GFX8-LABEL: name: load_global_v2p1
+ ; GFX8: liveins: $vgpr0_vgpr1
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4, addrspace 1)
+ ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
+ ;
+ ; GFX9-LABEL: name: load_global_v2p1
+ ; GFX9: liveins: $vgpr0_vgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1)
+ ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
+ ;
+ ; GFX10-LABEL: name: load_global_v2p1
+ ; GFX10: liveins: $vgpr0_vgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1)
+ ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
+ ;
+ ; GFX11-LABEL: name: load_global_v2p1
+ ; GFX11: liveins: $vgpr0_vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1)
+ ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
+ ;
+ ; GFX12-LABEL: name: load_global_v2p1
+ ; GFX12: liveins: $vgpr0_vgpr1
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1)
+ ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
+ %0:vgpr(p1) = COPY $vgpr0_vgpr1
+ %1:vgpr(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 4, addrspace 1)
+ %2:vgpr(<2 x p1>) = G_BITCAST %1(<2 x s64>)
+ $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2
+
+...
+
+---
+
+name: load_global_s128
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; GFX6-LABEL: name: load_global_s128
+ ; GFX6: liveins: $vgpr0_vgpr1
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
+ ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]]
+ ;
+ ; GFX7-LABEL: name: load_global_s128
+ ; GFX7: liveins: $vgpr0_vgpr1
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
+ ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]]
+ ;
+ ; GFX7-FLAT-LABEL: name: load_global_s128
+ ; GFX7-FLAT: liveins: $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: {{ $}}
+ ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4, addrspace 1)
+ ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
+ ;
+ ; GFX8-LABEL: name: load_global_s128
+ ; GFX8: liveins: $vgpr0_vgpr1
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4, addrspace 1)
+ ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
+ ;
+ ; GFX9-LABEL: name: load_global_s128
+ ; GFX9: liveins: $vgpr0_vgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
+ ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
+ ;
+ ; GFX10-LABEL: name: load_global_s128
+ ; GFX10: liveins: $vgpr0_vgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
+ ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
+ ;
+ ; GFX11-LABEL: name: load_global_s128
+ ; GFX11: liveins: $vgpr0_vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
+ ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
+ ;
+ ; GFX12-LABEL: name: load_global_s128
+ ; GFX12: liveins: $vgpr0_vgpr1
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
+ ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
+ %0:vgpr(p1) = COPY $vgpr0_vgpr1
+ %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 1)
+ %2:vgpr(s128) = G_BITCAST %1(<4 x s32>)
+ $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2
+
+...
+
+---
+
+name: load_global_p3_from_4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; GFX6-LABEL: name: load_global_p3_from_4
+ ; GFX6: liveins: $vgpr0_vgpr1
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (p3), addrspace 1)
+ ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
+ ;
+ ; GFX7-LABEL: name: load_global_p3_from_4
+ ; GFX7: liveins: $vgpr0_vgpr1
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (p3), addrspace 1)
+ ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
+ ;
+ ; GFX7-FLAT-LABEL: name: load_global_p3_from_4
+ ; GFX7-FLAT: liveins: $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: {{ $}}
+ ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3), addrspace 1)
+ ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
+ ;
+ ; GFX8-LABEL: name: load_global_p3_from_4
+ ; GFX8: liveins: $vgpr0_vgpr1
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3), addrspace 1)
+ ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
+ ;
+ ; GFX9-LABEL: name: load_global_p3_from_4
+ ; GFX9: liveins: $vgpr0_vgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (p3), addrspace 1)
+ ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
+ ;
+ ; GFX10-LABEL: name: load_global_p3_from_4
+ ; GFX10: liveins: $vgpr0_vgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (p3), addrspace 1)
+ ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
+ ;
+ ; GFX11-LABEL: name: load_global_p3_from_4
+ ; GFX11: liveins: $vgpr0_vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (p3), addrspace 1)
+ ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
+ ;
+ ; GFX12-LABEL: name: load_global_p3_from_4
+ ; GFX12: liveins: $vgpr0_vgpr1
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (p3), addrspace 1)
+ ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
+ %0:vgpr(p1) = COPY $vgpr0_vgpr1
+ %1:vgpr(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 1)
+ $vgpr0 = COPY %1
+
+...
+
+---
+
+name: load_global_p1_from_8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; GFX6-LABEL: name: load_global_p1_from_8
+ ; GFX6: liveins: $vgpr0_vgpr1
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (p1), addrspace 1)
+ ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
+ ;
+ ; GFX7-LABEL: name: load_global_p1_from_8
+ ; GFX7: liveins: $vgpr0_vgpr1
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (p1), addrspace 1)
+ ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
+ ;
+ ; GFX7-FLAT-LABEL: name: load_global_p1_from_8
+ ; GFX7-FLAT: liveins: $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: {{ $}}
+ ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1), addrspace 1)
+ ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
+ ;
+ ; GFX8-LABEL: name: load_global_p1_from_8
+ ; GFX8: liveins: $vgpr0_vgpr1
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1), addrspace 1)
+ ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
+ ;
+ ; GFX9-LABEL: name: load_global_p1_from_8
+ ; GFX9: liveins: $vgpr0_vgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (p1), addrspace 1)
+ ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
+ ;
+ ; GFX10-LABEL: name: load_global_p1_from_8
+ ; GFX10: liveins: $vgpr0_vgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (p1), addrspace 1)
+ ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
+ ;
+ ; GFX11-LABEL: name: load_global_p1_from_8
+ ; GFX11: liveins: $vgpr0_vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (p1), addrspace 1)
+ ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
+ ;
+ ; GFX12-LABEL: name: load_global_p1_from_8
+ ; GFX12: liveins: $vgpr0_vgpr1
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (p1), addrspace 1)
+ ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
+ %0:vgpr(p1) = COPY $vgpr0_vgpr1
+ %1:vgpr(p1) = G_LOAD %0 :: (load (p1), align 8, addrspace 1)
+ $vgpr0_vgpr1 = COPY %1
+
+...
+
+---
+
+name: load_global_p999_from_8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; GFX6-LABEL: name: load_global_p999_from_8
+ ; GFX6: liveins: $vgpr0_vgpr1
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+ ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999), addrspace 1)
+ ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
+ ;
+ ; GFX7-LABEL: name: load_global_p999_from_8
+ ; GFX7: liveins: $vgpr0_vgpr1
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+ ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999), addrspace 1)
+ ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
+ ;
+ ; GFX7-FLAT-LABEL: name: load_global_p999_from_8
+ ; GFX7-FLAT: liveins: $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: {{ $}}
+ ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999), addrspace 1)
+ ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
+ ;
+ ; GFX8-LABEL: name: load_global_p999_from_8
+ ; GFX8: liveins: $vgpr0_vgpr1
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+ ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999), addrspace 1)
+ ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
+ ;
+ ; GFX9-LABEL: name: load_global_p999_from_8
+ ; GFX9: liveins: $vgpr0_vgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+ ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999), addrspace 1)
+ ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
+ ;
+ ; GFX10-LABEL: name: load_global_p999_from_8
+ ; GFX10: liveins: $vgpr0_vgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+ ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999), addrspace 1)
+ ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
+ ;
+ ; GFX11-LABEL: name: load_global_p999_from_8
+ ; GFX11: liveins: $vgpr0_vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+ ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999), addrspace 1)
+ ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
+ ;
+ ; GFX12-LABEL: name: load_global_p999_from_8
+ ; GFX12: liveins: $vgpr0_vgpr1
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+ ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999), addrspace 1)
+ ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
+ %0:vgpr(p1) = COPY $vgpr0_vgpr1
+ %1:vgpr(p999) = G_LOAD %0 :: (load (p999), align 8, addrspace 1)
+ $vgpr0_vgpr1 = COPY %1
+
+...
+
+---
+
+name: load_global_v2p3
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; GFX6-LABEL: name: load_global_v2p3
+ ; GFX6: liveins: $vgpr0_vgpr1
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1)
+ ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
+ ;
+ ; GFX7-LABEL: name: load_global_v2p3
+ ; GFX7: liveins: $vgpr0_vgpr1
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1)
+ ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
+ ;
+ ; GFX7-FLAT-LABEL: name: load_global_v2p3
+ ; GFX7-FLAT: liveins: $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: {{ $}}
+ ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>), addrspace 1)
+ ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
+ ;
+ ; GFX8-LABEL: name: load_global_v2p3
+ ; GFX8: liveins: $vgpr0_vgpr1
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>), addrspace 1)
+ ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
+ ;
+ ; GFX9-LABEL: name: load_global_v2p3
+ ; GFX9: liveins: $vgpr0_vgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1)
+ ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
+ ;
+ ; GFX10-LABEL: name: load_global_v2p3
+ ; GFX10: liveins: $vgpr0_vgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1)
+ ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
+ ;
+ ; GFX11-LABEL: name: load_global_v2p3
+ ; GFX11: liveins: $vgpr0_vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1)
+ ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
+ ;
+ ; GFX12-LABEL: name: load_global_v2p3
+ ; GFX12: liveins: $vgpr0_vgpr1
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1)
+ ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
+ %0:vgpr(p1) = COPY $vgpr0_vgpr1
+ %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 1)
+ %2:vgpr(<2 x p3>) = G_BITCAST %1(<2 x s32>)
+ $vgpr0_vgpr1 = COPY %2
+
+...
+
+---
+
+name: load_global_v2s16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; GFX6-LABEL: name: load_global_v2s16
+ ; GFX6: liveins: $vgpr0_vgpr1
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 1)
+ ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
+ ;
+ ; GFX7-LABEL: name: load_global_v2s16
+ ; GFX7: liveins: $vgpr0_vgpr1
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 1)
+ ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
+ ;
+ ; GFX7-FLAT-LABEL: name: load_global_v2s16
+ ; GFX7-FLAT: liveins: $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: {{ $}}
+ ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>), addrspace 1)
+ ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
+ ;
+ ; GFX8-LABEL: name: load_global_v2s16
+ ; GFX8: liveins: $vgpr0_vgpr1
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>), addrspace 1)
+ ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
+ ;
+ ; GFX9-LABEL: name: load_global_v2s16
+ ; GFX9: liveins: $vgpr0_vgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 1)
+ ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
+ ;
+ ; GFX10-LABEL: name: load_global_v2s16
+ ; GFX10: liveins: $vgpr0_vgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 1)
+ ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
+ ;
+ ; GFX11-LABEL: name: load_global_v2s16
+ ; GFX11: liveins: $vgpr0_vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 1)
+ ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
+ ;
+ ; GFX12-LABEL: name: load_global_v2s16
+ ; GFX12: liveins: $vgpr0_vgpr1
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 1)
+ ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
+ %0:vgpr(p1) = COPY $vgpr0_vgpr1
+ %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 1)
+ $vgpr0 = COPY %1
+
+...
+
+---
+
+name: load_global_v4s16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; GFX6-LABEL: name: load_global_v4s16
+ ; GFX6: liveins: $vgpr0_vgpr1
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 1)
+ ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
+ ;
+ ; GFX7-LABEL: name: load_global_v4s16
+ ; GFX7: liveins: $vgpr0_vgpr1
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 1)
+ ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
+ ;
+ ; GFX7-FLAT-LABEL: name: load_global_v4s16
+ ; GFX7-FLAT: liveins: $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: {{ $}}
+ ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>), addrspace 1)
+ ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
+ ;
+ ; GFX8-LABEL: name: load_global_v4s16
+ ; GFX8: liveins: $vgpr0_vgpr1
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>), addrspace 1)
+ ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
+ ;
+ ; GFX9-LABEL: name: load_global_v4s16
+ ; GFX9: liveins: $vgpr0_vgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 1)
+ ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
+ ;
+ ; GFX10-LABEL: name: load_global_v4s16
+ ; GFX10: liveins: $vgpr0_vgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 1)
+ ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
+ ;
+ ; GFX11-LABEL: name: load_global_v4s16
+ ; GFX11: liveins: $vgpr0_vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 1)
+ ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
+ ;
+ ; GFX12-LABEL: name: load_global_v4s16
+ ; GFX12: liveins: $vgpr0_vgpr1
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 1)
+ ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
+ %0:vgpr(p1) = COPY $vgpr0_vgpr1
+ %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 8, addrspace 1)
+ $vgpr0_vgpr1 = COPY %1
+
+...
+
+---
+
+# The <8 x s16> global load is written here in its old-legalization form:
+# a <4 x s32> G_LOAD followed by a G_BITCAST back to <8 x s16>, rather than
+# a direct <8 x s16> load.
+name: load_global_v8s16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; GFX6-LABEL: name: load_global_v8s16
+    ; GFX6: liveins: $vgpr0_vgpr1
+    ; GFX6-NEXT: {{  $}}
+    ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+    ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+    ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+    ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+    ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
+    ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]]
+    ;
+    ; GFX7-LABEL: name: load_global_v8s16
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7-NEXT: {{  $}}
+    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+    ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+    ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+    ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+    ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
+    ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]]
+    ;
+    ; GFX7-FLAT-LABEL: name: load_global_v8s16
+    ; GFX7-FLAT: liveins: $vgpr0_vgpr1
+    ; GFX7-FLAT-NEXT: {{  $}}
+    ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4, addrspace 1)
+    ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
+    ;
+    ; GFX8-LABEL: name: load_global_v8s16
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8-NEXT: {{  $}}
+    ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4, addrspace 1)
+    ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
+    ;
+    ; GFX9-LABEL: name: load_global_v8s16
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
+    ;
+    ; GFX10-LABEL: name: load_global_v8s16
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
+    ;
+    ; GFX11-LABEL: name: load_global_v8s16
+    ; GFX11: liveins: $vgpr0_vgpr1
+    ; GFX11-NEXT: {{  $}}
+    ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
+    ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
+    ;
+    ; GFX12-LABEL: name: load_global_v8s16
+    ; GFX12: liveins: $vgpr0_vgpr1
+    ; GFX12-NEXT: {{  $}}
+    ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
+    ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 1)
+    ; NOTE(review): %2 is never used -- the physreg COPY below reads %1, and
+    ; the CHECK lines above accordingly show nothing selected for the bitcast.
+    ; Confirm whether the COPY was meant to read %2(<8 x s16>) instead; fixing
+    ; it would require regenerating all check prefixes.
+    %2:vgpr(<8 x s16>) = G_BITCAST %1(<4 x s32>)
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+
+...
+
+################################################################################
+### Stress addressing modes
+################################################################################
+
+---
+
+# Offset +2047: per the checks it is folded into the instruction's immediate
+# offset on the buffer path (GFX6/7) and on the global-load path (GFX9+);
+# the flat-only targets (GFX7-FLAT, GFX8) must materialize a 64-bit add.
+name: load_global_s32_from_1_gep_2047
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; GFX6-LABEL: name: load_global_s32_from_1_gep_2047
+    ; GFX6: liveins: $vgpr0_vgpr1
+    ; GFX6-NEXT: {{  $}}
+    ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+    ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+    ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+    ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+    ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, implicit $exec :: (load (s8), addrspace 1)
+    ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
+    ;
+    ; GFX7-LABEL: name: load_global_s32_from_1_gep_2047
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7-NEXT: {{  $}}
+    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+    ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+    ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+    ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+    ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, implicit $exec :: (load (s8), addrspace 1)
+    ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
+    ;
+    ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_2047
+    ; GFX7-FLAT: liveins: $vgpr0_vgpr1
+    ; GFX7-FLAT-NEXT: {{  $}}
+    ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 2047, implicit $exec
+    ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+    ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+    ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+    ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+    ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+    ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ;
+    ; GFX8-LABEL: name: load_global_s32_from_1_gep_2047
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8-NEXT: {{  $}}
+    ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 2047, implicit $exec
+    ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+    ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+    ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+    ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+    ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+    ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ;
+    ; GFX9-LABEL: name: load_global_s32_from_1_gep_2047
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec :: (load (s8), addrspace 1)
+    ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    ;
+    ; GFX10-LABEL: name: load_global_s32_from_1_gep_2047
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec :: (load (s8), addrspace 1)
+    ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    ;
+    ; GFX11-LABEL: name: load_global_s32_from_1_gep_2047
+    ; GFX11: liveins: $vgpr0_vgpr1
+    ; GFX11-NEXT: {{  $}}
+    ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec :: (load (s8), addrspace 1)
+    ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    ;
+    ; GFX12-LABEL: name: load_global_s32_from_1_gep_2047
+    ; GFX12: liveins: $vgpr0_vgpr1
+    ; GFX12-NEXT: {{  $}}
+    ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec :: (load (s8), addrspace 1)
+    ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = G_CONSTANT i64 2047
+    %2:vgpr(p1) = G_PTR_ADD %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1)
+    $vgpr0 = COPY %3
+
+...
+
+---
+
+# Offset +2048: still an immediate for the GFX6/7 buffer path and for the
+# GFX9/11/12 global loads, but the checks show GFX10 materializing a 64-bit
+# add -- its global load cannot encode an offset this large.
+name: load_global_s32_from_1_gep_2048
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; GFX6-LABEL: name: load_global_s32_from_1_gep_2048
+    ; GFX6: liveins: $vgpr0_vgpr1
+    ; GFX6-NEXT: {{  $}}
+    ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+    ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+    ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+    ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+    ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, 0, implicit $exec :: (load (s8), addrspace 1)
+    ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
+    ;
+    ; GFX7-LABEL: name: load_global_s32_from_1_gep_2048
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7-NEXT: {{  $}}
+    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+    ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+    ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+    ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+    ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, 0, implicit $exec :: (load (s8), addrspace 1)
+    ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
+    ;
+    ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_2048
+    ; GFX7-FLAT: liveins: $vgpr0_vgpr1
+    ; GFX7-FLAT-NEXT: {{  $}}
+    ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 2048, implicit $exec
+    ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+    ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+    ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+    ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+    ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+    ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ;
+    ; GFX8-LABEL: name: load_global_s32_from_1_gep_2048
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8-NEXT: {{  $}}
+    ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 2048, implicit $exec
+    ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+    ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+    ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+    ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+    ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+    ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ;
+    ; GFX9-LABEL: name: load_global_s32_from_1_gep_2048
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec :: (load (s8), addrspace 1)
+    ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    ;
+    ; GFX10-LABEL: name: load_global_s32_from_1_gep_2048
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 2048, implicit $exec
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+    ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+    ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+    ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+    ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    ;
+    ; GFX11-LABEL: name: load_global_s32_from_1_gep_2048
+    ; GFX11: liveins: $vgpr0_vgpr1
+    ; GFX11-NEXT: {{  $}}
+    ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec :: (load (s8), addrspace 1)
+    ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    ;
+    ; GFX12-LABEL: name: load_global_s32_from_1_gep_2048
+    ; GFX12: liveins: $vgpr0_vgpr1
+    ; GFX12-NEXT: {{  $}}
+    ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec :: (load (s8), addrspace 1)
+    ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = G_CONSTANT i64 2048
+    %2:vgpr(p1) = G_PTR_ADD %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1)
+    $vgpr0 = COPY %3
+
+...
+
+---
+
+# Offset -2047: the GFX6/7 buffer path shows no negative immediate offset --
+# the address is computed with a 64-bit add first -- as do the flat-only
+# targets; GFX9+ fold the negative offset directly into the global load.
+name: load_global_s32_from_1_gep_m2047
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; GFX6-LABEL: name: load_global_s32_from_1_gep_m2047
+    ; GFX6: liveins: $vgpr0_vgpr1
+    ; GFX6-NEXT: {{  $}}
+    ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX6-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2047, implicit $exec
+    ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+    ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+    ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+    ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+    ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+    ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+    ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+    ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
+    ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
+    ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
+    ;
+    ; GFX7-LABEL: name: load_global_s32_from_1_gep_m2047
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7-NEXT: {{  $}}
+    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2047, implicit $exec
+    ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+    ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+    ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+    ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+    ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+    ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+    ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+    ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
+    ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
+    ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
+    ;
+    ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m2047
+    ; GFX7-FLAT: liveins: $vgpr0_vgpr1
+    ; GFX7-FLAT-NEXT: {{  $}}
+    ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2047, implicit $exec
+    ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+    ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+    ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+    ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+    ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+    ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ;
+    ; GFX8-LABEL: name: load_global_s32_from_1_gep_m2047
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8-NEXT: {{  $}}
+    ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2047, implicit $exec
+    ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+    ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+    ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+    ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+    ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+    ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ;
+    ; GFX9-LABEL: name: load_global_s32_from_1_gep_m2047
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec :: (load (s8), addrspace 1)
+    ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    ;
+    ; GFX10-LABEL: name: load_global_s32_from_1_gep_m2047
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec :: (load (s8), addrspace 1)
+    ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    ;
+    ; GFX11-LABEL: name: load_global_s32_from_1_gep_m2047
+    ; GFX11: liveins: $vgpr0_vgpr1
+    ; GFX11-NEXT: {{  $}}
+    ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec :: (load (s8), addrspace 1)
+    ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    ;
+    ; GFX12-LABEL: name: load_global_s32_from_1_gep_m2047
+    ; GFX12: liveins: $vgpr0_vgpr1
+    ; GFX12-NEXT: {{  $}}
+    ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec :: (load (s8), addrspace 1)
+    ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = G_CONSTANT i64 -2047
+    %2:vgpr(p1) = G_PTR_ADD %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1)
+    $vgpr0 = COPY %3
+
+...
+
+---
+
+# Offset -2048: as with -2047, the GFX6/7 buffer and flat-only paths compute
+# the address with an explicit 64-bit add, while GFX9+ fold the offset.
+# Note that GFX10 folds -2048 even though it could not fold +2048 above.
+name: load_global_s32_from_1_gep_m2048
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; GFX6-LABEL: name: load_global_s32_from_1_gep_m2048
+    ; GFX6: liveins: $vgpr0_vgpr1
+    ; GFX6-NEXT: {{  $}}
+    ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX6-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2048, implicit $exec
+    ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+    ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+    ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+    ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+    ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+    ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+    ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+    ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
+    ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
+    ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
+    ;
+    ; GFX7-LABEL: name: load_global_s32_from_1_gep_m2048
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7-NEXT: {{  $}}
+    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2048, implicit $exec
+    ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+    ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+    ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+    ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+    ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+    ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+    ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+    ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
+    ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
+    ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
+    ;
+    ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m2048
+    ; GFX7-FLAT: liveins: $vgpr0_vgpr1
+    ; GFX7-FLAT-NEXT: {{  $}}
+    ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2048, implicit $exec
+    ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+    ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+    ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+    ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+    ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+    ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ;
+    ; GFX8-LABEL: name: load_global_s32_from_1_gep_m2048
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8-NEXT: {{  $}}
+    ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2048, implicit $exec
+    ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+    ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+    ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+    ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+    ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+    ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ;
+    ; GFX9-LABEL: name: load_global_s32_from_1_gep_m2048
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec :: (load (s8), addrspace 1)
+    ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    ;
+    ; GFX10-LABEL: name: load_global_s32_from_1_gep_m2048
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec :: (load (s8), addrspace 1)
+    ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    ;
+    ; GFX11-LABEL: name: load_global_s32_from_1_gep_m2048
+    ; GFX11: liveins: $vgpr0_vgpr1
+    ; GFX11-NEXT: {{  $}}
+    ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec :: (load (s8), addrspace 1)
+    ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    ;
+    ; GFX12-LABEL: name: load_global_s32_from_1_gep_m2048
+    ; GFX12: liveins: $vgpr0_vgpr1
+    ; GFX12-NEXT: {{  $}}
+    ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec :: (load (s8), addrspace 1)
+    ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = G_CONSTANT i64 -2048
+    %2:vgpr(p1) = G_PTR_ADD %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1)
+    $vgpr0 = COPY %3
+
+...
+
+---
+
+name: load_global_s32_from_1_gep_4095
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; GFX6-LABEL: name: load_global_s32_from_1_gep_4095
+ ; GFX6: liveins: $vgpr0_vgpr1
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
+ ;
+ ; GFX7-LABEL: name: load_global_s32_from_1_gep_4095
+ ; GFX7: liveins: $vgpr0_vgpr1
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
+ ;
+ ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_4095
+ ; GFX7-FLAT: liveins: $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: {{ $}}
+ ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4095, implicit $exec
+ ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+ ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+ ;
+ ; GFX8-LABEL: name: load_global_s32_from_1_gep_4095
+ ; GFX8: liveins: $vgpr0_vgpr1
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4095, implicit $exec
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+ ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+ ;
+ ; GFX9-LABEL: name: load_global_s32_from_1_gep_4095
+ ; GFX9: liveins: $vgpr0_vgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+ ;
+ ; GFX10-LABEL: name: load_global_s32_from_1_gep_4095
+ ; GFX10: liveins: $vgpr0_vgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4095, implicit $exec
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+ ;
+ ; GFX11-LABEL: name: load_global_s32_from_1_gep_4095
+ ; GFX11: liveins: $vgpr0_vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+ ;
+ ; GFX12-LABEL: name: load_global_s32_from_1_gep_4095
+ ; GFX12: liveins: $vgpr0_vgpr1
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+ %0:vgpr(p1) = COPY $vgpr0_vgpr1
+ %1:vgpr(s64) = G_CONSTANT i64 4095
+ %2:vgpr(p1) = G_PTR_ADD %0, %1
+ %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1)
+ $vgpr0 = COPY %3
+
+...
+
+---
+
+name: load_global_s32_from_1_gep_4096
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; GFX6-LABEL: name: load_global_s32_from_1_gep_4096
+ ; GFX6: liveins: $vgpr0_vgpr1
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX6-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
+ ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
+ ;
+ ; GFX7-LABEL: name: load_global_s32_from_1_gep_4096
+ ; GFX7: liveins: $vgpr0_vgpr1
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX7-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
+ ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
+ ;
+ ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_4096
+ ; GFX7-FLAT: liveins: $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: {{ $}}
+ ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4096, implicit $exec
+ ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+ ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+ ;
+ ; GFX8-LABEL: name: load_global_s32_from_1_gep_4096
+ ; GFX8: liveins: $vgpr0_vgpr1
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4096, implicit $exec
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+ ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+ ;
+ ; GFX9-LABEL: name: load_global_s32_from_1_gep_4096
+ ; GFX9: liveins: $vgpr0_vgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4096, implicit $exec
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+ ;
+ ; GFX10-LABEL: name: load_global_s32_from_1_gep_4096
+ ; GFX10: liveins: $vgpr0_vgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4096, implicit $exec
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+ ;
+ ; GFX11-LABEL: name: load_global_s32_from_1_gep_4096
+ ; GFX11: liveins: $vgpr0_vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX11-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4096, implicit $exec
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+ ;
+ ; GFX12-LABEL: name: load_global_s32_from_1_gep_4096
+ ; GFX12: liveins: $vgpr0_vgpr1
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 4096, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+ %0:vgpr(p1) = COPY $vgpr0_vgpr1
+ %1:vgpr(s64) = G_CONSTANT i64 4096
+ %2:vgpr(p1) = G_PTR_ADD %0, %1
+ %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1)
+ $vgpr0 = COPY %3
+
+...
+
+---
+
+name: load_global_s32_from_1_gep_m4095
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; GFX6-LABEL: name: load_global_s32_from_1_gep_m4095
+ ; GFX6: liveins: $vgpr0_vgpr1
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX6-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4095, implicit $exec
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
+ ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
+ ;
+ ; GFX7-LABEL: name: load_global_s32_from_1_gep_m4095
+ ; GFX7: liveins: $vgpr0_vgpr1
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4095, implicit $exec
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
+ ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
+ ;
+ ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m4095
+ ; GFX7-FLAT: liveins: $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: {{ $}}
+ ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4095, implicit $exec
+ ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+ ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+ ;
+ ; GFX8-LABEL: name: load_global_s32_from_1_gep_m4095
+ ; GFX8: liveins: $vgpr0_vgpr1
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4095, implicit $exec
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+ ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+ ;
+ ; GFX9-LABEL: name: load_global_s32_from_1_gep_m4095
+ ; GFX9: liveins: $vgpr0_vgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4095, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+ ;
+ ; GFX10-LABEL: name: load_global_s32_from_1_gep_m4095
+ ; GFX10: liveins: $vgpr0_vgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4095, implicit $exec
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+ ;
+ ; GFX11-LABEL: name: load_global_s32_from_1_gep_m4095
+ ; GFX11: liveins: $vgpr0_vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4095, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+ ;
+ ; GFX12-LABEL: name: load_global_s32_from_1_gep_m4095
+ ; GFX12: liveins: $vgpr0_vgpr1
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4095, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+ %0:vgpr(p1) = COPY $vgpr0_vgpr1
+ %1:vgpr(s64) = G_CONSTANT i64 -4095
+ %2:vgpr(p1) = G_PTR_ADD %0, %1
+ %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1)
+ $vgpr0 = COPY %3
+
+...
+
+---
+
+name: load_global_s32_from_1_gep_m4096
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; GFX6-LABEL: name: load_global_s32_from_1_gep_m4096
+ ; GFX6: liveins: $vgpr0_vgpr1
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX6-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4096, implicit $exec
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
+ ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
+ ;
+ ; GFX7-LABEL: name: load_global_s32_from_1_gep_m4096
+ ; GFX7: liveins: $vgpr0_vgpr1
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4096, implicit $exec
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
+ ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
+ ;
+ ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m4096
+ ; GFX7-FLAT: liveins: $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: {{ $}}
+ ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4096, implicit $exec
+ ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+ ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+ ;
+ ; GFX8-LABEL: name: load_global_s32_from_1_gep_m4096
+ ; GFX8: liveins: $vgpr0_vgpr1
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4096, implicit $exec
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+ ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+ ;
+ ; GFX9-LABEL: name: load_global_s32_from_1_gep_m4096
+ ; GFX9: liveins: $vgpr0_vgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4096, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+ ;
+ ; GFX10-LABEL: name: load_global_s32_from_1_gep_m4096
+ ; GFX10: liveins: $vgpr0_vgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4096, implicit $exec
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+ ;
+ ; GFX11-LABEL: name: load_global_s32_from_1_gep_m4096
+ ; GFX11: liveins: $vgpr0_vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4096, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+ ;
+ ; GFX12-LABEL: name: load_global_s32_from_1_gep_m4096
+ ; GFX12: liveins: $vgpr0_vgpr1
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4096, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+ %0:vgpr(p1) = COPY $vgpr0_vgpr1
+ %1:vgpr(s64) = G_CONSTANT i64 -4096
+ %2:vgpr(p1) = G_PTR_ADD %0, %1
+ %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1)
+ $vgpr0 = COPY %3
+
+...
+
+---
+
+# Byte load from a global (p1) pointer plus constant offset 8191.
+# GFX6/GFX7 fold the offset into the buffer soffset SGPR of
+# BUFFER_LOAD_UBYTE_ADDR64; GFX7-FLAT/GFX8 through GFX11 cannot encode it as a
+# load immediate and materialize a 64-bit V_ADD_CO_U32/V_ADDC_U32 first;
+# GFX12's wider offset field folds 8191 directly into GLOBAL_LOAD_UBYTE.
+name: load_global_s32_from_1_gep_8191
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; GFX6-LABEL: name: load_global_s32_from_1_gep_8191
+    ; GFX6: liveins: $vgpr0_vgpr1
+    ; GFX6-NEXT: {{  $}}
+    ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+    ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+    ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+    ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+    ; GFX6-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8191
+    ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
+    ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
+    ;
+    ; GFX7-LABEL: name: load_global_s32_from_1_gep_8191
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7-NEXT: {{  $}}
+    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+    ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+    ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+    ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+    ; GFX7-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8191
+    ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
+    ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
+    ;
+    ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_8191
+    ; GFX7-FLAT: liveins: $vgpr0_vgpr1
+    ; GFX7-FLAT-NEXT: {{  $}}
+    ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8191, implicit $exec
+    ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+    ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+    ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+    ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+    ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+    ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ;
+    ; GFX8-LABEL: name: load_global_s32_from_1_gep_8191
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8-NEXT: {{  $}}
+    ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8191, implicit $exec
+    ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+    ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+    ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+    ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+    ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+    ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ;
+    ; GFX9-LABEL: name: load_global_s32_from_1_gep_8191
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8191, implicit $exec
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+    ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+    ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+    ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+    ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+    ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    ;
+    ; GFX10-LABEL: name: load_global_s32_from_1_gep_8191
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8191, implicit $exec
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+    ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+    ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+    ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+    ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    ;
+    ; GFX11-LABEL: name: load_global_s32_from_1_gep_8191
+    ; GFX11: liveins: $vgpr0_vgpr1
+    ; GFX11-NEXT: {{  $}}
+    ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX11-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8191, implicit $exec
+    ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+    ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+    ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+    ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+    ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+    ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    ;
+    ; GFX12-LABEL: name: load_global_s32_from_1_gep_8191
+    ; GFX12: liveins: $vgpr0_vgpr1
+    ; GFX12-NEXT: {{  $}}
+    ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 8191, 0, implicit $exec :: (load (s8), addrspace 1)
+    ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = G_CONSTANT i64 8191
+    %2:vgpr(p1) = G_PTR_ADD %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1)
+    $vgpr0 = COPY %3
+
+...
+
<parameter name="content">+---
+
+# Byte load from a global (p1) pointer plus constant offset 8192 (one past
+# 8191, exercising the offset-range boundary). GFX6/GFX7 still fold the value
+# into the buffer soffset SGPR; GFX7-FLAT/GFX8 through GFX11 materialize the
+# offset with a 64-bit V_ADD_CO_U32/V_ADDC_U32 and load at offset 0; GFX12
+# folds 8192 directly into the GLOBAL_LOAD_UBYTE immediate.
+name: load_global_s32_from_1_gep_8192
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; GFX6-LABEL: name: load_global_s32_from_1_gep_8192
+    ; GFX6: liveins: $vgpr0_vgpr1
+    ; GFX6-NEXT: {{  $}}
+    ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+    ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+    ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+    ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+    ; GFX6-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8192
+    ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
+    ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
+    ;
+    ; GFX7-LABEL: name: load_global_s32_from_1_gep_8192
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7-NEXT: {{  $}}
+    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+    ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+    ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+    ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+    ; GFX7-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8192
+    ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
+    ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
+    ;
+    ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_8192
+    ; GFX7-FLAT: liveins: $vgpr0_vgpr1
+    ; GFX7-FLAT-NEXT: {{  $}}
+    ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8192, implicit $exec
+    ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+    ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+    ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+    ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+    ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+    ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ;
+    ; GFX8-LABEL: name: load_global_s32_from_1_gep_8192
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8-NEXT: {{  $}}
+    ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8192, implicit $exec
+    ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+    ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+    ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+    ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+    ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+    ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ;
+    ; GFX9-LABEL: name: load_global_s32_from_1_gep_8192
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8192, implicit $exec
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+    ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+    ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+    ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+    ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+    ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    ;
+    ; GFX10-LABEL: name: load_global_s32_from_1_gep_8192
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8192, implicit $exec
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+    ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+    ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+    ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+    ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    ;
+    ; GFX11-LABEL: name: load_global_s32_from_1_gep_8192
+    ; GFX11: liveins: $vgpr0_vgpr1
+    ; GFX11-NEXT: {{  $}}
+    ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX11-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8192, implicit $exec
+    ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+    ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+    ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+    ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+    ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+    ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    ;
+    ; GFX12-LABEL: name: load_global_s32_from_1_gep_8192
+    ; GFX12: liveins: $vgpr0_vgpr1
+    ; GFX12-NEXT: {{  $}}
+    ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 8192, 0, implicit $exec :: (load (s8), addrspace 1)
+    ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = G_CONSTANT i64 8192
+    %2:vgpr(p1) = G_PTR_ADD %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1)
+    $vgpr0 = COPY %3
+
+...</parameter>
+
<parameter name="content">+---
+
+# Byte load from a global (p1) pointer plus a NEGATIVE constant offset -8191.
+# Unlike the positive-offset cases above, GFX6/GFX7 do not use the soffset
+# operand here: they compute the address with a 64-bit
+# V_ADD_CO_U32/V_ADDC_U32 and issue BUFFER_LOAD_UBYTE_ADDR64 with offset 0.
+# GFX7-FLAT/GFX8 through GFX11 likewise materialize the add; only GFX12 folds
+# -8191 directly into the GLOBAL_LOAD_UBYTE immediate.
+name: load_global_s32_from_1_gep_m8191
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; GFX6-LABEL: name: load_global_s32_from_1_gep_m8191
+    ; GFX6: liveins: $vgpr0_vgpr1
+    ; GFX6-NEXT: {{  $}}
+    ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX6-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8191, implicit $exec
+    ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+    ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+    ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+    ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+    ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+    ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+    ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+    ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
+    ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
+    ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
+    ;
+    ; GFX7-LABEL: name: load_global_s32_from_1_gep_m8191
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7-NEXT: {{  $}}
+    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8191, implicit $exec
+    ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+    ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+    ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+    ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+    ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+    ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+    ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+    ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
+    ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
+    ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
+    ;
+    ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m8191
+    ; GFX7-FLAT: liveins: $vgpr0_vgpr1
+    ; GFX7-FLAT-NEXT: {{  $}}
+    ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8191, implicit $exec
+    ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+    ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+    ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+    ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+    ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+    ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ;
+    ; GFX8-LABEL: name: load_global_s32_from_1_gep_m8191
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8-NEXT: {{  $}}
+    ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8191, implicit $exec
+    ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+    ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+    ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+    ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+    ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+    ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ;
+    ; GFX9-LABEL: name: load_global_s32_from_1_gep_m8191
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8191, implicit $exec
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+    ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+    ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+    ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+    ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+    ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    ;
+    ; GFX10-LABEL: name: load_global_s32_from_1_gep_m8191
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8191, implicit $exec
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+    ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+    ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+    ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+    ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    ;
+    ; GFX11-LABEL: name: load_global_s32_from_1_gep_m8191
+    ; GFX11: liveins: $vgpr0_vgpr1
+    ; GFX11-NEXT: {{  $}}
+    ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX11-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8191, implicit $exec
+    ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+    ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+    ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+    ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+    ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+    ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    ;
+    ; GFX12-LABEL: name: load_global_s32_from_1_gep_m8191
+    ; GFX12: liveins: $vgpr0_vgpr1
+    ; GFX12-NEXT: {{  $}}
+    ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -8191, 0, implicit $exec :: (load (s8), addrspace 1)
+    ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = G_CONSTANT i64 -8191
+    %2:vgpr(p1) = G_PTR_ADD %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1)
+    $vgpr0 = COPY %3
+
+...</parameter>
+
+---
+
+name: load_global_s32_from_1_gep_m8192
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; GFX6-LABEL: name: load_global_s32_from_1_gep_m8192
+ ; GFX6: liveins: $vgpr0_vgpr1
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX6-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8192, implicit $exec
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
+ ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
+ ;
+ ; GFX7-LABEL: name: load_global_s32_from_1_gep_m8192
+ ; GFX7: liveins: $vgpr0_vgpr1
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8192, implicit $exec
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
+ ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
+ ;
+ ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m8192
+ ; GFX7-FLAT: liveins: $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: {{ $}}
+ ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8192, implicit $exec
+ ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+ ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+ ;
+ ; GFX8-LABEL: name: load_global_s32_from_1_gep_m8192
+ ; GFX8: liveins: $vgpr0_vgpr1
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8192, implicit $exec
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+ ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+ ;
+ ; GFX9-LABEL: name: load_global_s32_from_1_gep_m8192
+ ; GFX9: liveins: $vgpr0_vgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8192, implicit $exec
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+ ;
+ ; GFX10-LABEL: name: load_global_s32_from_1_gep_m8192
+ ; GFX10: liveins: $vgpr0_vgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8192, implicit $exec
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+ ;
+ ; GFX11-LABEL: name: load_global_s32_from_1_gep_m8192
+ ; GFX11: liveins: $vgpr0_vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX11-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8192, implicit $exec
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+ ;
+ ; GFX12-LABEL: name: load_global_s32_from_1_gep_m8192
+ ; GFX12: liveins: $vgpr0_vgpr1
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -8192, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+ %0:vgpr(p1) = COPY $vgpr0_vgpr1
+ %1:vgpr(s64) = G_CONSTANT i64 -8192
+ %2:vgpr(p1) = G_PTR_ADD %0, %1
+ %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1)
+ $vgpr0 = COPY %3
+
+...
+
+---
+
+name: load_global_s32_from_1_gep_24bit_max
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; GFX6-LABEL: name: load_global_s32_from_1_gep_24bit_max
+ ; GFX6: liveins: $vgpr0_vgpr1
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX6-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8388607
+ ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
+ ;
+ ; GFX7-LABEL: name: load_global_s32_from_1_gep_24bit_max
+ ; GFX7: liveins: $vgpr0_vgpr1
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX7-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8388607
+ ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
+ ;
+ ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_24bit_max
+ ; GFX7-FLAT: liveins: $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: {{ $}}
+ ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8388607, implicit $exec
+ ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+ ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+ ;
+ ; GFX8-LABEL: name: load_global_s32_from_1_gep_24bit_max
+ ; GFX8: liveins: $vgpr0_vgpr1
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8388607, implicit $exec
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+ ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+ ;
+ ; GFX9-LABEL: name: load_global_s32_from_1_gep_24bit_max
+ ; GFX9: liveins: $vgpr0_vgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8388607, implicit $exec
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+ ;
+ ; GFX10-LABEL: name: load_global_s32_from_1_gep_24bit_max
+ ; GFX10: liveins: $vgpr0_vgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8388607, implicit $exec
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+ ;
+ ; GFX11-LABEL: name: load_global_s32_from_1_gep_24bit_max
+ ; GFX11: liveins: $vgpr0_vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX11-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8388607, implicit $exec
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+ ;
+ ; GFX12-LABEL: name: load_global_s32_from_1_gep_24bit_max
+ ; GFX12: liveins: $vgpr0_vgpr1
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 8388607, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+ %0:vgpr(p1) = COPY $vgpr0_vgpr1
+ %1:vgpr(s64) = G_CONSTANT i64 8388607
+ %2:vgpr(p1) = G_PTR_ADD %0, %1
+ %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1)
+ $vgpr0 = COPY %3
+
+...
+
+---
+
+name: load_global_s32_from_1_gep_2x_24bit_max
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; GFX6-LABEL: name: load_global_s32_from_1_gep_2x_24bit_max
+ ; GFX6: liveins: $vgpr0_vgpr1
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX6-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 16777214
+ ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
+ ;
+ ; GFX7-LABEL: name: load_global_s32_from_1_gep_2x_24bit_max
+ ; GFX7: liveins: $vgpr0_vgpr1
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX7-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 16777214
+ ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
+ ;
+ ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_2x_24bit_max
+ ; GFX7-FLAT: liveins: $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: {{ $}}
+ ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 16777214, implicit $exec
+ ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+ ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+ ;
+ ; GFX8-LABEL: name: load_global_s32_from_1_gep_2x_24bit_max
+ ; GFX8: liveins: $vgpr0_vgpr1
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 16777214, implicit $exec
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+ ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+ ;
+ ; GFX9-LABEL: name: load_global_s32_from_1_gep_2x_24bit_max
+ ; GFX9: liveins: $vgpr0_vgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 16777214, implicit $exec
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+ ;
+ ; GFX10-LABEL: name: load_global_s32_from_1_gep_2x_24bit_max
+ ; GFX10: liveins: $vgpr0_vgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 16777214, implicit $exec
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+ ;
+ ; GFX11-LABEL: name: load_global_s32_from_1_gep_2x_24bit_max
+ ; GFX11: liveins: $vgpr0_vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX11-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 16777214, implicit $exec
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+ ;
+ ; GFX12-LABEL: name: load_global_s32_from_1_gep_2x_24bit_max
+ ; GFX12: liveins: $vgpr0_vgpr1
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX12-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 16777214, implicit $exec
+ ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX12-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX12-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+ %0:vgpr(p1) = COPY $vgpr0_vgpr1
+ %1:vgpr(s64) = G_CONSTANT i64 16777214
+ %2:vgpr(p1) = G_PTR_ADD %0, %1
+ %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1)
+ $vgpr0 = COPY %3
+
+...
+
+---
+
+name: load_global_s32_from_1_gep_24bit_min
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; GFX6-LABEL: name: load_global_s32_from_1_gep_24bit_min
+ ; GFX6: liveins: $vgpr0_vgpr1
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX6-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8388608, implicit $exec
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
+ ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
+ ;
+ ; GFX7-LABEL: name: load_global_s32_from_1_gep_24bit_min
+ ; GFX7: liveins: $vgpr0_vgpr1
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8388608, implicit $exec
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
+ ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
+ ;
+ ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_24bit_min
+ ; GFX7-FLAT: liveins: $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: {{ $}}
+ ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8388608, implicit $exec
+ ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+ ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+ ;
+ ; GFX8-LABEL: name: load_global_s32_from_1_gep_24bit_min
+ ; GFX8: liveins: $vgpr0_vgpr1
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8388608, implicit $exec
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+ ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+ ;
+ ; GFX9-LABEL: name: load_global_s32_from_1_gep_24bit_min
+ ; GFX9: liveins: $vgpr0_vgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8388608, implicit $exec
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+ ;
+ ; GFX10-LABEL: name: load_global_s32_from_1_gep_24bit_min
+ ; GFX10: liveins: $vgpr0_vgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8388608, implicit $exec
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+ ;
+ ; GFX11-LABEL: name: load_global_s32_from_1_gep_24bit_min
+ ; GFX11: liveins: $vgpr0_vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX11-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8388608, implicit $exec
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+ ;
+ ; GFX12-LABEL: name: load_global_s32_from_1_gep_24bit_min
+ ; GFX12: liveins: $vgpr0_vgpr1
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -8388608, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+ %0:vgpr(p1) = COPY $vgpr0_vgpr1
+ %1:vgpr(s64) = G_CONSTANT i64 -8388608
+ %2:vgpr(p1) = G_PTR_ADD %0, %1
+ %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1)
+ $vgpr0 = COPY %3
+
+...
+
+---
+
+name: load_global_s32_from_1_gep_2x_24bit_min
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; GFX6-LABEL: name: load_global_s32_from_1_gep_2x_24bit_min
+ ; GFX6: liveins: $vgpr0_vgpr1
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX6-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -16777215, implicit $exec
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
+ ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
+ ;
+ ; GFX7-LABEL: name: load_global_s32_from_1_gep_2x_24bit_min
+ ; GFX7: liveins: $vgpr0_vgpr1
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -16777215, implicit $exec
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
+ ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
+ ;
+ ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_2x_24bit_min
+ ; GFX7-FLAT: liveins: $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: {{ $}}
+ ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -16777215, implicit $exec
+ ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+ ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+ ;
+ ; GFX8-LABEL: name: load_global_s32_from_1_gep_2x_24bit_min
+ ; GFX8: liveins: $vgpr0_vgpr1
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -16777215, implicit $exec
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+ ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+ ;
+ ; GFX9-LABEL: name: load_global_s32_from_1_gep_2x_24bit_min
+ ; GFX9: liveins: $vgpr0_vgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -16777215, implicit $exec
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+ ;
+ ; GFX10-LABEL: name: load_global_s32_from_1_gep_2x_24bit_min
+ ; GFX10: liveins: $vgpr0_vgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -16777215, implicit $exec
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+ ;
+ ; GFX11-LABEL: name: load_global_s32_from_1_gep_2x_24bit_min
+ ; GFX11: liveins: $vgpr0_vgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX11-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -16777215, implicit $exec
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+ ;
+ ; GFX12-LABEL: name: load_global_s32_from_1_gep_2x_24bit_min
+ ; GFX12: liveins: $vgpr0_vgpr1
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX12-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -16777215, implicit $exec
+ ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
+ ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
+ ; GFX12-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+ ; GFX12-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+ %0:vgpr(p1) = COPY $vgpr0_vgpr1
+ %1:vgpr(s64) = G_CONSTANT i64 -16777215
+ %2:vgpr(p1) = G_PTR_ADD %0, %1
+ %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1)
+ $vgpr0 = COPY %3
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir
index 280c7a5a492da8..59c57a5fefbed9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir
@@ -1,7 +1,7 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -disable-gisel-legality-check -o - %s | FileCheck -check-prefix=GFX6 %s
-# RUN: llc -mtriple=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
+# RUN: llc -mtriple=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -disable-gisel-legality-check -o - %s | FileCheck -check-prefix=GFX7 %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -disable-gisel-legality-check -o - %s | FileCheck -check-prefix=GFX7 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+cumode -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
@@ -24,6 +24,7 @@ body: |
; GFX6-NEXT: $m0 = S_MOV_B32 -1
; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s32), addrspace 3)
; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_B32_]]
+ ;
; GFX7-LABEL: name: load_local_s32_from_4
; GFX7: liveins: $vgpr0
; GFX7-NEXT: {{ $}}
@@ -31,12 +32,14 @@ body: |
; GFX7-NEXT: $m0 = S_MOV_B32 -1
; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s32), addrspace 3)
; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_B32_]]
+ ;
; GFX9-LABEL: name: load_local_s32_from_4
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 3)
; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]]
+ ;
; GFX10-LABEL: name: load_local_s32_from_4
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -67,6 +70,7 @@ body: |
; GFX6-NEXT: $m0 = S_MOV_B32 -1
; GFX6-NEXT: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3)
; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U16_]]
+ ;
; GFX7-LABEL: name: load_local_s32_from_2
; GFX7: liveins: $vgpr0
; GFX7-NEXT: {{ $}}
@@ -74,12 +78,14 @@ body: |
; GFX7-NEXT: $m0 = S_MOV_B32 -1
; GFX7-NEXT: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3)
; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U16_]]
+ ;
; GFX9-LABEL: name: load_local_s32_from_2
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 3)
; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U16_gfx9_]]
+ ;
; GFX10-LABEL: name: load_local_s32_from_2
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -113,6 +119,7 @@ body: |
; GFX6-NEXT: $m0 = S_MOV_B32 -1
; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3)
; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U8_]]
+ ;
; GFX7-LABEL: name: load_local_s32_from_1
; GFX7: liveins: $vgpr0
; GFX7-NEXT: {{ $}}
@@ -120,12 +127,14 @@ body: |
; GFX7-NEXT: $m0 = S_MOV_B32 -1
; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3)
; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U8_]]
+ ;
; GFX9-LABEL: name: load_local_s32_from_1
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 3)
; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]]
+ ;
; GFX10-LABEL: name: load_local_s32_from_1
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -156,6 +165,7 @@ body: |
; GFX6-NEXT: $m0 = S_MOV_B32 -1
; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x s32>), addrspace 3)
; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
+ ;
; GFX7-LABEL: name: load_local_v2s32
; GFX7: liveins: $vgpr0
; GFX7-NEXT: {{ $}}
@@ -163,12 +173,14 @@ body: |
; GFX7-NEXT: $m0 = S_MOV_B32 -1
; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x s32>), addrspace 3)
; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
+ ;
; GFX9-LABEL: name: load_local_v2s32
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 3)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]]
+ ;
; GFX10-LABEL: name: load_local_v2s32
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -199,6 +211,7 @@ body: |
; GFX6-NEXT: $m0 = S_MOV_B32 -1
; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ;
; GFX7-LABEL: name: load_local_v2s32_align4
; GFX7: liveins: $vgpr0
; GFX7-NEXT: {{ $}}
@@ -206,12 +219,14 @@ body: |
; GFX7-NEXT: $m0 = S_MOV_B32 -1
; GFX7-NEXT: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (<2 x s32>), align 4, addrspace 3)
; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]]
+ ;
; GFX9-LABEL: name: load_local_v2s32_align4
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<2 x s32>), align 4, addrspace 3)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]]
+ ;
; GFX10-LABEL: name: load_local_v2s32_align4
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -242,6 +257,7 @@ body: |
; GFX6-NEXT: $m0 = S_MOV_B32 -1
; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s64), addrspace 3)
; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
+ ;
; GFX7-LABEL: name: load_local_s64
; GFX7: liveins: $vgpr0
; GFX7-NEXT: {{ $}}
@@ -249,12 +265,14 @@ body: |
; GFX7-NEXT: $m0 = S_MOV_B32 -1
; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s64), addrspace 3)
; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
+ ;
; GFX9-LABEL: name: load_local_s64
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 3)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]]
+ ;
; GFX10-LABEL: name: load_local_s64
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -285,6 +303,7 @@ body: |
; GFX6-NEXT: $m0 = S_MOV_B32 -1
; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3)
; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ;
; GFX7-LABEL: name: load_local_s64_align4
; GFX7: liveins: $vgpr0
; GFX7-NEXT: {{ $}}
@@ -292,12 +311,14 @@ body: |
; GFX7-NEXT: $m0 = S_MOV_B32 -1
; GFX7-NEXT: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (s64), align 4, addrspace 3)
; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]]
+ ;
; GFX9-LABEL: name: load_local_s64_align4
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (s64), align 4, addrspace 3)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]]
+ ;
; GFX10-LABEL: name: load_local_s64_align4
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -328,6 +349,7 @@ body: |
; GFX6-NEXT: $m0 = S_MOV_B32 -1
; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p3), addrspace 3)
; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_B32_]]
+ ;
; GFX7-LABEL: name: load_local_p3_from_4
; GFX7: liveins: $vgpr0
; GFX7-NEXT: {{ $}}
@@ -335,12 +357,14 @@ body: |
; GFX7-NEXT: $m0 = S_MOV_B32 -1
; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p3), addrspace 3)
; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_B32_]]
+ ;
; GFX9-LABEL: name: load_local_p3_from_4
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (p3), addrspace 3)
; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]]
+ ;
; GFX10-LABEL: name: load_local_p3_from_4
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -371,6 +395,7 @@ body: |
; GFX6-NEXT: $m0 = S_MOV_B32 -1
; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p5), addrspace 3)
; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_B32_]]
+ ;
; GFX7-LABEL: name: load_local_p5_from_4
; GFX7: liveins: $vgpr0
; GFX7-NEXT: {{ $}}
@@ -378,12 +403,14 @@ body: |
; GFX7-NEXT: $m0 = S_MOV_B32 -1
; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p5), addrspace 3)
; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_B32_]]
+ ;
; GFX9-LABEL: name: load_local_p5_from_4
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (p5), addrspace 3)
; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]]
+ ;
; GFX10-LABEL: name: load_local_p5_from_4
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -414,6 +441,7 @@ body: |
; GFX6-NEXT: $m0 = S_MOV_B32 -1
; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p1), addrspace 3)
; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
+ ;
; GFX7-LABEL: name: load_local_p1_align8
; GFX7: liveins: $vgpr0
; GFX7-NEXT: {{ $}}
@@ -421,12 +449,14 @@ body: |
; GFX7-NEXT: $m0 = S_MOV_B32 -1
; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p1), addrspace 3)
; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
+ ;
; GFX9-LABEL: name: load_local_p1_align8
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (p1), addrspace 3)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]]
+ ;
; GFX10-LABEL: name: load_local_p1_align8
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -457,6 +487,7 @@ body: |
; GFX6-NEXT: $m0 = S_MOV_B32 -1
; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3)
; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+ ;
; GFX7-LABEL: name: load_local_p1_align4
; GFX7: liveins: $vgpr0
; GFX7-NEXT: {{ $}}
@@ -464,12 +495,14 @@ body: |
; GFX7-NEXT: $m0 = S_MOV_B32 -1
; GFX7-NEXT: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (p1), align 4, addrspace 3)
; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]]
+ ;
; GFX9-LABEL: name: load_local_p1_align4
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (p1), align 4, addrspace 3)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]]
+ ;
; GFX10-LABEL: name: load_local_p1_align4
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -500,6 +533,7 @@ body: |
; GFX6-NEXT: $m0 = S_MOV_B32 -1
; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load (p999), addrspace 3)
; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
+ ;
; GFX7-LABEL: name: load_local_p999_from_8
; GFX7: liveins: $vgpr0
; GFX7-NEXT: {{ $}}
@@ -507,12 +541,14 @@ body: |
; GFX7-NEXT: $m0 = S_MOV_B32 -1
; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load (p999), addrspace 3)
; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
+ ;
; GFX9-LABEL: name: load_local_p999_from_8
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load (p999), addrspace 3)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
+ ;
; GFX10-LABEL: name: load_local_p999_from_8
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -539,32 +575,36 @@ body: |
; GFX6-LABEL: name: load_local_v2p3
; GFX6: liveins: $vgpr0
; GFX6-NEXT: {{ $}}
- ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6-NEXT: $m0 = S_MOV_B32 -1
- ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
- ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x s32>), addrspace 3)
+ ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
+ ;
; GFX7-LABEL: name: load_local_v2p3
; GFX7: liveins: $vgpr0
; GFX7-NEXT: {{ $}}
- ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7-NEXT: $m0 = S_MOV_B32 -1
- ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
- ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x s32>), addrspace 3)
+ ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
+ ;
; GFX9-LABEL: name: load_local_v2p3
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
- ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
- ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 3)
+ ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]]
+ ;
; GFX10-LABEL: name: load_local_v2p3
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
- ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
- ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX10-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 3)
+ ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]]
%0:vgpr(p3) = COPY $vgpr0
- %1:vgpr(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 3)
- $vgpr0_vgpr1 = COPY %1
+ %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 3)
+ %2:vgpr(<2 x p3>) = G_BITCAST %1(<2 x s32>)
+ $vgpr0_vgpr1 = COPY %2
...
@@ -586,6 +626,7 @@ body: |
; GFX6-NEXT: $m0 = S_MOV_B32 -1
; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x s16>), addrspace 3)
; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_B32_]]
+ ;
; GFX7-LABEL: name: load_local_v2s16
; GFX7: liveins: $vgpr0
; GFX7-NEXT: {{ $}}
@@ -593,12 +634,14 @@ body: |
; GFX7-NEXT: $m0 = S_MOV_B32 -1
; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x s16>), addrspace 3)
; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_B32_]]
+ ;
; GFX9-LABEL: name: load_local_v2s16
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 3)
; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]]
+ ;
; GFX10-LABEL: name: load_local_v2s16
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -629,6 +672,7 @@ body: |
; GFX6-NEXT: $m0 = S_MOV_B32 -1
; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<4 x s16>), addrspace 3)
; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
+ ;
; GFX7-LABEL: name: load_local_v4s16
; GFX7: liveins: $vgpr0
; GFX7-NEXT: {{ $}}
@@ -636,12 +680,14 @@ body: |
; GFX7-NEXT: $m0 = S_MOV_B32 -1
; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<4 x s16>), addrspace 3)
; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
+ ;
; GFX9-LABEL: name: load_local_v4s16
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 3)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]]
+ ;
; GFX10-LABEL: name: load_local_v4s16
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -698,6 +744,7 @@ body: |
; GFX6-NEXT: $m0 = S_MOV_B32 -1
; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3)
; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U8_]]
+ ;
; GFX7-LABEL: name: load_local_s32_from_1_gep_65535
; GFX7: liveins: $vgpr0
; GFX7-NEXT: {{ $}}
@@ -705,12 +752,14 @@ body: |
; GFX7-NEXT: $m0 = S_MOV_B32 -1
; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 65535, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3)
; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U8_]]
+ ;
; GFX9-LABEL: name: load_local_s32_from_1_gep_65535
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 65535, 0, implicit $exec :: (load (s8), addrspace 3)
; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]]
+ ;
; GFX10-LABEL: name: load_local_s32_from_1_gep_65535
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -745,6 +794,7 @@ body: |
; GFX6-NEXT: $m0 = S_MOV_B32 -1
; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_AND_B32_e64_]], 65535, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3)
; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U8_]]
+ ;
; GFX7-LABEL: name: load_local_s32_from_1_gep_65535_known_bits_base_address
; GFX7: liveins: $vgpr0
; GFX7-NEXT: {{ $}}
@@ -754,6 +804,7 @@ body: |
; GFX7-NEXT: $m0 = S_MOV_B32 -1
; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_AND_B32_e64_]], 65535, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3)
; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U8_]]
+ ;
; GFX9-LABEL: name: load_local_s32_from_1_gep_65535_known_bits_base_address
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -762,6 +813,7 @@ body: |
; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_AND_B32_e64_]], 65535, 0, implicit $exec :: (load (s8), addrspace 3)
; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]]
+ ;
; GFX10-LABEL: name: load_local_s32_from_1_gep_65535_known_bits_base_address
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -801,6 +853,7 @@ body: |
; GFX6-NEXT: $m0 = S_MOV_B32 -1
; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3)
; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U8_]]
+ ;
; GFX7-LABEL: name: load_local_s32_from_1_gep_65536
; GFX7: liveins: $vgpr0
; GFX7-NEXT: {{ $}}
@@ -810,6 +863,7 @@ body: |
; GFX7-NEXT: $m0 = S_MOV_B32 -1
; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3)
; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U8_]]
+ ;
; GFX9-LABEL: name: load_local_s32_from_1_gep_65536
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -818,6 +872,7 @@ body: |
; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_ADD_U32_e64_]], 0, 0, implicit $exec :: (load (s8), addrspace 3)
; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]]
+ ;
; GFX10-LABEL: name: load_local_s32_from_1_gep_65536
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -854,6 +909,7 @@ body: |
; GFX6-NEXT: $m0 = S_MOV_B32 -1
; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3)
; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U8_]]
+ ;
; GFX7-LABEL: name: load_local_s32_from_1_gep_m1
; GFX7: liveins: $vgpr0
; GFX7-NEXT: {{ $}}
@@ -863,6 +919,7 @@ body: |
; GFX7-NEXT: $m0 = S_MOV_B32 -1
; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3)
; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U8_]]
+ ;
; GFX9-LABEL: name: load_local_s32_from_1_gep_m1
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -871,6 +928,7 @@ body: |
; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_ADD_U32_e64_]], 0, 0, implicit $exec :: (load (s8), addrspace 3)
; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]]
+ ;
; GFX10-LABEL: name: load_local_s32_from_1_gep_m1
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -907,6 +965,7 @@ body: |
; GFX6-NEXT: $m0 = S_MOV_B32 -1
; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64), align 4, addrspace 3)
; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ;
; GFX7-LABEL: name: load_local_s64_align4_from_1_gep_1016
; GFX7: liveins: $vgpr0_vgpr1
; GFX7-NEXT: {{ $}}
@@ -914,12 +973,14 @@ body: |
; GFX7-NEXT: $m0 = S_MOV_B32 -1
; GFX7-NEXT: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[COPY]], 254, 255, 0, implicit $m0, implicit $exec :: (load (s64), align 4, addrspace 3)
; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]]
+ ;
; GFX9-LABEL: name: load_local_s64_align4_from_1_gep_1016
; GFX9: liveins: $vgpr0_vgpr1
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 254, 255, 0, implicit $exec :: (load (s64), align 4, addrspace 3)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]]
+ ;
; GFX10-LABEL: name: load_local_s64_align4_from_1_gep_1016
; GFX10: liveins: $vgpr0_vgpr1
; GFX10-NEXT: {{ $}}
@@ -954,6 +1015,7 @@ body: |
; GFX6-NEXT: $m0 = S_MOV_B32 -1
; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64), align 4, addrspace 3)
; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ;
; GFX7-LABEL: name: load_local_s64_align4_from_1_gep_1020
; GFX7: liveins: $vgpr0_vgpr1
; GFX7-NEXT: {{ $}}
@@ -963,6 +1025,7 @@ body: |
; GFX7-NEXT: $m0 = S_MOV_B32 -1
; GFX7-NEXT: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[V_ADD_CO_U32_e64_]], 0, 1, 0, implicit $m0, implicit $exec :: (load (s64), align 4, addrspace 3)
; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]]
+ ;
; GFX9-LABEL: name: load_local_s64_align4_from_1_gep_1020
; GFX9: liveins: $vgpr0_vgpr1
; GFX9-NEXT: {{ $}}
@@ -971,6 +1034,7 @@ body: |
; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[V_ADD_U32_e64_]], 0, 1, 0, implicit $exec :: (load (s64), align 4, addrspace 3)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]]
+ ;
; GFX10-LABEL: name: load_local_s64_align4_from_1_gep_1020
; GFX10: liveins: $vgpr0_vgpr1
; GFX10-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
index a63df136e003c3..ff1d3fe3796732 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
@@ -6604,22 +6604,25 @@ body: |
; CI: liveins: $vgpr0_vgpr1
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
- ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), addrspace 4)
- ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4)
+ ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; VI-LABEL: name: test_load_constant_v2p3_align8
; VI: liveins: $vgpr0_vgpr1
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
- ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), addrspace 4)
- ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX9-LABEL: name: test_load_constant_v2p3_align8
; GFX9: liveins: $vgpr0_vgpr1
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
- ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), addrspace 4)
- ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4)
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
%0:_(p4) = COPY $vgpr0_vgpr1
%1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 4)
$vgpr0_vgpr1 = COPY %1
@@ -6635,22 +6638,25 @@ body: |
; CI: liveins: $vgpr0_vgpr1
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
- ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), align 4, addrspace 4)
- ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), align 4, addrspace 4)
+ ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; VI-LABEL: name: test_load_constant_v2p3_align4
; VI: liveins: $vgpr0_vgpr1
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
- ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), align 4, addrspace 4)
- ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), align 4, addrspace 4)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX9-LABEL: name: test_load_constant_v2p3_align4
; GFX9: liveins: $vgpr0_vgpr1
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
- ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), align 4, addrspace 4)
- ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), align 4, addrspace 4)
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
%0:_(p4) = COPY $vgpr0_vgpr1
%1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 4, addrspace 4)
$vgpr0_vgpr1 = COPY %1
@@ -6683,7 +6689,6 @@ body: |
; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
- ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
@@ -6699,9 +6704,9 @@ body: |
; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
- ; CI-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
- ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
- ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+ ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
+ ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; VI-LABEL: name: test_load_constant_v2p3_align1
; VI: liveins: $vgpr0_vgpr1
@@ -6724,7 +6729,6 @@ body: |
; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
- ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
@@ -6740,9 +6744,9 @@ body: |
; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
- ; VI-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
- ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
+ ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX9-LABEL: name: test_load_constant_v2p3_align1
; GFX9: liveins: $vgpr0_vgpr1
@@ -6765,7 +6769,6 @@ body: |
; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
- ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
@@ -6781,9 +6784,9 @@ body: |
; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
- ; GFX9-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
- ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
+ ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
%0:_(p4) = COPY $vgpr0_vgpr1
%1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 1, addrspace 4)
$vgpr0_vgpr1 = COPY %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
index 032ca7c0d4fee9..8e299a19dfdbe5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
@@ -12630,65 +12630,73 @@ body: |
; CI: liveins: $vgpr0_vgpr1
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
- ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p0) :: (load (p3), align 8)
+ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 8)
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p0) :: (load (p3) from unknown-address + 4)
- ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3)
- ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+ ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
+ ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
+ ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; VI-LABEL: name: test_load_flat_v2p3_align8
; VI: liveins: $vgpr0_vgpr1
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
- ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p0) :: (load (p3), align 8)
+ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 8)
; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p0) :: (load (p3) from unknown-address + 4)
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3)
- ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+ ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
+ ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX9PLUS-LABEL: name: test_load_flat_v2p3_align8
; GFX9PLUS: liveins: $vgpr0_vgpr1
; GFX9PLUS-NEXT: {{ $}}
; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
- ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>))
- ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>))
+ ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX11PLUS-LABEL: name: test_load_flat_v2p3_align8
; GFX11PLUS: liveins: $vgpr0_vgpr1
; GFX11PLUS-NEXT: {{ $}}
; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
- ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>))
- ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>))
+ ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX12-LABEL: name: test_load_flat_v2p3_align8
; GFX12: liveins: $vgpr0_vgpr1
; GFX12-NEXT: {{ $}}
; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
- ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>))
- ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>))
+ ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v2p3_align8
; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1
; UNALIGNED_GFX9PLUS-NEXT: {{ $}}
; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
- ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>))
- ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>))
+ ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v2p3_align8
; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1
; UNALIGNED_GFX11PLUS-NEXT: {{ $}}
; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
- ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>))
- ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>))
+ ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; UNALIGNED_GFX12-LABEL: name: test_load_flat_v2p3_align8
; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1
; UNALIGNED_GFX12-NEXT: {{ $}}
; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
- ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>))
- ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>))
+ ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
%0:_(p0) = COPY $vgpr0_vgpr1
%1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 0)
$vgpr0_vgpr1 = COPY %1
@@ -12704,65 +12712,73 @@ body: |
; CI: liveins: $vgpr0_vgpr1
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
- ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p0) :: (load (p3))
+ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p0) :: (load (p3) from unknown-address + 4)
- ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3)
- ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+ ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
+ ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
+ ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; VI-LABEL: name: test_load_flat_v2p3_align4
; VI: liveins: $vgpr0_vgpr1
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
- ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p0) :: (load (p3))
+ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p0) :: (load (p3) from unknown-address + 4)
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3)
- ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+ ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
+ ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX9PLUS-LABEL: name: test_load_flat_v2p3_align4
; GFX9PLUS: liveins: $vgpr0_vgpr1
; GFX9PLUS-NEXT: {{ $}}
; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
- ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>), align 4)
- ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4)
+ ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX11PLUS-LABEL: name: test_load_flat_v2p3_align4
; GFX11PLUS: liveins: $vgpr0_vgpr1
; GFX11PLUS-NEXT: {{ $}}
; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
- ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>), align 4)
- ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4)
+ ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX12-LABEL: name: test_load_flat_v2p3_align4
; GFX12: liveins: $vgpr0_vgpr1
; GFX12-NEXT: {{ $}}
; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
- ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>), align 4)
- ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4)
+ ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v2p3_align4
; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1
; UNALIGNED_GFX9PLUS-NEXT: {{ $}}
; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
- ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>), align 4)
- ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4)
+ ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v2p3_align4
; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1
; UNALIGNED_GFX11PLUS-NEXT: {{ $}}
; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
- ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>), align 4)
- ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4)
+ ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; UNALIGNED_GFX12-LABEL: name: test_load_flat_v2p3_align4
; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1
; UNALIGNED_GFX12-NEXT: {{ $}}
; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
- ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>), align 4)
- ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4)
+ ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
%0:_(p0) = COPY $vgpr0_vgpr1
%1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 4, addrspace 0)
$vgpr0_vgpr1 = COPY %1
@@ -12795,7 +12811,6 @@ body: |
; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
- ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
@@ -12811,9 +12826,9 @@ body: |
; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
- ; CI-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
- ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
- ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+ ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
+ ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; VI-LABEL: name: test_load_flat_v2p3_align1
; VI: liveins: $vgpr0_vgpr1
@@ -12836,7 +12851,6 @@ body: |
; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
- ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
@@ -12852,30 +12866,33 @@ body: |
; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
- ; VI-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
- ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
+ ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX9PLUS-LABEL: name: test_load_flat_v2p3_align1
; GFX9PLUS: liveins: $vgpr0_vgpr1
; GFX9PLUS-NEXT: {{ $}}
; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
- ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>), align 1)
- ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 1)
+ ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX11PLUS-LABEL: name: test_load_flat_v2p3_align1
; GFX11PLUS: liveins: $vgpr0_vgpr1
; GFX11PLUS-NEXT: {{ $}}
; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
- ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>), align 1)
- ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 1)
+ ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX12-LABEL: name: test_load_flat_v2p3_align1
; GFX12: liveins: $vgpr0_vgpr1
; GFX12-NEXT: {{ $}}
; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
- ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>), align 1)
- ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 1)
+ ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v2p3_align1
; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1
@@ -12898,7 +12915,6 @@ body: |
; UNALIGNED_GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
- ; UNALIGNED_GFX9PLUS-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
; UNALIGNED_GFX9PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
@@ -12914,9 +12930,9 @@ body: |
; UNALIGNED_GFX9PLUS-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
; UNALIGNED_GFX9PLUS-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
; UNALIGNED_GFX9PLUS-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
- ; UNALIGNED_GFX9PLUS-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
- ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
- ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+ ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+ ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
+ ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v2p3_align1
; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1
@@ -12939,7 +12955,6 @@ body: |
; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
- ; UNALIGNED_GFX11PLUS-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
@@ -12955,9 +12970,9 @@ body: |
; UNALIGNED_GFX11PLUS-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
; UNALIGNED_GFX11PLUS-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
; UNALIGNED_GFX11PLUS-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
- ; UNALIGNED_GFX11PLUS-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
- ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
- ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+ ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+ ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
+ ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; UNALIGNED_GFX12-LABEL: name: test_load_flat_v2p3_align1
; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1
@@ -12980,7 +12995,6 @@ body: |
; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
- ; UNALIGNED_GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
@@ -12996,9 +13010,9 @@ body: |
; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
- ; UNALIGNED_GFX12-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
- ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
- ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+ ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+ ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
+ ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
%0:_(p0) = COPY $vgpr0_vgpr1
%1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 1, addrspace 0)
$vgpr0_vgpr1 = COPY %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
index d6acc6ecdfc660..f384114ee4cde7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
@@ -13448,43 +13448,49 @@ body: |
; SI: liveins: $vgpr0_vgpr1
; SI-NEXT: {{ $}}
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
- ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
+ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; CI-HSA-LABEL: name: test_load_global_v2p3_align8
; CI-HSA: liveins: $vgpr0_vgpr1
; CI-HSA-NEXT: {{ $}}
; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
- ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
+ ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; CI-MESA-LABEL: name: test_load_global_v2p3_align8
; CI-MESA: liveins: $vgpr0_vgpr1
; CI-MESA-NEXT: {{ $}}
; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
- ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
+ ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; VI-LABEL: name: test_load_global_v2p3_align8
; VI: liveins: $vgpr0_vgpr1
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
- ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX9-HSA-LABEL: name: test_load_global_v2p3_align8
; GFX9-HSA: liveins: $vgpr0_vgpr1
; GFX9-HSA-NEXT: {{ $}}
; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
- ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
+ ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX9-MESA-LABEL: name: test_load_global_v2p3_align8
; GFX9-MESA: liveins: $vgpr0_vgpr1
; GFX9-MESA-NEXT: {{ $}}
; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
- ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
+ ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 1)
$vgpr0_vgpr1 = COPY %1
@@ -13500,43 +13506,49 @@ body: |
; SI: liveins: $vgpr0_vgpr1
; SI-NEXT: {{ $}}
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1)
- ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1)
+ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; CI-HSA-LABEL: name: test_load_global_v2p3_align4
; CI-HSA: liveins: $vgpr0_vgpr1
; CI-HSA-NEXT: {{ $}}
; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1)
- ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1)
+ ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; CI-MESA-LABEL: name: test_load_global_v2p3_align4
; CI-MESA: liveins: $vgpr0_vgpr1
; CI-MESA-NEXT: {{ $}}
; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1)
- ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1)
+ ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; VI-LABEL: name: test_load_global_v2p3_align4
; VI: liveins: $vgpr0_vgpr1
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1)
- ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX9-HSA-LABEL: name: test_load_global_v2p3_align4
; GFX9-HSA: liveins: $vgpr0_vgpr1
; GFX9-HSA-NEXT: {{ $}}
; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1)
- ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1)
+ ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX9-MESA-LABEL: name: test_load_global_v2p3_align4
; GFX9-MESA: liveins: $vgpr0_vgpr1
; GFX9-MESA-NEXT: {{ $}}
; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1)
- ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1)
+ ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 4, addrspace 1)
$vgpr0_vgpr1 = COPY %1
@@ -13569,7 +13581,6 @@ body: |
; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
- ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
@@ -13585,16 +13596,17 @@ body: |
; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
- ; SI-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
- ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
- ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
+ ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; CI-HSA-LABEL: name: test_load_global_v2p3_align1
; CI-HSA: liveins: $vgpr0_vgpr1
; CI-HSA-NEXT: {{ $}}
; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 1, addrspace 1)
- ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 1, addrspace 1)
+ ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; CI-MESA-LABEL: name: test_load_global_v2p3_align1
; CI-MESA: liveins: $vgpr0_vgpr1
@@ -13617,7 +13629,6 @@ body: |
; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
- ; CI-MESA-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
; CI-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
@@ -13633,9 +13644,9 @@ body: |
; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
- ; CI-MESA-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
- ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
- ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+ ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+ ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
+ ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; VI-LABEL: name: test_load_global_v2p3_align1
; VI: liveins: $vgpr0_vgpr1
@@ -13658,7 +13669,6 @@ body: |
; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
- ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
@@ -13674,16 +13684,17 @@ body: |
; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
- ; VI-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
- ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
+ ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX9-HSA-LABEL: name: test_load_global_v2p3_align1
; GFX9-HSA: liveins: $vgpr0_vgpr1
; GFX9-HSA-NEXT: {{ $}}
; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 1, addrspace 1)
- ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 1, addrspace 1)
+ ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX9-MESA-LABEL: name: test_load_global_v2p3_align1
; GFX9-MESA: liveins: $vgpr0_vgpr1
@@ -13706,7 +13717,6 @@ body: |
; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
- ; GFX9-MESA-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
; GFX9-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
@@ -13722,9 +13732,9 @@ body: |
; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
- ; GFX9-MESA-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
- ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
- ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+ ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+ ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
+ ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 1, addrspace 1)
$vgpr0_vgpr1 = COPY %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
index 1249de647bb759..1608234d6b2bc5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
@@ -14595,71 +14595,81 @@ body: |
; SI: liveins: $vgpr0
; SI-NEXT: {{ $}}
; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
- ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
+ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; CI-LABEL: name: test_load_local_v2p3_align8
; CI: liveins: $vgpr0
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
- ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
+ ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; CI-DS128-LABEL: name: test_load_local_v2p3_align8
; CI-DS128: liveins: $vgpr0
; CI-DS128-NEXT: {{ $}}
; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
- ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
+ ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; VI-LABEL: name: test_load_local_v2p3_align8
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
- ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX9-LABEL: name: test_load_local_v2p3_align8
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
- ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX9-UNALIGNED-LABEL: name: test_load_local_v2p3_align8
; GFX9-UNALIGNED: liveins: $vgpr0
; GFX9-UNALIGNED-NEXT: {{ $}}
; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
- ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
+ ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX10-LABEL: name: test_load_local_v2p3_align8
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
- ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
+ ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX10-UNALIGNED-LABEL: name: test_load_local_v2p3_align8
; GFX10-UNALIGNED: liveins: $vgpr0
; GFX10-UNALIGNED-NEXT: {{ $}}
; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
- ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
+ ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX11-LABEL: name: test_load_local_v2p3_align8
; GFX11: liveins: $vgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
- ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
+ ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX11-UNALIGNED-LABEL: name: test_load_local_v2p3_align8
; GFX11-UNALIGNED: liveins: $vgpr0
; GFX11-UNALIGNED-NEXT: {{ $}}
; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
- ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
+ ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
%0:_(p3) = COPY $vgpr0
%1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 3)
$vgpr0_vgpr1 = COPY %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir
index 6d93112aae1a06..8835956e11a4e2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir
@@ -16860,106 +16860,117 @@ body: |
; SI: liveins: $vgpr0
; SI-NEXT: {{ $}}
; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
- ; SI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), align 8, addrspace 5)
+ ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (p3) from unknown-address + 4, addrspace 5)
- ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3)
- ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+ ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
+ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
+ ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; CI-LABEL: name: test_load_private_v2p3_align8
; CI: liveins: $vgpr0
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
- ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), align 8, addrspace 5)
+ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (p3) from unknown-address + 4, addrspace 5)
- ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3)
- ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+ ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
+ ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
+ ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; VI-LABEL: name: test_load_private_v2p3_align8
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
- ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), align 8, addrspace 5)
+ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (p3) from unknown-address + 4, addrspace 5)
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3)
- ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+ ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
+ ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX9-LABEL: name: test_load_private_v2p3_align8
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
- ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), align 8, addrspace 5)
+ ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (p3) from unknown-address + 4, addrspace 5)
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3)
- ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+ ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
+ ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX10-LABEL: name: test_load_private_v2p3_align8
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
- ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), align 8, addrspace 5)
+ ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (p3) from unknown-address + 4, addrspace 5)
- ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3)
- ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+ ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+ ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
+ ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
+ ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX11-LABEL: name: test_load_private_v2p3_align8
; GFX11: liveins: $vgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
- ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p5) :: (load (<2 x p3>), addrspace 5)
- ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s32>), addrspace 5)
+ ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX12-LABEL: name: test_load_private_v2p3_align8
; GFX12: liveins: $vgpr0
; GFX12-NEXT: {{ $}}
; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
- ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p5) :: (load (<2 x p3>), addrspace 5)
- ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s32>), addrspace 5)
+ ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; UNALIGNED_GFX9-LABEL: name: test_load_private_v2p3_align8
; UNALIGNED_GFX9: liveins: $vgpr0
; UNALIGNED_GFX9-NEXT: {{ $}}
; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
- ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), align 8, addrspace 5)
+ ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (p3) from unknown-address + 4, addrspace 5)
- ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3)
- ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+ ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+ ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
+ ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
+ ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; UNALIGNED_GFX10-LABEL: name: test_load_private_v2p3_align8
; UNALIGNED_GFX10: liveins: $vgpr0
; UNALIGNED_GFX10-NEXT: {{ $}}
; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
- ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), align 8, addrspace 5)
+ ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (p3) from unknown-address + 4, addrspace 5)
- ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3)
- ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+ ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
+ ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
+ ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
+ ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; UNALIGNED_GFX11-LABEL: name: test_load_private_v2p3_align8
; UNALIGNED_GFX11: liveins: $vgpr0
; UNALIGNED_GFX11-NEXT: {{ $}}
; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
- ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p5) :: (load (<2 x p3>), addrspace 5)
- ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s32>), addrspace 5)
+ ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; UNALIGNED_GFX12-LABEL: name: test_load_private_v2p3_align8
; UNALIGNED_GFX12: liveins: $vgpr0
; UNALIGNED_GFX12-NEXT: {{ $}}
; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
- ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p5) :: (load (<2 x p3>), addrspace 5)
- ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s32>), addrspace 5)
+ ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
+ ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
%0:_(p5) = COPY $vgpr0
%1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 5)
$vgpr0_vgpr1 = COPY %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
index f2a88a21a286ef..b9c72d39ed45b6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
@@ -2310,9 +2310,9 @@ body: |
; SI-NEXT: {{ $}}
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
- ; SI-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY1]](<2 x p3>)
- ; SI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3)
- ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[PTRTOINT]](s32)
+ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>)
+ ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
@@ -2332,8 +2332,7 @@ body: |
; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
- ; SI-NEXT: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3)
- ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[PTRTOINT1]](s32)
+ ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32)
; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
@@ -2353,16 +2352,17 @@ body: |
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
- ; CI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 1, addrspace 1)
+ ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
+ ; CI-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 1, addrspace 1)
;
; VI-LABEL: name: test_store_global_v2p3_align1
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
- ; VI-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY1]](<2 x p3>)
- ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3)
- ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[PTRTOINT]](s32)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>)
+ ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
@@ -2383,8 +2383,7 @@ body: |
; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
- ; VI-NEXT: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3)
- ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[PTRTOINT1]](s32)
+ ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32)
; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
@@ -2405,7 +2404,8 @@ body: |
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
- ; GFX9-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 1, addrspace 1)
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
+ ; GFX9-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 1, addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<2 x p3>) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store (<2 x p3>), align 1, addrspace 1)
@@ -2422,9 +2422,9 @@ body: |
; SI-NEXT: {{ $}}
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
- ; SI-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY1]](<2 x p3>)
- ; SI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3)
- ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[PTRTOINT]](s32)
+ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>)
+ ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
@@ -2433,8 +2433,7 @@ body: |
; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
- ; SI-NEXT: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3)
- ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[PTRTOINT1]](s32)
+ ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32)
; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
@@ -2445,16 +2444,17 @@ body: |
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
- ; CI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 2, addrspace 1)
+ ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
+ ; CI-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 2, addrspace 1)
;
; VI-LABEL: name: test_store_global_v2p3_align2
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
- ; VI-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY1]](<2 x p3>)
- ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3)
- ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[PTRTOINT]](s32)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>)
+ ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
@@ -2463,8 +2463,7 @@ body: |
; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
- ; VI-NEXT: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3)
- ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[PTRTOINT1]](s32)
+ ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32)
; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
@@ -2475,7 +2474,8 @@ body: |
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
- ; GFX9-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 2, addrspace 1)
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
+ ; GFX9-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 2, addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<2 x p3>) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store (<2 x p3>), align 2, addrspace 1)
@@ -2492,28 +2492,32 @@ body: |
; SI-NEXT: {{ $}}
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
- ; SI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 4, addrspace 1)
+ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
+ ; SI-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1)
;
; CI-LABEL: name: test_store_global_v2p3_align4
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
- ; CI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 4, addrspace 1)
+ ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
+ ; CI-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1)
;
; VI-LABEL: name: test_store_global_v2p3_align4
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
- ; VI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 4, addrspace 1)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
+ ; VI-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1)
;
; GFX9-LABEL: name: test_store_global_v2p3_align4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
- ; GFX9-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 4, addrspace 1)
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
+ ; GFX9-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<2 x p3>) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store (<2 x p3>), align 4, addrspace 1)
@@ -2530,28 +2534,32 @@ body: |
; SI-NEXT: {{ $}}
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
- ; SI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), addrspace 1)
+ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
+ ; SI-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1)
;
; CI-LABEL: name: test_store_global_v2p3_align8
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
- ; CI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), addrspace 1)
+ ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
+ ; CI-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1)
;
; VI-LABEL: name: test_store_global_v2p3_align8
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
- ; VI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), addrspace 1)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
+ ; VI-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1)
;
; GFX9-LABEL: name: test_store_global_v2p3_align8
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
- ; GFX9-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), addrspace 1)
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
+ ; GFX9-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<2 x p3>) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store (<2 x p3>), align 8, addrspace 1)
@@ -2568,28 +2576,32 @@ body: |
; SI-NEXT: {{ $}}
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
- ; SI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 16, addrspace 1)
+ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
+ ; SI-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1)
;
; CI-LABEL: name: test_store_global_v2p3_align16
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
- ; CI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 16, addrspace 1)
+ ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
+ ; CI-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1)
;
; VI-LABEL: name: test_store_global_v2p3_align16
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
- ; VI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 16, addrspace 1)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
+ ; VI-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1)
;
; GFX9-LABEL: name: test_store_global_v2p3_align16
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
- ; GFX9-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 16, addrspace 1)
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
+ ; GFX9-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<2 x p3>) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store (<2 x p3>), align 16, addrspace 1)
More information about the llvm-commits
mailing list