[llvm] e075dcf - Revert "Reapply "[AMDGPU][GlobalISel] Fix load/store of pointer vectors, buffer.*.pN (#110714)" (#111059)"
NAKAMURA Takumi via llvm-commits
llvm-commits at lists.llvm.org
Sat Oct 5 19:08:28 PDT 2024
Author: NAKAMURA Takumi
Date: 2024-10-06T10:50:51+09:00
New Revision: e075dcf7d270fd52dc837163ff24e8c872dfeb49
URL: https://github.com/llvm/llvm-project/commit/e075dcf7d270fd52dc837163ff24e8c872dfeb49
DIFF: https://github.com/llvm/llvm-project/commit/e075dcf7d270fd52dc837163ff24e8c872dfeb49.diff
LOG: Revert "Reapply "[AMDGPU][GlobalISel] Fix load/store of pointer vectors, buffer.*.pN (#110714)" (#111059)"
This reverts commit 98a15c7b0c6ec129d371f0c121dbe9396c4f5609.
(llvmorg-20-init-8051-g98a15c7b0c6e)
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
llvm/lib/Target/AMDGPU/SIRegisterInfo.td
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
Removed:
llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-load-store-pointers.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-old-legalization.mir
################################################################################
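For readers skimming the revert: the first hunk in AMDGPULegalizerInfo.cpp below moves the pointer-vector check back underneath the small-size and non-vector early-outs. A minimal sketch (not verbatim; lines marked "elided" stand in for context outside the hunk) of how loadStoreBitcastWorkaround reads once this revert is applied:

    static bool loadStoreBitcastWorkaround(const LLT Ty) {
      // ... earlier early-outs elided; see the full file ...
      const unsigned Size = Ty.getSizeInBits();
      if (Size <= 64)
        return false;
      // Address space 8 pointers get their own workaround.
      // ... elided ...
      if (!Ty.isVector())
        return true;
      if (Ty.isPointerVector())
        return true;

      unsigned EltSize = Ty.getScalarSizeInBits();
      return EltSize != 32 && EltSize != 64;
    }

With the check back in this position, pointer vectors of 64 bits or fewer skip the bitcast workaround again, matching the behaviour before the reapplied #110714 change.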
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 121035028649ff..c3f751c1a98830 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -494,8 +494,6 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
return false;
const unsigned Size = Ty.getSizeInBits();
- if (Ty.isPointerVector())
- return true;
if (Size <= 64)
return false;
// Address space 8 pointers get their own workaround.
@@ -504,6 +502,9 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
if (!Ty.isVector())
return true;
+ if (Ty.isPointerVector())
+ return true;
+
unsigned EltSize = Ty.getScalarSizeInBits();
return EltSize != 32 && EltSize != 64;
}
@@ -5793,9 +5794,8 @@ Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B,
return Reg;
}
-Register AMDGPULegalizerInfo::fixStoreSourceType(MachineIRBuilder &B,
- Register VData, LLT MemTy,
- bool IsFormat) const {
+Register AMDGPULegalizerInfo::fixStoreSourceType(
+ MachineIRBuilder &B, Register VData, bool IsFormat) const {
MachineRegisterInfo *MRI = B.getMRI();
LLT Ty = MRI->getType(VData);
@@ -5805,10 +5805,6 @@ Register AMDGPULegalizerInfo::fixStoreSourceType(MachineIRBuilder &B,
if (hasBufferRsrcWorkaround(Ty))
return castBufferRsrcToV4I32(VData, B);
- if (shouldBitcastLoadStoreType(ST, Ty, MemTy)) {
- Ty = getBitcastRegisterType(Ty);
- VData = B.buildBitcast(Ty, VData).getReg(0);
- }
// Fixup illegal register types for i8 stores.
if (Ty == LLT::scalar(8) || Ty == S16) {
Register AnyExt = B.buildAnyExt(LLT::scalar(32), VData).getReg(0);
@@ -5826,27 +5822,23 @@ Register AMDGPULegalizerInfo::fixStoreSourceType(MachineIRBuilder &B,
}
bool AMDGPULegalizerInfo::legalizeBufferStore(MachineInstr &MI,
- LegalizerHelper &Helper,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B,
bool IsTyped,
bool IsFormat) const {
- MachineIRBuilder &B = Helper.MIRBuilder;
- MachineRegisterInfo &MRI = *B.getMRI();
-
Register VData = MI.getOperand(1).getReg();
LLT Ty = MRI.getType(VData);
LLT EltTy = Ty.getScalarType();
const bool IsD16 = IsFormat && (EltTy.getSizeInBits() == 16);
const LLT S32 = LLT::scalar(32);
- MachineMemOperand *MMO = *MI.memoperands_begin();
- const int MemSize = MMO->getSize().getValue();
- LLT MemTy = MMO->getMemoryType();
-
- VData = fixStoreSourceType(B, VData, MemTy, IsFormat);
-
+ VData = fixStoreSourceType(B, VData, IsFormat);
castBufferRsrcArgToV4I32(MI, B, 2);
Register RSrc = MI.getOperand(2).getReg();
+ MachineMemOperand *MMO = *MI.memoperands_begin();
+ const int MemSize = MMO->getSize().getValue();
+
unsigned ImmOffset;
// The typed intrinsics add an immediate after the registers.
@@ -5938,13 +5930,10 @@ static void buildBufferLoad(unsigned Opc, Register LoadDstReg, Register RSrc,
}
bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI,
- LegalizerHelper &Helper,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B,
bool IsFormat,
bool IsTyped) const {
- MachineIRBuilder &B = Helper.MIRBuilder;
- MachineRegisterInfo &MRI = *B.getMRI();
- GISelChangeObserver &Observer = Helper.Observer;
-
// FIXME: Verifier should enforce 1 MMO for these intrinsics.
MachineMemOperand *MMO = *MI.memoperands_begin();
const LLT MemTy = MMO->getMemoryType();
@@ -5993,21 +5982,9 @@ bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI,
// Make addrspace 8 pointers loads into 4xs32 loads here, so the rest of the
// logic doesn't have to handle that case.
if (hasBufferRsrcWorkaround(Ty)) {
- Observer.changingInstr(MI);
Ty = castBufferRsrcFromV4I32(MI, B, MRI, 0);
- Observer.changedInstr(MI);
Dst = MI.getOperand(0).getReg();
- B.setInsertPt(B.getMBB(), MI);
}
- if (shouldBitcastLoadStoreType(ST, Ty, MemTy)) {
- Ty = getBitcastRegisterType(Ty);
- Observer.changingInstr(MI);
- Helper.bitcastDst(MI, Ty, 0);
- Observer.changedInstr(MI);
- Dst = MI.getOperand(0).getReg();
- B.setInsertPt(B.getMBB(), MI);
- }
-
LLT EltTy = Ty.getScalarType();
const bool IsD16 = IsFormat && (EltTy.getSizeInBits() == 16);
const bool Unpacked = ST.hasUnpackedD16VMem();
@@ -7387,17 +7364,17 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
case Intrinsic::amdgcn_raw_ptr_buffer_store:
case Intrinsic::amdgcn_struct_buffer_store:
case Intrinsic::amdgcn_struct_ptr_buffer_store:
- return legalizeBufferStore(MI, Helper, false, false);
+ return legalizeBufferStore(MI, MRI, B, false, false);
case Intrinsic::amdgcn_raw_buffer_store_format:
case Intrinsic::amdgcn_raw_ptr_buffer_store_format:
case Intrinsic::amdgcn_struct_buffer_store_format:
case Intrinsic::amdgcn_struct_ptr_buffer_store_format:
- return legalizeBufferStore(MI, Helper, false, true);
+ return legalizeBufferStore(MI, MRI, B, false, true);
case Intrinsic::amdgcn_raw_tbuffer_store:
case Intrinsic::amdgcn_raw_ptr_tbuffer_store:
case Intrinsic::amdgcn_struct_tbuffer_store:
case Intrinsic::amdgcn_struct_ptr_tbuffer_store:
- return legalizeBufferStore(MI, Helper, true, true);
+ return legalizeBufferStore(MI, MRI, B, true, true);
case Intrinsic::amdgcn_raw_buffer_load:
case Intrinsic::amdgcn_raw_ptr_buffer_load:
case Intrinsic::amdgcn_raw_atomic_buffer_load:
@@ -7406,17 +7383,17 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
case Intrinsic::amdgcn_struct_ptr_buffer_load:
case Intrinsic::amdgcn_struct_atomic_buffer_load:
case Intrinsic::amdgcn_struct_ptr_atomic_buffer_load:
- return legalizeBufferLoad(MI, Helper, false, false);
+ return legalizeBufferLoad(MI, MRI, B, false, false);
case Intrinsic::amdgcn_raw_buffer_load_format:
case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
case Intrinsic::amdgcn_struct_buffer_load_format:
case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
- return legalizeBufferLoad(MI, Helper, true, false);
+ return legalizeBufferLoad(MI, MRI, B, true, false);
case Intrinsic::amdgcn_raw_tbuffer_load:
case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
case Intrinsic::amdgcn_struct_tbuffer_load:
case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
- return legalizeBufferLoad(MI, Helper, true, true);
+ return legalizeBufferLoad(MI, MRI, B, true, true);
case Intrinsic::amdgcn_raw_buffer_atomic_swap:
case Intrinsic::amdgcn_raw_ptr_buffer_atomic_swap:
case Intrinsic::amdgcn_struct_buffer_atomic_swap:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index 86c15197805d23..84470dc75b60ef 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -195,13 +195,15 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI,
Register Reg, bool ImageStore = false) const;
- Register fixStoreSourceType(MachineIRBuilder &B, Register VData, LLT MemTy,
+ Register fixStoreSourceType(MachineIRBuilder &B, Register VData,
bool IsFormat) const;
- bool legalizeBufferStore(MachineInstr &MI, LegalizerHelper &Helper,
- bool IsTyped, bool IsFormat) const;
- bool legalizeBufferLoad(MachineInstr &MI, LegalizerHelper &Helper,
- bool IsFormat, bool IsTyped) const;
+ bool legalizeBufferStore(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B, bool IsTyped,
+ bool IsFormat) const;
+ bool legalizeBufferLoad(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B, bool IsFormat,
+ bool IsTyped) const;
bool legalizeBufferAtomic(MachineInstr &MI, MachineIRBuilder &B,
Intrinsic::ID IID) const;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 902feacede83f4..ef9adde13348fe 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -590,7 +590,7 @@ class RegisterTypes<list<ValueType> reg_types> {
def Reg16Types : RegisterTypes<[i16, f16, bf16]>;
def Reg32Types : RegisterTypes<[i32, f32, v2i16, v2f16, v2bf16, p2, p3, p5, p6]>;
-def Reg64Types : RegisterTypes<[i64, f64, v2i32, v2f32, p0, p1, p4, v4i16, v4f16, v4bf16]>;
+def Reg64Types : RegisterTypes<[i64, f64, v2i32, v2f32, p0, v4i16, v4f16, v4bf16]>;
def Reg96Types : RegisterTypes<[v3i32, v3f32]>;
def Reg128Types : RegisterTypes<[v4i32, v4f32, v2i64, v2f64, v8i16, v8f16, v8bf16]>;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-load-store-pointers.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-load-store-pointers.ll
deleted file mode 100644
index 091c9f143ce7ee..00000000000000
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-load-store-pointers.ll
+++ /dev/null
@@ -1,301 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck --check-prefix=GFX9 %s
-
-define ptr @buffer_load_p0(ptr addrspace(8) inreg %buf) {
- ; GFX9-LABEL: name: buffer_load_p0
- ; GFX9: bb.1 (%ir-block.0):
- ; GFX9-NEXT: liveins: $sgpr6, $sgpr7, $sgpr16, $sgpr17
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr7
- ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr16
- ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr17
- ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX9-NEXT: [[BUFFER_LOAD_DWORDX2_OFFSET:%[0-9]+]]:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s64) from %ir.buf, align 1, addrspace 8)
- ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFSET]].sub0
- ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFSET]].sub1
- ; GFX9-NEXT: $vgpr0 = COPY [[COPY4]]
- ; GFX9-NEXT: $vgpr1 = COPY [[COPY5]]
- ; GFX9-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
- %ret = call ptr @llvm.amdgcn.raw.ptr.buffer.load.p0(ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0)
- ret ptr %ret
-}
-
-define void @buffer_store_p0(ptr %data, ptr addrspace(8) inreg %buf) {
- ; GFX9-LABEL: name: buffer_store_p0
- ; GFX9: bb.1 (%ir-block.0):
- ; GFX9-NEXT: liveins: $sgpr6, $sgpr7, $sgpr16, $sgpr17, $vgpr0, $vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr7
- ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr16
- ; GFX9-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr17
- ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
- ; GFX9-NEXT: BUFFER_STORE_DWORDX2_OFFSET_exact [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s64) into %ir.buf, align 1, addrspace 8)
- ; GFX9-NEXT: SI_RETURN
- call void @llvm.amdgcn.raw.ptr.buffer.store.p0(ptr %data, ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0)
- ret void
-}
-
-define ptr addrspace(1) @buffer_load_p1(ptr addrspace(8) inreg %buf) {
- ; GFX9-LABEL: name: buffer_load_p1
- ; GFX9: bb.1 (%ir-block.0):
- ; GFX9-NEXT: liveins: $sgpr6, $sgpr7, $sgpr16, $sgpr17
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr7
- ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr16
- ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr17
- ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX9-NEXT: [[BUFFER_LOAD_DWORDX2_OFFSET:%[0-9]+]]:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s64) from %ir.buf, align 1, addrspace 8)
- ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFSET]].sub0
- ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFSET]].sub1
- ; GFX9-NEXT: $vgpr0 = COPY [[COPY4]]
- ; GFX9-NEXT: $vgpr1 = COPY [[COPY5]]
- ; GFX9-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
- %ret = call ptr addrspace(1) @llvm.amdgcn.raw.ptr.buffer.load.p1(ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0)
- ret ptr addrspace(1) %ret
-}
-
-define void @buffer_store_p1(ptr addrspace(1) %data, ptr addrspace(8) inreg %buf) {
- ; GFX9-LABEL: name: buffer_store_p1
- ; GFX9: bb.1 (%ir-block.0):
- ; GFX9-NEXT: liveins: $sgpr6, $sgpr7, $sgpr16, $sgpr17, $vgpr0, $vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr7
- ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr16
- ; GFX9-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr17
- ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
- ; GFX9-NEXT: BUFFER_STORE_DWORDX2_OFFSET_exact [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s64) into %ir.buf, align 1, addrspace 8)
- ; GFX9-NEXT: SI_RETURN
- call void @llvm.amdgcn.raw.ptr.buffer.store.p1(ptr addrspace(1) %data, ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0)
- ret void
-}
-
-define ptr addrspace(4) @buffer_load_p4(ptr addrspace(8) inreg %buf) {
- ; GFX9-LABEL: name: buffer_load_p4
- ; GFX9: bb.1 (%ir-block.0):
- ; GFX9-NEXT: liveins: $sgpr6, $sgpr7, $sgpr16, $sgpr17
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr7
- ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr16
- ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr17
- ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX9-NEXT: [[BUFFER_LOAD_DWORDX2_OFFSET:%[0-9]+]]:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s64) from %ir.buf, align 1, addrspace 8)
- ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFSET]].sub0
- ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFSET]].sub1
- ; GFX9-NEXT: $vgpr0 = COPY [[COPY4]]
- ; GFX9-NEXT: $vgpr1 = COPY [[COPY5]]
- ; GFX9-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
- %ret = call ptr addrspace(4) @llvm.amdgcn.raw.ptr.buffer.load.p4(ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0)
- ret ptr addrspace(4) %ret
-}
-
-define void @buffer_store_p4(ptr addrspace(4) %data, ptr addrspace(8) inreg %buf) {
- ; GFX9-LABEL: name: buffer_store_p4
- ; GFX9: bb.1 (%ir-block.0):
- ; GFX9-NEXT: liveins: $sgpr6, $sgpr7, $sgpr16, $sgpr17, $vgpr0, $vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr7
- ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr16
- ; GFX9-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr17
- ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
- ; GFX9-NEXT: BUFFER_STORE_DWORDX2_OFFSET_exact [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s64) into %ir.buf, align 1, addrspace 8)
- ; GFX9-NEXT: SI_RETURN
- call void @llvm.amdgcn.raw.ptr.buffer.store.p4(ptr addrspace(4) %data, ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0)
- ret void
-}
-
-define ptr addrspace(5) @buffer_load_p5(ptr addrspace(8) inreg %buf) {
- ; GFX9-LABEL: name: buffer_load_p5
- ; GFX9: bb.1 (%ir-block.0):
- ; GFX9-NEXT: liveins: $sgpr6, $sgpr7, $sgpr16, $sgpr17
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr7
- ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr16
- ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr17
- ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.buf, align 1, addrspace 8)
- ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
- ; GFX9-NEXT: SI_RETURN implicit $vgpr0
- %ret = call ptr addrspace(5) @llvm.amdgcn.raw.ptr.buffer.load.p5(ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0)
- ret ptr addrspace(5) %ret
-}
-
-define void @buffer_store_p5(ptr addrspace(5) %data, ptr addrspace(8) inreg %buf) {
- ; GFX9-LABEL: name: buffer_store_p5
- ; GFX9: bb.1 (%ir-block.0):
- ; GFX9-NEXT: liveins: $sgpr6, $sgpr7, $sgpr16, $sgpr17, $vgpr0
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr7
- ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr16
- ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr17
- ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
- ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.buf, align 1, addrspace 8)
- ; GFX9-NEXT: SI_RETURN
- call void @llvm.amdgcn.raw.ptr.buffer.store.p5(ptr addrspace(5) %data, ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0)
- ret void
-}
-
-define <2 x ptr addrspace(1)> @buffer_load_v2p1(ptr addrspace(8) inreg %buf) {
- ; GFX9-LABEL: name: buffer_load_v2p1
- ; GFX9: bb.1 (%ir-block.0):
- ; GFX9-NEXT: liveins: $sgpr6, $sgpr7, $sgpr16, $sgpr17
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr7
- ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr16
- ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr17
- ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX9-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128_align2 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s64>) from %ir.buf, align 1, addrspace 8)
- ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFSET]].sub0
- ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFSET]].sub1
- ; GFX9-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFSET]].sub2
- ; GFX9-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFSET]].sub3
- ; GFX9-NEXT: $vgpr0 = COPY [[COPY4]]
- ; GFX9-NEXT: $vgpr1 = COPY [[COPY5]]
- ; GFX9-NEXT: $vgpr2 = COPY [[COPY6]]
- ; GFX9-NEXT: $vgpr3 = COPY [[COPY7]]
- ; GFX9-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
- %ret = call <2 x ptr addrspace(1)> @llvm.amdgcn.raw.ptr.buffer.load.v2p1(ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0)
- ret <2 x ptr addrspace(1)> %ret
-}
-
-define void @buffer_store_v2p5(<2 x ptr addrspace(1)> %data, ptr addrspace(8) inreg %buf) {
- ; GFX9-LABEL: name: buffer_store_v2p5
- ; GFX9: bb.1 (%ir-block.0):
- ; GFX9-NEXT: liveins: $sgpr6, $sgpr7, $sgpr16, $sgpr17, $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
- ; GFX9-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX9-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr7
- ; GFX9-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr16
- ; GFX9-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr17
- ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX9-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
- ; GFX9-NEXT: BUFFER_STORE_DWORDX4_OFFSET_exact [[REG_SEQUENCE2]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s64>) into %ir.buf, align 1, addrspace 8)
- ; GFX9-NEXT: SI_RETURN
- call void @llvm.amdgcn.raw.ptr.buffer.store.v2p1(<2 x ptr addrspace(1)> %data, ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0)
- ret void
-}
-
-define <3 x ptr addrspace(5)> @buffer_load_v3p5(ptr addrspace(8) inreg %buf) {
- ; GFX9-LABEL: name: buffer_load_v3p5
- ; GFX9: bb.1 (%ir-block.0):
- ; GFX9-NEXT: liveins: $sgpr6, $sgpr7, $sgpr16, $sgpr17
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr7
- ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr16
- ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr17
- ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX9-NEXT: [[BUFFER_LOAD_DWORDX3_OFFSET:%[0-9]+]]:vreg_96_align2 = BUFFER_LOAD_DWORDX3_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>) from %ir.buf, align 1, addrspace 8)
- ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFSET]].sub0
- ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFSET]].sub1
- ; GFX9-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFSET]].sub2
- ; GFX9-NEXT: $vgpr0 = COPY [[COPY4]]
- ; GFX9-NEXT: $vgpr1 = COPY [[COPY5]]
- ; GFX9-NEXT: $vgpr2 = COPY [[COPY6]]
- ; GFX9-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
- %ret = call <3 x ptr addrspace(5)> @llvm.amdgcn.raw.ptr.buffer.load.v3p5(ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0)
- ret <3 x ptr addrspace(5)> %ret
-}
-
-define void @buffer_store_v3p5(<3 x ptr addrspace(5)> %data, ptr addrspace(8) inreg %buf) {
- ; GFX9-LABEL: name: buffer_store_v3p5
- ; GFX9: bb.1 (%ir-block.0):
- ; GFX9-NEXT: liveins: $sgpr6, $sgpr7, $sgpr16, $sgpr17, $vgpr0, $vgpr1, $vgpr2
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2
- ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr7
- ; GFX9-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr16
- ; GFX9-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr17
- ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[COPY6]], %subreg.sub3
- ; GFX9-NEXT: BUFFER_STORE_DWORDX3_OFFSET_exact [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>) into %ir.buf, align 1, addrspace 8)
- ; GFX9-NEXT: SI_RETURN
- call void @llvm.amdgcn.raw.ptr.buffer.store.v3p5(<3 x ptr addrspace(5)> %data, ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0)
- ret void
-}
-
-define <4 x ptr addrspace(5)> @buffer_load_v4p5(ptr addrspace(8) inreg %buf) {
- ; GFX9-LABEL: name: buffer_load_v4p5
- ; GFX9: bb.1 (%ir-block.0):
- ; GFX9-NEXT: liveins: $sgpr6, $sgpr7, $sgpr16, $sgpr17
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr7
- ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr16
- ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr17
- ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX9-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128_align2 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>) from %ir.buf, align 1, addrspace 8)
- ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFSET]].sub0
- ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFSET]].sub1
- ; GFX9-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFSET]].sub2
- ; GFX9-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFSET]].sub3
- ; GFX9-NEXT: $vgpr0 = COPY [[COPY4]]
- ; GFX9-NEXT: $vgpr1 = COPY [[COPY5]]
- ; GFX9-NEXT: $vgpr2 = COPY [[COPY6]]
- ; GFX9-NEXT: $vgpr3 = COPY [[COPY7]]
- ; GFX9-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
- %ret = call <4 x ptr addrspace(5)> @llvm.amdgcn.raw.ptr.buffer.load.v4p5(ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0)
- ret <4 x ptr addrspace(5)> %ret
-}
-
-define void @buffer_store_v4p5(<4 x ptr addrspace(5)> %data, ptr addrspace(8) inreg %buf) {
- ; GFX9-LABEL: name: buffer_store_v4p5
- ; GFX9: bb.1 (%ir-block.0):
- ; GFX9-NEXT: liveins: $sgpr6, $sgpr7, $sgpr16, $sgpr17, $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX9-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr7
- ; GFX9-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr16
- ; GFX9-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr17
- ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
- ; GFX9-NEXT: BUFFER_STORE_DWORDX4_OFFSET_exact [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>) into %ir.buf, align 1, addrspace 8)
- ; GFX9-NEXT: SI_RETURN
- call void @llvm.amdgcn.raw.ptr.buffer.store.v4p5(<4 x ptr addrspace(5)> %data, ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0)
- ret void
-}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-old-legalization.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-old-legalization.mir
deleted file mode 100644
index 0c42b70edda65a..00000000000000
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-old-legalization.mir
+++ /dev/null
@@ -1,3279 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s
-# RUN: llc -mtriple=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
-# RUN: llc -mtriple=amdgcn -mcpu=hawaii -mattr=+flat-for-global -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7-FLAT %s
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX11 %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX12 %s
-
----
-
-name: load_global_s32_from_4
-
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1
-
- ; GFX6-LABEL: name: load_global_s32_from_4
- ; GFX6: liveins: $vgpr0_vgpr1
- ; GFX6-NEXT: {{ $}}
- ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 1)
- ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
- ;
- ; GFX7-LABEL: name: load_global_s32_from_4
- ; GFX7: liveins: $vgpr0_vgpr1
- ; GFX7-NEXT: {{ $}}
- ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 1)
- ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
- ;
- ; GFX7-FLAT-LABEL: name: load_global_s32_from_4
- ; GFX7-FLAT: liveins: $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: {{ $}}
- ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 1)
- ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
- ;
- ; GFX8-LABEL: name: load_global_s32_from_4
- ; GFX8: liveins: $vgpr0_vgpr1
- ; GFX8-NEXT: {{ $}}
- ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 1)
- ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
- ;
- ; GFX9-LABEL: name: load_global_s32_from_4
- ; GFX9: liveins: $vgpr0_vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
- ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
- ;
- ; GFX10-LABEL: name: load_global_s32_from_4
- ; GFX10: liveins: $vgpr0_vgpr1
- ; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
- ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
- ;
- ; GFX11-LABEL: name: load_global_s32_from_4
- ; GFX11: liveins: $vgpr0_vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
- ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
- ;
- ; GFX12-LABEL: name: load_global_s32_from_4
- ; GFX12: liveins: $vgpr0_vgpr1
- ; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
- ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
- %0:vgpr(p1) = COPY $vgpr0_vgpr1
- %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 1)
- $vgpr0 = COPY %1
-
-...
-
----
-
-name: load_global_s32_from_2
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1
-
- ; GFX6-LABEL: name: load_global_s32_from_2
- ; GFX6: liveins: $vgpr0_vgpr1
- ; GFX6-NEXT: {{ $}}
- ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6-NEXT: [[BUFFER_LOAD_USHORT_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (s16), addrspace 1)
- ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_ADDR64_]]
- ;
- ; GFX7-LABEL: name: load_global_s32_from_2
- ; GFX7: liveins: $vgpr0_vgpr1
- ; GFX7-NEXT: {{ $}}
- ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7-NEXT: [[BUFFER_LOAD_USHORT_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (s16), addrspace 1)
- ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_ADDR64_]]
- ;
- ; GFX7-FLAT-LABEL: name: load_global_s32_from_2
- ; GFX7-FLAT: liveins: $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: {{ $}}
- ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16), addrspace 1)
- ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_USHORT]]
- ;
- ; GFX8-LABEL: name: load_global_s32_from_2
- ; GFX8: liveins: $vgpr0_vgpr1
- ; GFX8-NEXT: {{ $}}
- ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16), addrspace 1)
- ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_USHORT]]
- ;
- ; GFX9-LABEL: name: load_global_s32_from_2
- ; GFX9: liveins: $vgpr0_vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9-NEXT: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 1)
- ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_USHORT]]
- ;
- ; GFX10-LABEL: name: load_global_s32_from_2
- ; GFX10: liveins: $vgpr0_vgpr1
- ; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10-NEXT: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 1)
- ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_USHORT]]
- ;
- ; GFX11-LABEL: name: load_global_s32_from_2
- ; GFX11: liveins: $vgpr0_vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX11-NEXT: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 1)
- ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_USHORT]]
- ;
- ; GFX12-LABEL: name: load_global_s32_from_2
- ; GFX12: liveins: $vgpr0_vgpr1
- ; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX12-NEXT: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 1)
- ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_USHORT]]
- %0:vgpr(p1) = COPY $vgpr0_vgpr1
- %1:vgpr(s32) = G_LOAD %0 :: (load (s16), align 2, addrspace 1)
- $vgpr0 = COPY %1
-
-...
-
----
-
-name: load_global_s32_from_1
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1
-
- ; GFX6-LABEL: name: load_global_s32_from_1
- ; GFX6: liveins: $vgpr0_vgpr1
- ; GFX6-NEXT: {{ $}}
- ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
- ;
- ; GFX7-LABEL: name: load_global_s32_from_1
- ; GFX7: liveins: $vgpr0_vgpr1
- ; GFX7-NEXT: {{ $}}
- ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
- ;
- ; GFX7-FLAT-LABEL: name: load_global_s32_from_1
- ; GFX7-FLAT: liveins: $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: {{ $}}
- ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
- ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
- ;
- ; GFX8-LABEL: name: load_global_s32_from_1
- ; GFX8: liveins: $vgpr0_vgpr1
- ; GFX8-NEXT: {{ $}}
- ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
- ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
- ;
- ; GFX9-LABEL: name: load_global_s32_from_1
- ; GFX9: liveins: $vgpr0_vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX10-LABEL: name: load_global_s32_from_1
- ; GFX10: liveins: $vgpr0_vgpr1
- ; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX11-LABEL: name: load_global_s32_from_1
- ; GFX11: liveins: $vgpr0_vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX12-LABEL: name: load_global_s32_from_1
- ; GFX12: liveins: $vgpr0_vgpr1
- ; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- %0:vgpr(p1) = COPY $vgpr0_vgpr1
- %1:vgpr(s32) = G_LOAD %0 :: (load (s8), align 1, addrspace 1)
- $vgpr0 = COPY %1
-
-...
-
----
-
-name: load_global_v2s32
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1
-
- ; GFX6-LABEL: name: load_global_v2s32
- ; GFX6: liveins: $vgpr0_vgpr1
- ; GFX6-NEXT: {{ $}}
- ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1)
- ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
- ;
- ; GFX7-LABEL: name: load_global_v2s32
- ; GFX7: liveins: $vgpr0_vgpr1
- ; GFX7-NEXT: {{ $}}
- ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1)
- ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
- ;
- ; GFX7-FLAT-LABEL: name: load_global_v2s32
- ; GFX7-FLAT: liveins: $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: {{ $}}
- ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>), addrspace 1)
- ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
- ;
- ; GFX8-LABEL: name: load_global_v2s32
- ; GFX8: liveins: $vgpr0_vgpr1
- ; GFX8-NEXT: {{ $}}
- ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>), addrspace 1)
- ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
- ;
- ; GFX9-LABEL: name: load_global_v2s32
- ; GFX9: liveins: $vgpr0_vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1)
- ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
- ;
- ; GFX10-LABEL: name: load_global_v2s32
- ; GFX10: liveins: $vgpr0_vgpr1
- ; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1)
- ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
- ;
- ; GFX11-LABEL: name: load_global_v2s32
- ; GFX11: liveins: $vgpr0_vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1)
- ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
- ;
- ; GFX12-LABEL: name: load_global_v2s32
- ; GFX12: liveins: $vgpr0_vgpr1
- ; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1)
- ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
- %0:vgpr(p1) = COPY $vgpr0_vgpr1
- %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 1)
- $vgpr0_vgpr1 = COPY %1
-
-...
-
----
-
-name: load_global_v4s32
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1
-
- ; GFX6-LABEL: name: load_global_v4s32
- ; GFX6: liveins: $vgpr0_vgpr1
- ; GFX6-NEXT: {{ $}}
- ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
- ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]]
- ;
- ; GFX7-LABEL: name: load_global_v4s32
- ; GFX7: liveins: $vgpr0_vgpr1
- ; GFX7-NEXT: {{ $}}
- ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
- ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]]
- ;
- ; GFX7-FLAT-LABEL: name: load_global_v4s32
- ; GFX7-FLAT: liveins: $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: {{ $}}
- ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4, addrspace 1)
- ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
- ;
- ; GFX8-LABEL: name: load_global_v4s32
- ; GFX8: liveins: $vgpr0_vgpr1
- ; GFX8-NEXT: {{ $}}
- ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4, addrspace 1)
- ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
- ;
- ; GFX9-LABEL: name: load_global_v4s32
- ; GFX9: liveins: $vgpr0_vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
- ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
- ;
- ; GFX10-LABEL: name: load_global_v4s32
- ; GFX10: liveins: $vgpr0_vgpr1
- ; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
- ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
- ;
- ; GFX11-LABEL: name: load_global_v4s32
- ; GFX11: liveins: $vgpr0_vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
- ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
- ;
- ; GFX12-LABEL: name: load_global_v4s32
- ; GFX12: liveins: $vgpr0_vgpr1
- ; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
- ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
- %0:vgpr(p1) = COPY $vgpr0_vgpr1
- %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 1)
- $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
-
-...
-
----
-
-name: load_global_s64
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1
-
- ; GFX6-LABEL: name: load_global_s64
- ; GFX6: liveins: $vgpr0_vgpr1
- ; GFX6-NEXT: {{ $}}
- ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (s64), addrspace 1)
- ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
- ;
- ; GFX7-LABEL: name: load_global_s64
- ; GFX7: liveins: $vgpr0_vgpr1
- ; GFX7-NEXT: {{ $}}
- ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (s64), addrspace 1)
- ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
- ;
- ; GFX7-FLAT-LABEL: name: load_global_s64
- ; GFX7-FLAT: liveins: $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: {{ $}}
- ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64), addrspace 1)
- ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
- ;
- ; GFX8-LABEL: name: load_global_s64
- ; GFX8: liveins: $vgpr0_vgpr1
- ; GFX8-NEXT: {{ $}}
- ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64), addrspace 1)
- ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
- ;
- ; GFX9-LABEL: name: load_global_s64
- ; GFX9: liveins: $vgpr0_vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
- ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
- ;
- ; GFX10-LABEL: name: load_global_s64
- ; GFX10: liveins: $vgpr0_vgpr1
- ; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
- ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
- ;
- ; GFX11-LABEL: name: load_global_s64
- ; GFX11: liveins: $vgpr0_vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
- ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
- ;
- ; GFX12-LABEL: name: load_global_s64
- ; GFX12: liveins: $vgpr0_vgpr1
- ; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
- ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
- %0:vgpr(p1) = COPY $vgpr0_vgpr1
- %1:vgpr(s64) = G_LOAD %0 :: (load (s64), align 8, addrspace 1)
- $vgpr0_vgpr1 = COPY %1
-
-...
-
----
-
-name: load_global_v2s64
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1
-
- ; GFX6-LABEL: name: load_global_v2s64
- ; GFX6: liveins: $vgpr0_vgpr1
- ; GFX6-NEXT: {{ $}}
- ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1)
- ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]]
- ;
- ; GFX7-LABEL: name: load_global_v2s64
- ; GFX7: liveins: $vgpr0_vgpr1
- ; GFX7-NEXT: {{ $}}
- ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1)
- ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]]
- ;
- ; GFX7-FLAT-LABEL: name: load_global_v2s64
- ; GFX7-FLAT: liveins: $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: {{ $}}
- ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4, addrspace 1)
- ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
- ;
- ; GFX8-LABEL: name: load_global_v2s64
- ; GFX8: liveins: $vgpr0_vgpr1
- ; GFX8-NEXT: {{ $}}
- ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4, addrspace 1)
- ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
- ;
- ; GFX9-LABEL: name: load_global_v2s64
- ; GFX9: liveins: $vgpr0_vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1)
- ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
- ;
- ; GFX10-LABEL: name: load_global_v2s64
- ; GFX10: liveins: $vgpr0_vgpr1
- ; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1)
- ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
- ;
- ; GFX11-LABEL: name: load_global_v2s64
- ; GFX11: liveins: $vgpr0_vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1)
- ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
- ;
- ; GFX12-LABEL: name: load_global_v2s64
- ; GFX12: liveins: $vgpr0_vgpr1
- ; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1)
- ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
- %0:vgpr(p1) = COPY $vgpr0_vgpr1
- %1:vgpr(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 4, addrspace 1)
- $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
-
-...
-
----
-
-name: load_global_v2p1
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1
-
- ; GFX6-LABEL: name: load_global_v2p1
- ; GFX6: liveins: $vgpr0_vgpr1
- ; GFX6-NEXT: {{ $}}
- ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4, addrspace 1)
- ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
- ;
- ; GFX7-LABEL: name: load_global_v2p1
- ; GFX7: liveins: $vgpr0_vgpr1
- ; GFX7-NEXT: {{ $}}
- ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4, addrspace 1)
- ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
- ;
- ; GFX7-FLAT-LABEL: name: load_global_v2p1
- ; GFX7-FLAT: liveins: $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: {{ $}}
- ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4, addrspace 1)
- ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
- ;
- ; GFX8-LABEL: name: load_global_v2p1
- ; GFX8: liveins: $vgpr0_vgpr1
- ; GFX8-NEXT: {{ $}}
- ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4, addrspace 1)
- ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
- ;
- ; GFX9-LABEL: name: load_global_v2p1
- ; GFX9: liveins: $vgpr0_vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4, addrspace 1)
- ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
- ;
- ; GFX10-LABEL: name: load_global_v2p1
- ; GFX10: liveins: $vgpr0_vgpr1
- ; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4, addrspace 1)
- ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
- ;
- ; GFX11-LABEL: name: load_global_v2p1
- ; GFX11: liveins: $vgpr0_vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4, addrspace 1)
- ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
- ;
- ; GFX12-LABEL: name: load_global_v2p1
- ; GFX12: liveins: $vgpr0_vgpr1
- ; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4, addrspace 1)
- ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
- %0:vgpr(p1) = COPY $vgpr0_vgpr1
- %1:vgpr(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 4, addrspace 1)
- $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
-
-...
-
----
-
-name: load_global_s128
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1
-
- ; GFX6-LABEL: name: load_global_s128
- ; GFX6: liveins: $vgpr0_vgpr1
- ; GFX6-NEXT: {{ $}}
- ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vgpr(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1)
- ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
- ;
- ; GFX7-LABEL: name: load_global_s128
- ; GFX7: liveins: $vgpr0_vgpr1
- ; GFX7-NEXT: {{ $}}
- ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1)
- ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
- ;
- ; GFX7-FLAT-LABEL: name: load_global_s128
- ; GFX7-FLAT: liveins: $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: {{ $}}
- ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vgpr(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1)
- ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
- ;
- ; GFX8-LABEL: name: load_global_s128
- ; GFX8: liveins: $vgpr0_vgpr1
- ; GFX8-NEXT: {{ $}}
- ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vgpr(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1)
- ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
- ;
- ; GFX9-LABEL: name: load_global_s128
- ; GFX9: liveins: $vgpr0_vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1)
- ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
- ;
- ; GFX10-LABEL: name: load_global_s128
- ; GFX10: liveins: $vgpr0_vgpr1
- ; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1)
- ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
- ;
- ; GFX11-LABEL: name: load_global_s128
- ; GFX11: liveins: $vgpr0_vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vgpr(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1)
- ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
- ;
- ; GFX12-LABEL: name: load_global_s128
- ; GFX12: liveins: $vgpr0_vgpr1
- ; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vgpr(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1)
- ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
- %0:vgpr(p1) = COPY $vgpr0_vgpr1
- %1:vgpr(s128) = G_LOAD %0 :: (load (s128), align 4, addrspace 1)
- $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
-
-...
-
----
-
-name: load_global_p3_from_4
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1
-
- ; GFX6-LABEL: name: load_global_p3_from_4
- ; GFX6: liveins: $vgpr0_vgpr1
- ; GFX6-NEXT: {{ $}}
- ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (p3), addrspace 1)
- ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
- ;
- ; GFX7-LABEL: name: load_global_p3_from_4
- ; GFX7: liveins: $vgpr0_vgpr1
- ; GFX7-NEXT: {{ $}}
- ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (p3), addrspace 1)
- ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
- ;
- ; GFX7-FLAT-LABEL: name: load_global_p3_from_4
- ; GFX7-FLAT: liveins: $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: {{ $}}
- ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3), addrspace 1)
- ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
- ;
- ; GFX8-LABEL: name: load_global_p3_from_4
- ; GFX8: liveins: $vgpr0_vgpr1
- ; GFX8-NEXT: {{ $}}
- ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3), addrspace 1)
- ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
- ;
- ; GFX9-LABEL: name: load_global_p3_from_4
- ; GFX9: liveins: $vgpr0_vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (p3), addrspace 1)
- ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
- ;
- ; GFX10-LABEL: name: load_global_p3_from_4
- ; GFX10: liveins: $vgpr0_vgpr1
- ; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (p3), addrspace 1)
- ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
- ;
- ; GFX11-LABEL: name: load_global_p3_from_4
- ; GFX11: liveins: $vgpr0_vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (p3), addrspace 1)
- ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
- ;
- ; GFX12-LABEL: name: load_global_p3_from_4
- ; GFX12: liveins: $vgpr0_vgpr1
- ; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (p3), addrspace 1)
- ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
- %0:vgpr(p1) = COPY $vgpr0_vgpr1
- %1:vgpr(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 1)
- $vgpr0 = COPY %1
-
-...
-
----
-
-name: load_global_p1_from_8
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1
-
- ; GFX6-LABEL: name: load_global_p1_from_8
- ; GFX6: liveins: $vgpr0_vgpr1
- ; GFX6-NEXT: {{ $}}
- ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (p1), addrspace 1)
- ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
- ;
- ; GFX7-LABEL: name: load_global_p1_from_8
- ; GFX7: liveins: $vgpr0_vgpr1
- ; GFX7-NEXT: {{ $}}
- ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (p1), addrspace 1)
- ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
- ;
- ; GFX7-FLAT-LABEL: name: load_global_p1_from_8
- ; GFX7-FLAT: liveins: $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: {{ $}}
- ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1), addrspace 1)
- ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
- ;
- ; GFX8-LABEL: name: load_global_p1_from_8
- ; GFX8: liveins: $vgpr0_vgpr1
- ; GFX8-NEXT: {{ $}}
- ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1), addrspace 1)
- ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
- ;
- ; GFX9-LABEL: name: load_global_p1_from_8
- ; GFX9: liveins: $vgpr0_vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (p1), addrspace 1)
- ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
- ;
- ; GFX10-LABEL: name: load_global_p1_from_8
- ; GFX10: liveins: $vgpr0_vgpr1
- ; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (p1), addrspace 1)
- ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
- ;
- ; GFX11-LABEL: name: load_global_p1_from_8
- ; GFX11: liveins: $vgpr0_vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (p1), addrspace 1)
- ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
- ;
- ; GFX12-LABEL: name: load_global_p1_from_8
- ; GFX12: liveins: $vgpr0_vgpr1
- ; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (p1), addrspace 1)
- ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
- %0:vgpr(p1) = COPY $vgpr0_vgpr1
- %1:vgpr(p1) = G_LOAD %0 :: (load (p1), align 8, addrspace 1)
- $vgpr0_vgpr1 = COPY %1
-
-...
-
----
-
-name: load_global_p999_from_8
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1
-
- ; GFX6-LABEL: name: load_global_p999_from_8
- ; GFX6: liveins: $vgpr0_vgpr1
- ; GFX6-NEXT: {{ $}}
- ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999), addrspace 1)
- ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
- ;
- ; GFX7-LABEL: name: load_global_p999_from_8
- ; GFX7: liveins: $vgpr0_vgpr1
- ; GFX7-NEXT: {{ $}}
- ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999), addrspace 1)
- ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
- ;
- ; GFX7-FLAT-LABEL: name: load_global_p999_from_8
- ; GFX7-FLAT: liveins: $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: {{ $}}
- ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999), addrspace 1)
- ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
- ;
- ; GFX8-LABEL: name: load_global_p999_from_8
- ; GFX8: liveins: $vgpr0_vgpr1
- ; GFX8-NEXT: {{ $}}
- ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999), addrspace 1)
- ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
- ;
- ; GFX9-LABEL: name: load_global_p999_from_8
- ; GFX9: liveins: $vgpr0_vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999), addrspace 1)
- ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
- ;
- ; GFX10-LABEL: name: load_global_p999_from_8
- ; GFX10: liveins: $vgpr0_vgpr1
- ; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999), addrspace 1)
- ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
- ;
- ; GFX11-LABEL: name: load_global_p999_from_8
- ; GFX11: liveins: $vgpr0_vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999), addrspace 1)
- ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
- ;
- ; GFX12-LABEL: name: load_global_p999_from_8
- ; GFX12: liveins: $vgpr0_vgpr1
- ; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999), addrspace 1)
- ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
- %0:vgpr(p1) = COPY $vgpr0_vgpr1
- %1:vgpr(p999) = G_LOAD %0 :: (load (p999), align 8, addrspace 1)
- $vgpr0_vgpr1 = COPY %1
-
-...
-
----
-
-name: load_global_v2p3
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1
-
- ; GFX6-LABEL: name: load_global_v2p3
- ; GFX6: liveins: $vgpr0_vgpr1
- ; GFX6-NEXT: {{ $}}
- ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
- ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
- ;
- ; GFX7-LABEL: name: load_global_v2p3
- ; GFX7: liveins: $vgpr0_vgpr1
- ; GFX7-NEXT: {{ $}}
- ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
- ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
- ;
- ; GFX7-FLAT-LABEL: name: load_global_v2p3
- ; GFX7-FLAT: liveins: $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: {{ $}}
- ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
- ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
- ;
- ; GFX8-LABEL: name: load_global_v2p3
- ; GFX8: liveins: $vgpr0_vgpr1
- ; GFX8-NEXT: {{ $}}
- ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
- ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
- ;
- ; GFX9-LABEL: name: load_global_v2p3
- ; GFX9: liveins: $vgpr0_vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
- ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
- ;
- ; GFX10-LABEL: name: load_global_v2p3
- ; GFX10: liveins: $vgpr0_vgpr1
- ; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
- ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
- ;
- ; GFX11-LABEL: name: load_global_v2p3
- ; GFX11: liveins: $vgpr0_vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
- ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
- ;
- ; GFX12-LABEL: name: load_global_v2p3
- ; GFX12: liveins: $vgpr0_vgpr1
- ; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
- ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
- %0:vgpr(p1) = COPY $vgpr0_vgpr1
- %1:vgpr(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 1)
- $vgpr0_vgpr1 = COPY %1
-
-...
-
----
-
-name: load_global_v2s16
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1
-
- ; GFX6-LABEL: name: load_global_v2s16
- ; GFX6: liveins: $vgpr0_vgpr1
- ; GFX6-NEXT: {{ $}}
- ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 1)
- ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
- ;
- ; GFX7-LABEL: name: load_global_v2s16
- ; GFX7: liveins: $vgpr0_vgpr1
- ; GFX7-NEXT: {{ $}}
- ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 1)
- ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
- ;
- ; GFX7-FLAT-LABEL: name: load_global_v2s16
- ; GFX7-FLAT: liveins: $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: {{ $}}
- ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>), addrspace 1)
- ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
- ;
- ; GFX8-LABEL: name: load_global_v2s16
- ; GFX8: liveins: $vgpr0_vgpr1
- ; GFX8-NEXT: {{ $}}
- ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>), addrspace 1)
- ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
- ;
- ; GFX9-LABEL: name: load_global_v2s16
- ; GFX9: liveins: $vgpr0_vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 1)
- ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
- ;
- ; GFX10-LABEL: name: load_global_v2s16
- ; GFX10: liveins: $vgpr0_vgpr1
- ; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 1)
- ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
- ;
- ; GFX11-LABEL: name: load_global_v2s16
- ; GFX11: liveins: $vgpr0_vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 1)
- ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
- ;
- ; GFX12-LABEL: name: load_global_v2s16
- ; GFX12: liveins: $vgpr0_vgpr1
- ; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX12-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 1)
- ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
- %0:vgpr(p1) = COPY $vgpr0_vgpr1
- %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 1)
- $vgpr0 = COPY %1
-
-...
-
----
-
-name: load_global_v4s16
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1
-
- ; GFX6-LABEL: name: load_global_v4s16
- ; GFX6: liveins: $vgpr0_vgpr1
- ; GFX6-NEXT: {{ $}}
- ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 1)
- ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
- ;
- ; GFX7-LABEL: name: load_global_v4s16
- ; GFX7: liveins: $vgpr0_vgpr1
- ; GFX7-NEXT: {{ $}}
- ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 1)
- ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
- ;
- ; GFX7-FLAT-LABEL: name: load_global_v4s16
- ; GFX7-FLAT: liveins: $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: {{ $}}
- ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>), addrspace 1)
- ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
- ;
- ; GFX8-LABEL: name: load_global_v4s16
- ; GFX8: liveins: $vgpr0_vgpr1
- ; GFX8-NEXT: {{ $}}
- ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>), addrspace 1)
- ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
- ;
- ; GFX9-LABEL: name: load_global_v4s16
- ; GFX9: liveins: $vgpr0_vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 1)
- ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
- ;
- ; GFX10-LABEL: name: load_global_v4s16
- ; GFX10: liveins: $vgpr0_vgpr1
- ; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 1)
- ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
- ;
- ; GFX11-LABEL: name: load_global_v4s16
- ; GFX11: liveins: $vgpr0_vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 1)
- ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
- ;
- ; GFX12-LABEL: name: load_global_v4s16
- ; GFX12: liveins: $vgpr0_vgpr1
- ; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 1)
- ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
- %0:vgpr(p1) = COPY $vgpr0_vgpr1
- %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 8, addrspace 1)
- $vgpr0_vgpr1 = COPY %1
-
-...
-
----
-
-name: load_global_v8s16
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1
-
- ; GFX6-LABEL: name: load_global_v8s16
- ; GFX6: liveins: $vgpr0_vgpr1
- ; GFX6-NEXT: {{ $}}
- ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4, addrspace 1)
- ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
- ;
- ; GFX7-LABEL: name: load_global_v8s16
- ; GFX7: liveins: $vgpr0_vgpr1
- ; GFX7-NEXT: {{ $}}
- ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4, addrspace 1)
- ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
- ;
- ; GFX7-FLAT-LABEL: name: load_global_v8s16
- ; GFX7-FLAT: liveins: $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: {{ $}}
- ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4, addrspace 1)
- ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
- ;
- ; GFX8-LABEL: name: load_global_v8s16
- ; GFX8: liveins: $vgpr0_vgpr1
- ; GFX8-NEXT: {{ $}}
- ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4, addrspace 1)
- ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
- ;
- ; GFX9-LABEL: name: load_global_v8s16
- ; GFX9: liveins: $vgpr0_vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4, addrspace 1)
- ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
- ;
- ; GFX10-LABEL: name: load_global_v8s16
- ; GFX10: liveins: $vgpr0_vgpr1
- ; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4, addrspace 1)
- ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
- ;
- ; GFX11-LABEL: name: load_global_v8s16
- ; GFX11: liveins: $vgpr0_vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4, addrspace 1)
- ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
- ;
- ; GFX12-LABEL: name: load_global_v8s16
- ; GFX12: liveins: $vgpr0_vgpr1
- ; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4, addrspace 1)
- ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
- %0:vgpr(p1) = COPY $vgpr0_vgpr1
- %1:vgpr(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 4, addrspace 1)
- $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
-
-...
-
-################################################################################
-### Stress addressing modes
-################################################################################
-
----
-
-name: load_global_s32_from_1_gep_2047
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1
-
- ; GFX6-LABEL: name: load_global_s32_from_1_gep_2047
- ; GFX6: liveins: $vgpr0_vgpr1
- ; GFX6-NEXT: {{ $}}
- ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
- ;
- ; GFX7-LABEL: name: load_global_s32_from_1_gep_2047
- ; GFX7: liveins: $vgpr0_vgpr1
- ; GFX7-NEXT: {{ $}}
- ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
- ;
- ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_2047
- ; GFX7-FLAT: liveins: $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: {{ $}}
- ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 2047, implicit $exec
- ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
- ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
- ;
- ; GFX8-LABEL: name: load_global_s32_from_1_gep_2047
- ; GFX8: liveins: $vgpr0_vgpr1
- ; GFX8-NEXT: {{ $}}
- ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 2047, implicit $exec
- ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
- ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
- ;
- ; GFX9-LABEL: name: load_global_s32_from_1_gep_2047
- ; GFX9: liveins: $vgpr0_vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX10-LABEL: name: load_global_s32_from_1_gep_2047
- ; GFX10: liveins: $vgpr0_vgpr1
- ; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX11-LABEL: name: load_global_s32_from_1_gep_2047
- ; GFX11: liveins: $vgpr0_vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX12-LABEL: name: load_global_s32_from_1_gep_2047
- ; GFX12: liveins: $vgpr0_vgpr1
- ; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- %0:vgpr(p1) = COPY $vgpr0_vgpr1
- %1:vgpr(s64) = G_CONSTANT i64 2047
- %2:vgpr(p1) = G_PTR_ADD %0, %1
- %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1)
- $vgpr0 = COPY %3
-
-...
-
----
-
-name: load_global_s32_from_1_gep_2048
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1
-
- ; GFX6-LABEL: name: load_global_s32_from_1_gep_2048
- ; GFX6: liveins: $vgpr0_vgpr1
- ; GFX6-NEXT: {{ $}}
- ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
- ;
- ; GFX7-LABEL: name: load_global_s32_from_1_gep_2048
- ; GFX7: liveins: $vgpr0_vgpr1
- ; GFX7-NEXT: {{ $}}
- ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
- ;
- ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_2048
- ; GFX7-FLAT: liveins: $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: {{ $}}
- ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 2048, implicit $exec
- ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
- ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
- ;
- ; GFX8-LABEL: name: load_global_s32_from_1_gep_2048
- ; GFX8: liveins: $vgpr0_vgpr1
- ; GFX8-NEXT: {{ $}}
- ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 2048, implicit $exec
- ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
- ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
- ;
- ; GFX9-LABEL: name: load_global_s32_from_1_gep_2048
- ; GFX9: liveins: $vgpr0_vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX10-LABEL: name: load_global_s32_from_1_gep_2048
- ; GFX10: liveins: $vgpr0_vgpr1
- ; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 2048, implicit $exec
- ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX11-LABEL: name: load_global_s32_from_1_gep_2048
- ; GFX11: liveins: $vgpr0_vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX12-LABEL: name: load_global_s32_from_1_gep_2048
- ; GFX12: liveins: $vgpr0_vgpr1
- ; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- %0:vgpr(p1) = COPY $vgpr0_vgpr1
- %1:vgpr(s64) = G_CONSTANT i64 2048
- %2:vgpr(p1) = G_PTR_ADD %0, %1
- %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1)
- $vgpr0 = COPY %3
-
-...
-
----
-
-name: load_global_s32_from_1_gep_m2047
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1
-
- ; GFX6-LABEL: name: load_global_s32_from_1_gep_m2047
- ; GFX6: liveins: $vgpr0_vgpr1
- ; GFX6-NEXT: {{ $}}
- ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX6-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2047, implicit $exec
- ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
- ;
- ; GFX7-LABEL: name: load_global_s32_from_1_gep_m2047
- ; GFX7: liveins: $vgpr0_vgpr1
- ; GFX7-NEXT: {{ $}}
- ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2047, implicit $exec
- ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
- ;
- ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m2047
- ; GFX7-FLAT: liveins: $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: {{ $}}
- ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2047, implicit $exec
- ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
- ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
- ;
- ; GFX8-LABEL: name: load_global_s32_from_1_gep_m2047
- ; GFX8: liveins: $vgpr0_vgpr1
- ; GFX8-NEXT: {{ $}}
- ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2047, implicit $exec
- ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
- ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
- ;
- ; GFX9-LABEL: name: load_global_s32_from_1_gep_m2047
- ; GFX9: liveins: $vgpr0_vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX10-LABEL: name: load_global_s32_from_1_gep_m2047
- ; GFX10: liveins: $vgpr0_vgpr1
- ; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX11-LABEL: name: load_global_s32_from_1_gep_m2047
- ; GFX11: liveins: $vgpr0_vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX12-LABEL: name: load_global_s32_from_1_gep_m2047
- ; GFX12: liveins: $vgpr0_vgpr1
- ; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- %0:vgpr(p1) = COPY $vgpr0_vgpr1
- %1:vgpr(s64) = G_CONSTANT i64 -2047
- %2:vgpr(p1) = G_PTR_ADD %0, %1
- %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1)
- $vgpr0 = COPY %3
-
-...
-
----
-
-name: load_global_s32_from_1_gep_m2048
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1
-
- ; GFX6-LABEL: name: load_global_s32_from_1_gep_m2048
- ; GFX6: liveins: $vgpr0_vgpr1
- ; GFX6-NEXT: {{ $}}
- ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX6-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2048, implicit $exec
- ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
- ;
- ; GFX7-LABEL: name: load_global_s32_from_1_gep_m2048
- ; GFX7: liveins: $vgpr0_vgpr1
- ; GFX7-NEXT: {{ $}}
- ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2048, implicit $exec
- ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
- ;
- ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m2048
- ; GFX7-FLAT: liveins: $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: {{ $}}
- ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2048, implicit $exec
- ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
- ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
- ;
- ; GFX8-LABEL: name: load_global_s32_from_1_gep_m2048
- ; GFX8: liveins: $vgpr0_vgpr1
- ; GFX8-NEXT: {{ $}}
- ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2048, implicit $exec
- ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
- ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
- ;
- ; GFX9-LABEL: name: load_global_s32_from_1_gep_m2048
- ; GFX9: liveins: $vgpr0_vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX10-LABEL: name: load_global_s32_from_1_gep_m2048
- ; GFX10: liveins: $vgpr0_vgpr1
- ; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX11-LABEL: name: load_global_s32_from_1_gep_m2048
- ; GFX11: liveins: $vgpr0_vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX12-LABEL: name: load_global_s32_from_1_gep_m2048
- ; GFX12: liveins: $vgpr0_vgpr1
- ; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- %0:vgpr(p1) = COPY $vgpr0_vgpr1
- %1:vgpr(s64) = G_CONSTANT i64 -2048
- %2:vgpr(p1) = G_PTR_ADD %0, %1
- %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1)
- $vgpr0 = COPY %3
-
-...
-
----
-
-name: load_global_s32_from_1_gep_4095
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1
-
- ; GFX6-LABEL: name: load_global_s32_from_1_gep_4095
- ; GFX6: liveins: $vgpr0_vgpr1
- ; GFX6-NEXT: {{ $}}
- ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
- ;
- ; GFX7-LABEL: name: load_global_s32_from_1_gep_4095
- ; GFX7: liveins: $vgpr0_vgpr1
- ; GFX7-NEXT: {{ $}}
- ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
- ;
- ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_4095
- ; GFX7-FLAT: liveins: $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: {{ $}}
- ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4095, implicit $exec
- ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
- ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
- ;
- ; GFX8-LABEL: name: load_global_s32_from_1_gep_4095
- ; GFX8: liveins: $vgpr0_vgpr1
- ; GFX8-NEXT: {{ $}}
- ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4095, implicit $exec
- ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
- ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
- ;
- ; GFX9-LABEL: name: load_global_s32_from_1_gep_4095
- ; GFX9: liveins: $vgpr0_vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX10-LABEL: name: load_global_s32_from_1_gep_4095
- ; GFX10: liveins: $vgpr0_vgpr1
- ; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4095, implicit $exec
- ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX11-LABEL: name: load_global_s32_from_1_gep_4095
- ; GFX11: liveins: $vgpr0_vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX12-LABEL: name: load_global_s32_from_1_gep_4095
- ; GFX12: liveins: $vgpr0_vgpr1
- ; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- %0:vgpr(p1) = COPY $vgpr0_vgpr1
- %1:vgpr(s64) = G_CONSTANT i64 4095
- %2:vgpr(p1) = G_PTR_ADD %0, %1
- %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1)
- $vgpr0 = COPY %3
-
-...
-
----
-
-name: load_global_s32_from_1_gep_4096
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1
-
- ; GFX6-LABEL: name: load_global_s32_from_1_gep_4096
- ; GFX6: liveins: $vgpr0_vgpr1
- ; GFX6-NEXT: {{ $}}
- ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
- ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
- ;
- ; GFX7-LABEL: name: load_global_s32_from_1_gep_4096
- ; GFX7: liveins: $vgpr0_vgpr1
- ; GFX7-NEXT: {{ $}}
- ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
- ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
- ;
- ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_4096
- ; GFX7-FLAT: liveins: $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: {{ $}}
- ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4096, implicit $exec
- ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
- ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
- ;
- ; GFX8-LABEL: name: load_global_s32_from_1_gep_4096
- ; GFX8: liveins: $vgpr0_vgpr1
- ; GFX8-NEXT: {{ $}}
- ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4096, implicit $exec
- ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
- ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
- ;
- ; GFX9-LABEL: name: load_global_s32_from_1_gep_4096
- ; GFX9: liveins: $vgpr0_vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4096, implicit $exec
- ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX10-LABEL: name: load_global_s32_from_1_gep_4096
- ; GFX10: liveins: $vgpr0_vgpr1
- ; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4096, implicit $exec
- ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX11-LABEL: name: load_global_s32_from_1_gep_4096
- ; GFX11: liveins: $vgpr0_vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX11-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4096, implicit $exec
- ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX12-LABEL: name: load_global_s32_from_1_gep_4096
- ; GFX12: liveins: $vgpr0_vgpr1
- ; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 4096, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- %0:vgpr(p1) = COPY $vgpr0_vgpr1
- %1:vgpr(s64) = G_CONSTANT i64 4096
- %2:vgpr(p1) = G_PTR_ADD %0, %1
- %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1)
- $vgpr0 = COPY %3
-
-...
-
----
-
-name: load_global_s32_from_1_gep_m4095
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1
-
- ; GFX6-LABEL: name: load_global_s32_from_1_gep_m4095
- ; GFX6: liveins: $vgpr0_vgpr1
- ; GFX6-NEXT: {{ $}}
- ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX6-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4095, implicit $exec
- ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
- ;
- ; GFX7-LABEL: name: load_global_s32_from_1_gep_m4095
- ; GFX7: liveins: $vgpr0_vgpr1
- ; GFX7-NEXT: {{ $}}
- ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4095, implicit $exec
- ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
- ;
- ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m4095
- ; GFX7-FLAT: liveins: $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: {{ $}}
- ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4095, implicit $exec
- ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
- ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
- ;
- ; GFX8-LABEL: name: load_global_s32_from_1_gep_m4095
- ; GFX8: liveins: $vgpr0_vgpr1
- ; GFX8-NEXT: {{ $}}
- ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4095, implicit $exec
- ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
- ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
- ;
- ; GFX9-LABEL: name: load_global_s32_from_1_gep_m4095
- ; GFX9: liveins: $vgpr0_vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4095, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX10-LABEL: name: load_global_s32_from_1_gep_m4095
- ; GFX10: liveins: $vgpr0_vgpr1
- ; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4095, implicit $exec
- ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX11-LABEL: name: load_global_s32_from_1_gep_m4095
- ; GFX11: liveins: $vgpr0_vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4095, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX12-LABEL: name: load_global_s32_from_1_gep_m4095
- ; GFX12: liveins: $vgpr0_vgpr1
- ; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4095, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- %0:vgpr(p1) = COPY $vgpr0_vgpr1
- %1:vgpr(s64) = G_CONSTANT i64 -4095
- %2:vgpr(p1) = G_PTR_ADD %0, %1
- %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1)
- $vgpr0 = COPY %3
-
-...
-
----
-
-name: load_global_s32_from_1_gep_m4096
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1
-
- ; GFX6-LABEL: name: load_global_s32_from_1_gep_m4096
- ; GFX6: liveins: $vgpr0_vgpr1
- ; GFX6-NEXT: {{ $}}
- ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX6-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4096, implicit $exec
- ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
- ;
- ; GFX7-LABEL: name: load_global_s32_from_1_gep_m4096
- ; GFX7: liveins: $vgpr0_vgpr1
- ; GFX7-NEXT: {{ $}}
- ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4096, implicit $exec
- ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
- ;
- ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m4096
- ; GFX7-FLAT: liveins: $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: {{ $}}
- ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4096, implicit $exec
- ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
- ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
- ;
- ; GFX8-LABEL: name: load_global_s32_from_1_gep_m4096
- ; GFX8: liveins: $vgpr0_vgpr1
- ; GFX8-NEXT: {{ $}}
- ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4096, implicit $exec
- ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
- ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
- ;
- ; GFX9-LABEL: name: load_global_s32_from_1_gep_m4096
- ; GFX9: liveins: $vgpr0_vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4096, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX10-LABEL: name: load_global_s32_from_1_gep_m4096
- ; GFX10: liveins: $vgpr0_vgpr1
- ; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4096, implicit $exec
- ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX11-LABEL: name: load_global_s32_from_1_gep_m4096
- ; GFX11: liveins: $vgpr0_vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4096, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX12-LABEL: name: load_global_s32_from_1_gep_m4096
- ; GFX12: liveins: $vgpr0_vgpr1
- ; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4096, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- %0:vgpr(p1) = COPY $vgpr0_vgpr1
- %1:vgpr(s64) = G_CONSTANT i64 -4096
- %2:vgpr(p1) = G_PTR_ADD %0, %1
- %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1)
- $vgpr0 = COPY %3
-
-...
-
----
-
-name: load_global_s32_from_1_gep_8191
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1
-
- ; GFX6-LABEL: name: load_global_s32_from_1_gep_8191
- ; GFX6: liveins: $vgpr0_vgpr1
- ; GFX6-NEXT: {{ $}}
- ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8191
- ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
- ;
- ; GFX7-LABEL: name: load_global_s32_from_1_gep_8191
- ; GFX7: liveins: $vgpr0_vgpr1
- ; GFX7-NEXT: {{ $}}
- ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8191
- ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
- ;
- ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_8191
- ; GFX7-FLAT: liveins: $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: {{ $}}
- ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8191, implicit $exec
- ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
- ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
- ;
- ; GFX8-LABEL: name: load_global_s32_from_1_gep_8191
- ; GFX8: liveins: $vgpr0_vgpr1
- ; GFX8-NEXT: {{ $}}
- ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8191, implicit $exec
- ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
- ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
- ;
- ; GFX9-LABEL: name: load_global_s32_from_1_gep_8191
- ; GFX9: liveins: $vgpr0_vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8191, implicit $exec
- ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX10-LABEL: name: load_global_s32_from_1_gep_8191
- ; GFX10: liveins: $vgpr0_vgpr1
- ; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8191, implicit $exec
- ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX11-LABEL: name: load_global_s32_from_1_gep_8191
- ; GFX11: liveins: $vgpr0_vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX11-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8191, implicit $exec
- ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX12-LABEL: name: load_global_s32_from_1_gep_8191
- ; GFX12: liveins: $vgpr0_vgpr1
- ; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 8191, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- %0:vgpr(p1) = COPY $vgpr0_vgpr1
- %1:vgpr(s64) = G_CONSTANT i64 8191
- %2:vgpr(p1) = G_PTR_ADD %0, %1
- %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1)
- $vgpr0 = COPY %3
-
-...
-
----
-
-name: load_global_s32_from_1_gep_8192
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1
-
- ; GFX6-LABEL: name: load_global_s32_from_1_gep_8192
- ; GFX6: liveins: $vgpr0_vgpr1
- ; GFX6-NEXT: {{ $}}
- ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8192
- ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
- ;
- ; GFX7-LABEL: name: load_global_s32_from_1_gep_8192
- ; GFX7: liveins: $vgpr0_vgpr1
- ; GFX7-NEXT: {{ $}}
- ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8192
- ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
- ;
- ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_8192
- ; GFX7-FLAT: liveins: $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: {{ $}}
- ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8192, implicit $exec
- ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
- ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
- ;
- ; GFX8-LABEL: name: load_global_s32_from_1_gep_8192
- ; GFX8: liveins: $vgpr0_vgpr1
- ; GFX8-NEXT: {{ $}}
- ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8192, implicit $exec
- ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
- ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
- ;
- ; GFX9-LABEL: name: load_global_s32_from_1_gep_8192
- ; GFX9: liveins: $vgpr0_vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8192, implicit $exec
- ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX10-LABEL: name: load_global_s32_from_1_gep_8192
- ; GFX10: liveins: $vgpr0_vgpr1
- ; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8192, implicit $exec
- ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX11-LABEL: name: load_global_s32_from_1_gep_8192
- ; GFX11: liveins: $vgpr0_vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX11-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8192, implicit $exec
- ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX12-LABEL: name: load_global_s32_from_1_gep_8192
- ; GFX12: liveins: $vgpr0_vgpr1
- ; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 8192, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- %0:vgpr(p1) = COPY $vgpr0_vgpr1
- %1:vgpr(s64) = G_CONSTANT i64 8192
- %2:vgpr(p1) = G_PTR_ADD %0, %1
- %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1)
- $vgpr0 = COPY %3
-
-...
-
----
-
-name: load_global_s32_from_1_gep_m8191
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1
-
- ; GFX6-LABEL: name: load_global_s32_from_1_gep_m8191
- ; GFX6: liveins: $vgpr0_vgpr1
- ; GFX6-NEXT: {{ $}}
- ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX6-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8191, implicit $exec
- ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
- ;
- ; GFX7-LABEL: name: load_global_s32_from_1_gep_m8191
- ; GFX7: liveins: $vgpr0_vgpr1
- ; GFX7-NEXT: {{ $}}
- ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8191, implicit $exec
- ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
- ;
- ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m8191
- ; GFX7-FLAT: liveins: $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: {{ $}}
- ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8191, implicit $exec
- ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
- ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
- ;
- ; GFX8-LABEL: name: load_global_s32_from_1_gep_m8191
- ; GFX8: liveins: $vgpr0_vgpr1
- ; GFX8-NEXT: {{ $}}
- ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8191, implicit $exec
- ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
- ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
- ;
- ; GFX9-LABEL: name: load_global_s32_from_1_gep_m8191
- ; GFX9: liveins: $vgpr0_vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8191, implicit $exec
- ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX10-LABEL: name: load_global_s32_from_1_gep_m8191
- ; GFX10: liveins: $vgpr0_vgpr1
- ; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8191, implicit $exec
- ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX11-LABEL: name: load_global_s32_from_1_gep_m8191
- ; GFX11: liveins: $vgpr0_vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX11-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8191, implicit $exec
- ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX12-LABEL: name: load_global_s32_from_1_gep_m8191
- ; GFX12: liveins: $vgpr0_vgpr1
- ; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -8191, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- %0:vgpr(p1) = COPY $vgpr0_vgpr1
- %1:vgpr(s64) = G_CONSTANT i64 -8191
- %2:vgpr(p1) = G_PTR_ADD %0, %1
- %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1)
- $vgpr0 = COPY %3
-
-...
-
----
-
-name: load_global_s32_from_1_gep_m8192
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1
-
- ; GFX6-LABEL: name: load_global_s32_from_1_gep_m8192
- ; GFX6: liveins: $vgpr0_vgpr1
- ; GFX6-NEXT: {{ $}}
- ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX6-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8192, implicit $exec
- ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
- ;
- ; GFX7-LABEL: name: load_global_s32_from_1_gep_m8192
- ; GFX7: liveins: $vgpr0_vgpr1
- ; GFX7-NEXT: {{ $}}
- ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8192, implicit $exec
- ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
- ;
- ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m8192
- ; GFX7-FLAT: liveins: $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: {{ $}}
- ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8192, implicit $exec
- ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
- ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
- ;
- ; GFX8-LABEL: name: load_global_s32_from_1_gep_m8192
- ; GFX8: liveins: $vgpr0_vgpr1
- ; GFX8-NEXT: {{ $}}
- ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8192, implicit $exec
- ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
- ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
- ;
- ; GFX9-LABEL: name: load_global_s32_from_1_gep_m8192
- ; GFX9: liveins: $vgpr0_vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8192, implicit $exec
- ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX10-LABEL: name: load_global_s32_from_1_gep_m8192
- ; GFX10: liveins: $vgpr0_vgpr1
- ; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8192, implicit $exec
- ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX11-LABEL: name: load_global_s32_from_1_gep_m8192
- ; GFX11: liveins: $vgpr0_vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX11-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8192, implicit $exec
- ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX12-LABEL: name: load_global_s32_from_1_gep_m8192
- ; GFX12: liveins: $vgpr0_vgpr1
- ; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -8192, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- %0:vgpr(p1) = COPY $vgpr0_vgpr1
- %1:vgpr(s64) = G_CONSTANT i64 -8192
- %2:vgpr(p1) = G_PTR_ADD %0, %1
- %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1)
- $vgpr0 = COPY %3
-
-...
-
----
-
-name: load_global_s32_from_1_gep_24bit_max
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1
-
- ; GFX6-LABEL: name: load_global_s32_from_1_gep_24bit_max
- ; GFX6: liveins: $vgpr0_vgpr1
- ; GFX6-NEXT: {{ $}}
- ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8388607
- ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
- ;
- ; GFX7-LABEL: name: load_global_s32_from_1_gep_24bit_max
- ; GFX7: liveins: $vgpr0_vgpr1
- ; GFX7-NEXT: {{ $}}
- ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8388607
- ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
- ;
- ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_24bit_max
- ; GFX7-FLAT: liveins: $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: {{ $}}
- ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8388607, implicit $exec
- ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
- ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
- ;
- ; GFX8-LABEL: name: load_global_s32_from_1_gep_24bit_max
- ; GFX8: liveins: $vgpr0_vgpr1
- ; GFX8-NEXT: {{ $}}
- ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8388607, implicit $exec
- ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
- ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
- ;
- ; GFX9-LABEL: name: load_global_s32_from_1_gep_24bit_max
- ; GFX9: liveins: $vgpr0_vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8388607, implicit $exec
- ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX10-LABEL: name: load_global_s32_from_1_gep_24bit_max
- ; GFX10: liveins: $vgpr0_vgpr1
- ; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8388607, implicit $exec
- ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX11-LABEL: name: load_global_s32_from_1_gep_24bit_max
- ; GFX11: liveins: $vgpr0_vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX11-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8388607, implicit $exec
- ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX12-LABEL: name: load_global_s32_from_1_gep_24bit_max
- ; GFX12: liveins: $vgpr0_vgpr1
- ; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 8388607, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- %0:vgpr(p1) = COPY $vgpr0_vgpr1
- %1:vgpr(s64) = G_CONSTANT i64 8388607
- %2:vgpr(p1) = G_PTR_ADD %0, %1
- %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1)
- $vgpr0 = COPY %3
-
-...
-
----
-
-name: load_global_s32_from_1_gep_2x_24bit_max
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1
-
- ; GFX6-LABEL: name: load_global_s32_from_1_gep_2x_24bit_max
- ; GFX6: liveins: $vgpr0_vgpr1
- ; GFX6-NEXT: {{ $}}
- ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 16777214
- ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
- ;
- ; GFX7-LABEL: name: load_global_s32_from_1_gep_2x_24bit_max
- ; GFX7: liveins: $vgpr0_vgpr1
- ; GFX7-NEXT: {{ $}}
- ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 16777214
- ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
- ;
- ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_2x_24bit_max
- ; GFX7-FLAT: liveins: $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: {{ $}}
- ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 16777214, implicit $exec
- ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
- ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
- ;
- ; GFX8-LABEL: name: load_global_s32_from_1_gep_2x_24bit_max
- ; GFX8: liveins: $vgpr0_vgpr1
- ; GFX8-NEXT: {{ $}}
- ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 16777214, implicit $exec
- ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
- ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
- ;
- ; GFX9-LABEL: name: load_global_s32_from_1_gep_2x_24bit_max
- ; GFX9: liveins: $vgpr0_vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 16777214, implicit $exec
- ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX10-LABEL: name: load_global_s32_from_1_gep_2x_24bit_max
- ; GFX10: liveins: $vgpr0_vgpr1
- ; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 16777214, implicit $exec
- ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX11-LABEL: name: load_global_s32_from_1_gep_2x_24bit_max
- ; GFX11: liveins: $vgpr0_vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX11-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 16777214, implicit $exec
- ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX12-LABEL: name: load_global_s32_from_1_gep_2x_24bit_max
- ; GFX12: liveins: $vgpr0_vgpr1
- ; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX12-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 16777214, implicit $exec
- ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX12-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX12-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- %0:vgpr(p1) = COPY $vgpr0_vgpr1
- %1:vgpr(s64) = G_CONSTANT i64 16777214
- %2:vgpr(p1) = G_PTR_ADD %0, %1
- %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1)
- $vgpr0 = COPY %3
-
-...
-
----
-
-name: load_global_s32_from_1_gep_24bit_min
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1
-
- ; GFX6-LABEL: name: load_global_s32_from_1_gep_24bit_min
- ; GFX6: liveins: $vgpr0_vgpr1
- ; GFX6-NEXT: {{ $}}
- ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX6-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8388608, implicit $exec
- ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
- ;
- ; GFX7-LABEL: name: load_global_s32_from_1_gep_24bit_min
- ; GFX7: liveins: $vgpr0_vgpr1
- ; GFX7-NEXT: {{ $}}
- ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8388608, implicit $exec
- ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
- ;
- ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_24bit_min
- ; GFX7-FLAT: liveins: $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: {{ $}}
- ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8388608, implicit $exec
- ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
- ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
- ;
- ; GFX8-LABEL: name: load_global_s32_from_1_gep_24bit_min
- ; GFX8: liveins: $vgpr0_vgpr1
- ; GFX8-NEXT: {{ $}}
- ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8388608, implicit $exec
- ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
- ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
- ;
- ; GFX9-LABEL: name: load_global_s32_from_1_gep_24bit_min
- ; GFX9: liveins: $vgpr0_vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8388608, implicit $exec
- ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX10-LABEL: name: load_global_s32_from_1_gep_24bit_min
- ; GFX10: liveins: $vgpr0_vgpr1
- ; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8388608, implicit $exec
- ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX11-LABEL: name: load_global_s32_from_1_gep_24bit_min
- ; GFX11: liveins: $vgpr0_vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX11-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8388608, implicit $exec
- ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX12-LABEL: name: load_global_s32_from_1_gep_24bit_min
- ; GFX12: liveins: $vgpr0_vgpr1
- ; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -8388608, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- %0:vgpr(p1) = COPY $vgpr0_vgpr1
- %1:vgpr(s64) = G_CONSTANT i64 -8388608
- %2:vgpr(p1) = G_PTR_ADD %0, %1
- %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1)
- $vgpr0 = COPY %3
-
-...
-
----
-
-name: load_global_s32_from_1_gep_2x_24bit_min
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-
-body: |
- bb.0:
- liveins: $vgpr0_vgpr1
-
- ; GFX6-LABEL: name: load_global_s32_from_1_gep_2x_24bit_min
- ; GFX6: liveins: $vgpr0_vgpr1
- ; GFX6-NEXT: {{ $}}
- ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX6-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -16777215, implicit $exec
- ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
- ;
- ; GFX7-LABEL: name: load_global_s32_from_1_gep_2x_24bit_min
- ; GFX7: liveins: $vgpr0_vgpr1
- ; GFX7-NEXT: {{ $}}
- ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -16777215, implicit $exec
- ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
- ;
- ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_2x_24bit_min
- ; GFX7-FLAT: liveins: $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: {{ $}}
- ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -16777215, implicit $exec
- ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
- ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
- ;
- ; GFX8-LABEL: name: load_global_s32_from_1_gep_2x_24bit_min
- ; GFX8: liveins: $vgpr0_vgpr1
- ; GFX8-NEXT: {{ $}}
- ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -16777215, implicit $exec
- ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
- ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
- ;
- ; GFX9-LABEL: name: load_global_s32_from_1_gep_2x_24bit_min
- ; GFX9: liveins: $vgpr0_vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -16777215, implicit $exec
- ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX10-LABEL: name: load_global_s32_from_1_gep_2x_24bit_min
- ; GFX10: liveins: $vgpr0_vgpr1
- ; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -16777215, implicit $exec
- ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX11-LABEL: name: load_global_s32_from_1_gep_2x_24bit_min
- ; GFX11: liveins: $vgpr0_vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX11-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -16777215, implicit $exec
- ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- ;
- ; GFX12-LABEL: name: load_global_s32_from_1_gep_2x_24bit_min
- ; GFX12: liveins: $vgpr0_vgpr1
- ; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX12-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -16777215, implicit $exec
- ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
- ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
- ; GFX12-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX12-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX12-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
- ; GFX12-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
- %0:vgpr(p1) = COPY $vgpr0_vgpr1
- %1:vgpr(s64) = G_CONSTANT i64 -16777215
- %2:vgpr(p1) = G_PTR_ADD %0, %1
- %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1)
- $vgpr0 = COPY %3
-
-...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir
index 87018100abf686..280c7a5a492da8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir
@@ -24,7 +24,6 @@ body: |
; GFX6-NEXT: $m0 = S_MOV_B32 -1
; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s32), addrspace 3)
; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_B32_]]
- ;
; GFX7-LABEL: name: load_local_s32_from_4
; GFX7: liveins: $vgpr0
; GFX7-NEXT: {{ $}}
@@ -32,14 +31,12 @@ body: |
; GFX7-NEXT: $m0 = S_MOV_B32 -1
; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s32), addrspace 3)
; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_B32_]]
- ;
; GFX9-LABEL: name: load_local_s32_from_4
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 3)
; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]]
- ;
; GFX10-LABEL: name: load_local_s32_from_4
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -70,7 +67,6 @@ body: |
; GFX6-NEXT: $m0 = S_MOV_B32 -1
; GFX6-NEXT: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3)
; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U16_]]
- ;
; GFX7-LABEL: name: load_local_s32_from_2
; GFX7: liveins: $vgpr0
; GFX7-NEXT: {{ $}}
@@ -78,14 +74,12 @@ body: |
; GFX7-NEXT: $m0 = S_MOV_B32 -1
; GFX7-NEXT: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3)
; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U16_]]
- ;
; GFX9-LABEL: name: load_local_s32_from_2
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 3)
; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U16_gfx9_]]
- ;
; GFX10-LABEL: name: load_local_s32_from_2
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -119,7 +113,6 @@ body: |
; GFX6-NEXT: $m0 = S_MOV_B32 -1
; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3)
; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U8_]]
- ;
; GFX7-LABEL: name: load_local_s32_from_1
; GFX7: liveins: $vgpr0
; GFX7-NEXT: {{ $}}
@@ -127,14 +120,12 @@ body: |
; GFX7-NEXT: $m0 = S_MOV_B32 -1
; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3)
; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U8_]]
- ;
; GFX9-LABEL: name: load_local_s32_from_1
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 3)
; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]]
- ;
; GFX10-LABEL: name: load_local_s32_from_1
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -165,7 +156,6 @@ body: |
; GFX6-NEXT: $m0 = S_MOV_B32 -1
; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x s32>), addrspace 3)
; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
- ;
; GFX7-LABEL: name: load_local_v2s32
; GFX7: liveins: $vgpr0
; GFX7-NEXT: {{ $}}
@@ -173,14 +163,12 @@ body: |
; GFX7-NEXT: $m0 = S_MOV_B32 -1
; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x s32>), addrspace 3)
; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
- ;
; GFX9-LABEL: name: load_local_v2s32
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 3)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]]
- ;
; GFX10-LABEL: name: load_local_v2s32
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -211,7 +199,6 @@ body: |
; GFX6-NEXT: $m0 = S_MOV_B32 -1
; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
- ;
; GFX7-LABEL: name: load_local_v2s32_align4
; GFX7: liveins: $vgpr0
; GFX7-NEXT: {{ $}}
@@ -219,14 +206,12 @@ body: |
; GFX7-NEXT: $m0 = S_MOV_B32 -1
; GFX7-NEXT: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (<2 x s32>), align 4, addrspace 3)
; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]]
- ;
; GFX9-LABEL: name: load_local_v2s32_align4
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<2 x s32>), align 4, addrspace 3)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]]
- ;
; GFX10-LABEL: name: load_local_v2s32_align4
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -257,7 +242,6 @@ body: |
; GFX6-NEXT: $m0 = S_MOV_B32 -1
; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s64), addrspace 3)
; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
- ;
; GFX7-LABEL: name: load_local_s64
; GFX7: liveins: $vgpr0
; GFX7-NEXT: {{ $}}
@@ -265,14 +249,12 @@ body: |
; GFX7-NEXT: $m0 = S_MOV_B32 -1
; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s64), addrspace 3)
; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
- ;
; GFX9-LABEL: name: load_local_s64
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 3)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]]
- ;
; GFX10-LABEL: name: load_local_s64
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -303,7 +285,6 @@ body: |
; GFX6-NEXT: $m0 = S_MOV_B32 -1
; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3)
; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
- ;
; GFX7-LABEL: name: load_local_s64_align4
; GFX7: liveins: $vgpr0
; GFX7-NEXT: {{ $}}
@@ -311,14 +292,12 @@ body: |
; GFX7-NEXT: $m0 = S_MOV_B32 -1
; GFX7-NEXT: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (s64), align 4, addrspace 3)
; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]]
- ;
; GFX9-LABEL: name: load_local_s64_align4
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (s64), align 4, addrspace 3)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]]
- ;
; GFX10-LABEL: name: load_local_s64_align4
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -349,7 +328,6 @@ body: |
; GFX6-NEXT: $m0 = S_MOV_B32 -1
; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p3), addrspace 3)
; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_B32_]]
- ;
; GFX7-LABEL: name: load_local_p3_from_4
; GFX7: liveins: $vgpr0
; GFX7-NEXT: {{ $}}
@@ -357,14 +335,12 @@ body: |
; GFX7-NEXT: $m0 = S_MOV_B32 -1
; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p3), addrspace 3)
; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_B32_]]
- ;
; GFX9-LABEL: name: load_local_p3_from_4
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (p3), addrspace 3)
; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]]
- ;
; GFX10-LABEL: name: load_local_p3_from_4
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -395,7 +371,6 @@ body: |
; GFX6-NEXT: $m0 = S_MOV_B32 -1
; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p5), addrspace 3)
; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_B32_]]
- ;
; GFX7-LABEL: name: load_local_p5_from_4
; GFX7: liveins: $vgpr0
; GFX7-NEXT: {{ $}}
@@ -403,14 +378,12 @@ body: |
; GFX7-NEXT: $m0 = S_MOV_B32 -1
; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p5), addrspace 3)
; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_B32_]]
- ;
; GFX9-LABEL: name: load_local_p5_from_4
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (p5), addrspace 3)
; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]]
- ;
; GFX10-LABEL: name: load_local_p5_from_4
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -441,7 +414,6 @@ body: |
; GFX6-NEXT: $m0 = S_MOV_B32 -1
; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p1), addrspace 3)
; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
- ;
; GFX7-LABEL: name: load_local_p1_align8
; GFX7: liveins: $vgpr0
; GFX7-NEXT: {{ $}}
@@ -449,14 +421,12 @@ body: |
; GFX7-NEXT: $m0 = S_MOV_B32 -1
; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p1), addrspace 3)
; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
- ;
; GFX9-LABEL: name: load_local_p1_align8
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (p1), addrspace 3)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]]
- ;
; GFX10-LABEL: name: load_local_p1_align8
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -487,7 +457,6 @@ body: |
; GFX6-NEXT: $m0 = S_MOV_B32 -1
; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3)
; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
- ;
; GFX7-LABEL: name: load_local_p1_align4
; GFX7: liveins: $vgpr0
; GFX7-NEXT: {{ $}}
@@ -495,14 +464,12 @@ body: |
; GFX7-NEXT: $m0 = S_MOV_B32 -1
; GFX7-NEXT: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (p1), align 4, addrspace 3)
; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]]
- ;
; GFX9-LABEL: name: load_local_p1_align4
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (p1), align 4, addrspace 3)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]]
- ;
; GFX10-LABEL: name: load_local_p1_align4
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -533,7 +500,6 @@ body: |
; GFX6-NEXT: $m0 = S_MOV_B32 -1
; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load (p999), addrspace 3)
; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
- ;
; GFX7-LABEL: name: load_local_p999_from_8
; GFX7: liveins: $vgpr0
; GFX7-NEXT: {{ $}}
@@ -541,14 +507,12 @@ body: |
; GFX7-NEXT: $m0 = S_MOV_B32 -1
; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load (p999), addrspace 3)
; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
- ;
; GFX9-LABEL: name: load_local_p999_from_8
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load (p999), addrspace 3)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
- ;
; GFX10-LABEL: name: load_local_p999_from_8
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -579,26 +543,24 @@ body: |
; GFX6-NEXT: $m0 = S_MOV_B32 -1
; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
- ;
; GFX7-LABEL: name: load_local_v2p3
; GFX7: liveins: $vgpr0
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
- ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
+ ; GFX7-NEXT: $m0 = S_MOV_B32 -1
+ ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
- ;
; GFX9-LABEL: name: load_local_v2p3
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
- ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
+ ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
- ;
; GFX10-LABEL: name: load_local_v2p3
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
- ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
+ ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
%0:vgpr(p3) = COPY $vgpr0
%1:vgpr(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 3)
@@ -624,7 +586,6 @@ body: |
; GFX6-NEXT: $m0 = S_MOV_B32 -1
; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x s16>), addrspace 3)
; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_B32_]]
- ;
; GFX7-LABEL: name: load_local_v2s16
; GFX7: liveins: $vgpr0
; GFX7-NEXT: {{ $}}
@@ -632,14 +593,12 @@ body: |
; GFX7-NEXT: $m0 = S_MOV_B32 -1
; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x s16>), addrspace 3)
; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_B32_]]
- ;
; GFX9-LABEL: name: load_local_v2s16
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 3)
; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]]
- ;
; GFX10-LABEL: name: load_local_v2s16
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -670,7 +629,6 @@ body: |
; GFX6-NEXT: $m0 = S_MOV_B32 -1
; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<4 x s16>), addrspace 3)
; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
- ;
; GFX7-LABEL: name: load_local_v4s16
; GFX7: liveins: $vgpr0
; GFX7-NEXT: {{ $}}
@@ -678,14 +636,12 @@ body: |
; GFX7-NEXT: $m0 = S_MOV_B32 -1
; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<4 x s16>), addrspace 3)
; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
- ;
; GFX9-LABEL: name: load_local_v4s16
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 3)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]]
- ;
; GFX10-LABEL: name: load_local_v4s16
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -742,7 +698,6 @@ body: |
; GFX6-NEXT: $m0 = S_MOV_B32 -1
; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3)
; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U8_]]
- ;
; GFX7-LABEL: name: load_local_s32_from_1_gep_65535
; GFX7: liveins: $vgpr0
; GFX7-NEXT: {{ $}}
@@ -750,14 +705,12 @@ body: |
; GFX7-NEXT: $m0 = S_MOV_B32 -1
; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 65535, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3)
; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U8_]]
- ;
; GFX9-LABEL: name: load_local_s32_from_1_gep_65535
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 65535, 0, implicit $exec :: (load (s8), addrspace 3)
; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]]
- ;
; GFX10-LABEL: name: load_local_s32_from_1_gep_65535
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -792,7 +745,6 @@ body: |
; GFX6-NEXT: $m0 = S_MOV_B32 -1
; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_AND_B32_e64_]], 65535, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3)
; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U8_]]
- ;
; GFX7-LABEL: name: load_local_s32_from_1_gep_65535_known_bits_base_address
; GFX7: liveins: $vgpr0
; GFX7-NEXT: {{ $}}
@@ -802,7 +754,6 @@ body: |
; GFX7-NEXT: $m0 = S_MOV_B32 -1
; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_AND_B32_e64_]], 65535, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3)
; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U8_]]
- ;
; GFX9-LABEL: name: load_local_s32_from_1_gep_65535_known_bits_base_address
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -811,7 +762,6 @@ body: |
; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_AND_B32_e64_]], 65535, 0, implicit $exec :: (load (s8), addrspace 3)
; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]]
- ;
; GFX10-LABEL: name: load_local_s32_from_1_gep_65535_known_bits_base_address
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -851,7 +801,6 @@ body: |
; GFX6-NEXT: $m0 = S_MOV_B32 -1
; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3)
; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U8_]]
- ;
; GFX7-LABEL: name: load_local_s32_from_1_gep_65536
; GFX7: liveins: $vgpr0
; GFX7-NEXT: {{ $}}
@@ -861,7 +810,6 @@ body: |
; GFX7-NEXT: $m0 = S_MOV_B32 -1
; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3)
; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U8_]]
- ;
; GFX9-LABEL: name: load_local_s32_from_1_gep_65536
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -870,7 +818,6 @@ body: |
; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_ADD_U32_e64_]], 0, 0, implicit $exec :: (load (s8), addrspace 3)
; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]]
- ;
; GFX10-LABEL: name: load_local_s32_from_1_gep_65536
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -907,7 +854,6 @@ body: |
; GFX6-NEXT: $m0 = S_MOV_B32 -1
; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3)
; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U8_]]
- ;
; GFX7-LABEL: name: load_local_s32_from_1_gep_m1
; GFX7: liveins: $vgpr0
; GFX7-NEXT: {{ $}}
@@ -917,7 +863,6 @@ body: |
; GFX7-NEXT: $m0 = S_MOV_B32 -1
; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3)
; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U8_]]
- ;
; GFX9-LABEL: name: load_local_s32_from_1_gep_m1
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -926,7 +871,6 @@ body: |
; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_ADD_U32_e64_]], 0, 0, implicit $exec :: (load (s8), addrspace 3)
; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]]
- ;
; GFX10-LABEL: name: load_local_s32_from_1_gep_m1
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -963,7 +907,6 @@ body: |
; GFX6-NEXT: $m0 = S_MOV_B32 -1
; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64), align 4, addrspace 3)
; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
- ;
; GFX7-LABEL: name: load_local_s64_align4_from_1_gep_1016
; GFX7: liveins: $vgpr0_vgpr1
; GFX7-NEXT: {{ $}}
@@ -971,14 +914,12 @@ body: |
; GFX7-NEXT: $m0 = S_MOV_B32 -1
; GFX7-NEXT: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[COPY]], 254, 255, 0, implicit $m0, implicit $exec :: (load (s64), align 4, addrspace 3)
; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]]
- ;
; GFX9-LABEL: name: load_local_s64_align4_from_1_gep_1016
; GFX9: liveins: $vgpr0_vgpr1
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 254, 255, 0, implicit $exec :: (load (s64), align 4, addrspace 3)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]]
- ;
; GFX10-LABEL: name: load_local_s64_align4_from_1_gep_1016
; GFX10: liveins: $vgpr0_vgpr1
; GFX10-NEXT: {{ $}}
@@ -1013,7 +954,6 @@ body: |
; GFX6-NEXT: $m0 = S_MOV_B32 -1
; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64), align 4, addrspace 3)
; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
- ;
; GFX7-LABEL: name: load_local_s64_align4_from_1_gep_1020
; GFX7: liveins: $vgpr0_vgpr1
; GFX7-NEXT: {{ $}}
@@ -1023,7 +963,6 @@ body: |
; GFX7-NEXT: $m0 = S_MOV_B32 -1
; GFX7-NEXT: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[V_ADD_CO_U32_e64_]], 0, 1, 0, implicit $m0, implicit $exec :: (load (s64), align 4, addrspace 3)
; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]]
- ;
; GFX9-LABEL: name: load_local_s64_align4_from_1_gep_1020
; GFX9: liveins: $vgpr0_vgpr1
; GFX9-NEXT: {{ $}}
@@ -1032,7 +971,6 @@ body: |
; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[V_ADD_U32_e64_]], 0, 1, 0, implicit $exec :: (load (s64), align 4, addrspace 3)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]]
- ;
; GFX10-LABEL: name: load_local_s64_align4_from_1_gep_1020
; GFX10: liveins: $vgpr0_vgpr1
; GFX10-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
index ff1d3fe3796732..a63df136e003c3 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
@@ -6604,25 +6604,22 @@ body: |
; CI: liveins: $vgpr0_vgpr1
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
- ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4)
- ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), addrspace 4)
+ ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
;
; VI-LABEL: name: test_load_constant_v2p3_align8
; VI: liveins: $vgpr0_vgpr1
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
- ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), addrspace 4)
+ ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
;
; GFX9-LABEL: name: test_load_constant_v2p3_align8
; GFX9: liveins: $vgpr0_vgpr1
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
- ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), addrspace 4)
+ ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
%0:_(p4) = COPY $vgpr0_vgpr1
%1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 4)
$vgpr0_vgpr1 = COPY %1
@@ -6638,25 +6635,22 @@ body: |
; CI: liveins: $vgpr0_vgpr1
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
- ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), align 4, addrspace 4)
- ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), align 4, addrspace 4)
+ ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
;
; VI-LABEL: name: test_load_constant_v2p3_align4
; VI: liveins: $vgpr0_vgpr1
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
- ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), align 4, addrspace 4)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), align 4, addrspace 4)
+ ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
;
; GFX9-LABEL: name: test_load_constant_v2p3_align4
; GFX9: liveins: $vgpr0_vgpr1
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
- ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), align 4, addrspace 4)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), align 4, addrspace 4)
+ ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
%0:_(p4) = COPY $vgpr0_vgpr1
%1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 4, addrspace 4)
$vgpr0_vgpr1 = COPY %1
@@ -6689,6 +6683,7 @@ body: |
; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
+ ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
@@ -6704,9 +6699,9 @@ body: |
; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
- ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
- ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
- ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; CI-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
+ ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
;
; VI-LABEL: name: test_load_constant_v2p3_align1
; VI: liveins: $vgpr0_vgpr1
@@ -6729,6 +6724,7 @@ body: |
; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
+ ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
@@ -6744,9 +6740,9 @@ body: |
; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
- ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; VI-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
+ ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
;
; GFX9-LABEL: name: test_load_constant_v2p3_align1
; GFX9: liveins: $vgpr0_vgpr1
@@ -6769,6 +6765,7 @@ body: |
; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
+ ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
@@ -6784,9 +6781,9 @@ body: |
; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
- ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; GFX9-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
+ ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
%0:_(p4) = COPY $vgpr0_vgpr1
%1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 1, addrspace 4)
$vgpr0_vgpr1 = COPY %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
index 3b166660a84b75..b1d7d36f9912e7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
@@ -15192,73 +15192,65 @@ body: |
; CI: liveins: $vgpr0_vgpr1
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
- ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 8)
+ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p0) :: (load (p3), align 8)
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4)
- ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
- ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
- ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p0) :: (load (p3) from unknown-address + 4)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3)
+ ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
;
; VI-LABEL: name: test_load_flat_v2p3_align8
; VI: liveins: $vgpr0_vgpr1
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
- ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 8)
+ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p0) :: (load (p3), align 8)
; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4)
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
- ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p0) :: (load (p3) from unknown-address + 4)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3)
+ ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
;
; GFX9PLUS-LABEL: name: test_load_flat_v2p3_align8
; GFX9PLUS: liveins: $vgpr0_vgpr1
; GFX9PLUS-NEXT: {{ $}}
; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
- ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>))
- ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>))
+ ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
;
; GFX11PLUS-LABEL: name: test_load_flat_v2p3_align8
; GFX11PLUS: liveins: $vgpr0_vgpr1
; GFX11PLUS-NEXT: {{ $}}
; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
- ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>))
- ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>))
+ ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
;
; GFX12-LABEL: name: test_load_flat_v2p3_align8
; GFX12: liveins: $vgpr0_vgpr1
; GFX12-NEXT: {{ $}}
; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
- ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>))
- ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>))
+ ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
;
; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v2p3_align8
; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1
; UNALIGNED_GFX9PLUS-NEXT: {{ $}}
; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
- ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>))
- ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>))
+ ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
;
; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v2p3_align8
; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1
; UNALIGNED_GFX11PLUS-NEXT: {{ $}}
; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
- ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>))
- ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>))
+ ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
;
; UNALIGNED_GFX12-LABEL: name: test_load_flat_v2p3_align8
; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1
; UNALIGNED_GFX12-NEXT: {{ $}}
; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
- ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>))
- ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>))
+ ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
%0:_(p0) = COPY $vgpr0_vgpr1
%1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 0)
$vgpr0_vgpr1 = COPY %1
@@ -15274,73 +15266,65 @@ body: |
; CI: liveins: $vgpr0_vgpr1
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
- ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
+ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p0) :: (load (p3))
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4)
- ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
- ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
- ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p0) :: (load (p3) from unknown-address + 4)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3)
+ ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
;
; VI-LABEL: name: test_load_flat_v2p3_align4
; VI: liveins: $vgpr0_vgpr1
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
- ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
+ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p0) :: (load (p3))
; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4)
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
- ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p0) :: (load (p3) from unknown-address + 4)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3)
+ ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
;
; GFX9PLUS-LABEL: name: test_load_flat_v2p3_align4
; GFX9PLUS: liveins: $vgpr0_vgpr1
; GFX9PLUS-NEXT: {{ $}}
; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
- ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4)
- ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>), align 4)
+ ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
;
; GFX11PLUS-LABEL: name: test_load_flat_v2p3_align4
; GFX11PLUS: liveins: $vgpr0_vgpr1
; GFX11PLUS-NEXT: {{ $}}
; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
- ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4)
- ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>), align 4)
+ ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
;
; GFX12-LABEL: name: test_load_flat_v2p3_align4
; GFX12: liveins: $vgpr0_vgpr1
; GFX12-NEXT: {{ $}}
; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
- ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4)
- ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>), align 4)
+ ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
;
; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v2p3_align4
; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1
; UNALIGNED_GFX9PLUS-NEXT: {{ $}}
; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
- ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4)
- ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>), align 4)
+ ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
;
; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v2p3_align4
; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1
; UNALIGNED_GFX11PLUS-NEXT: {{ $}}
; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
- ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4)
- ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>), align 4)
+ ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
;
; UNALIGNED_GFX12-LABEL: name: test_load_flat_v2p3_align4
; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1
; UNALIGNED_GFX12-NEXT: {{ $}}
; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
- ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4)
- ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>), align 4)
+ ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
%0:_(p0) = COPY $vgpr0_vgpr1
%1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 4, addrspace 0)
$vgpr0_vgpr1 = COPY %1
@@ -15373,6 +15357,7 @@ body: |
; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
+ ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
@@ -15388,9 +15373,9 @@ body: |
; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
- ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
- ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
- ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; CI-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
+ ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
;
; VI-LABEL: name: test_load_flat_v2p3_align1
; VI: liveins: $vgpr0_vgpr1
@@ -15413,6 +15398,7 @@ body: |
; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
+ ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
@@ -15428,9 +15414,9 @@ body: |
; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
- ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; VI-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
+ ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
;
; GFX9PLUS-LABEL: name: test_load_flat_v2p3_align1
; GFX9PLUS: liveins: $vgpr0_vgpr1
@@ -15453,6 +15439,7 @@ body: |
; GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
; GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
+ ; GFX9PLUS-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
; GFX9PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; GFX9PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
; GFX9PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
@@ -15468,9 +15455,9 @@ body: |
; GFX9PLUS-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
; GFX9PLUS-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
; GFX9PLUS-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
- ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
- ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
- ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; GFX9PLUS-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
+ ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
+ ; GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
;
; GFX11PLUS-LABEL: name: test_load_flat_v2p3_align1
; GFX11PLUS: liveins: $vgpr0_vgpr1
@@ -15493,6 +15480,7 @@ body: |
; GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
; GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
+ ; GFX11PLUS-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
; GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
; GFX11PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
@@ -15508,9 +15496,9 @@ body: |
; GFX11PLUS-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
; GFX11PLUS-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
; GFX11PLUS-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
- ; GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
- ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
- ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; GFX11PLUS-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
+ ; GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
+ ; GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
;
; GFX12-LABEL: name: test_load_flat_v2p3_align1
; GFX12: liveins: $vgpr0_vgpr1
@@ -15533,6 +15521,7 @@ body: |
; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
+ ; GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
; GFX12-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
; GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
@@ -15548,9 +15537,9 @@ body: |
; GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
; GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
; GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
- ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
- ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
- ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; GFX12-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
+ ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
+ ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
;
; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v2p3_align1
; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1
@@ -15573,6 +15562,7 @@ body: |
; UNALIGNED_GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
+ ; UNALIGNED_GFX9PLUS-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
; UNALIGNED_GFX9PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
; UNALIGNED_GFX9PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
@@ -15588,9 +15578,9 @@ body: |
; UNALIGNED_GFX9PLUS-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
; UNALIGNED_GFX9PLUS-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
; UNALIGNED_GFX9PLUS-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
- ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
- ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
- ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; UNALIGNED_GFX9PLUS-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
+ ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
+ ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
;
; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v2p3_align1
; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1
@@ -15613,6 +15603,7 @@ body: |
; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
+ ; UNALIGNED_GFX11PLUS-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
@@ -15628,9 +15619,9 @@ body: |
; UNALIGNED_GFX11PLUS-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
; UNALIGNED_GFX11PLUS-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
; UNALIGNED_GFX11PLUS-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
- ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
- ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
- ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; UNALIGNED_GFX11PLUS-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
+ ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
+ ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
;
; UNALIGNED_GFX12-LABEL: name: test_load_flat_v2p3_align1
; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1
@@ -15653,6 +15644,7 @@ body: |
; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
+ ; UNALIGNED_GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4)
@@ -15668,9 +15660,9 @@ body: |
; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
- ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
- ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
- ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; UNALIGNED_GFX12-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
+ ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
+ ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
%0:_(p0) = COPY $vgpr0_vgpr1
%1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 1, addrspace 0)
$vgpr0_vgpr1 = COPY %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
index f384114ee4cde7..d6acc6ecdfc660 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
@@ -13448,49 +13448,43 @@ body: |
; SI: liveins: $vgpr0_vgpr1
; SI-NEXT: {{ $}}
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
+ ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
;
; CI-HSA-LABEL: name: test_load_global_v2p3_align8
; CI-HSA: liveins: $vgpr0_vgpr1
; CI-HSA-NEXT: {{ $}}
; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
- ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
+ ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
;
; CI-MESA-LABEL: name: test_load_global_v2p3_align8
; CI-MESA: liveins: $vgpr0_vgpr1
; CI-MESA-NEXT: {{ $}}
; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
- ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
+ ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
;
; VI-LABEL: name: test_load_global_v2p3_align8
; VI: liveins: $vgpr0_vgpr1
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
+ ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
;
; GFX9-HSA-LABEL: name: test_load_global_v2p3_align8
; GFX9-HSA: liveins: $vgpr0_vgpr1
; GFX9-HSA-NEXT: {{ $}}
; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
- ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
+ ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
;
; GFX9-MESA-LABEL: name: test_load_global_v2p3_align8
; GFX9-MESA: liveins: $vgpr0_vgpr1
; GFX9-MESA-NEXT: {{ $}}
; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
- ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
+ ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 1)
$vgpr0_vgpr1 = COPY %1
@@ -13506,49 +13500,43 @@ body: |
; SI: liveins: $vgpr0_vgpr1
; SI-NEXT: {{ $}}
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1)
+ ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
;
; CI-HSA-LABEL: name: test_load_global_v2p3_align4
; CI-HSA: liveins: $vgpr0_vgpr1
; CI-HSA-NEXT: {{ $}}
; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1)
- ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1)
+ ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
;
; CI-MESA-LABEL: name: test_load_global_v2p3_align4
; CI-MESA: liveins: $vgpr0_vgpr1
; CI-MESA-NEXT: {{ $}}
; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1)
- ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1)
+ ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
;
; VI-LABEL: name: test_load_global_v2p3_align4
; VI: liveins: $vgpr0_vgpr1
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1)
+ ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
;
; GFX9-HSA-LABEL: name: test_load_global_v2p3_align4
; GFX9-HSA: liveins: $vgpr0_vgpr1
; GFX9-HSA-NEXT: {{ $}}
; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1)
- ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1)
+ ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
;
; GFX9-MESA-LABEL: name: test_load_global_v2p3_align4
; GFX9-MESA: liveins: $vgpr0_vgpr1
; GFX9-MESA-NEXT: {{ $}}
; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1)
- ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1)
+ ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 4, addrspace 1)
$vgpr0_vgpr1 = COPY %1
@@ -13581,6 +13569,7 @@ body: |
; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
+ ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
@@ -13596,17 +13585,16 @@ body: |
; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
- ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
- ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; SI-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
+ ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
;
; CI-HSA-LABEL: name: test_load_global_v2p3_align1
; CI-HSA: liveins: $vgpr0_vgpr1
; CI-HSA-NEXT: {{ $}}
; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 1, addrspace 1)
- ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 1, addrspace 1)
+ ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
;
; CI-MESA-LABEL: name: test_load_global_v2p3_align1
; CI-MESA: liveins: $vgpr0_vgpr1
@@ -13629,6 +13617,7 @@ body: |
; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
+ ; CI-MESA-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
; CI-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
@@ -13644,9 +13633,9 @@ body: |
; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
- ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
- ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
- ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; CI-MESA-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
+ ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
+ ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
;
; VI-LABEL: name: test_load_global_v2p3_align1
; VI: liveins: $vgpr0_vgpr1
@@ -13669,6 +13658,7 @@ body: |
; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
+ ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
@@ -13684,17 +13674,16 @@ body: |
; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
- ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; VI-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
+ ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
;
; GFX9-HSA-LABEL: name: test_load_global_v2p3_align1
; GFX9-HSA: liveins: $vgpr0_vgpr1
; GFX9-HSA-NEXT: {{ $}}
; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 1, addrspace 1)
- ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 1, addrspace 1)
+ ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
;
; GFX9-MESA-LABEL: name: test_load_global_v2p3_align1
; GFX9-MESA: liveins: $vgpr0_vgpr1
@@ -13717,6 +13706,7 @@ body: |
; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
+ ; GFX9-MESA-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
; GFX9-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
@@ -13732,9 +13722,9 @@ body: |
; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
- ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
- ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
- ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; GFX9-MESA-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
+ ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
+ ; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 1, addrspace 1)
$vgpr0_vgpr1 = COPY %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
index 1608234d6b2bc5..1249de647bb759 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
@@ -14595,81 +14595,71 @@ body: |
; SI: liveins: $vgpr0
; SI-NEXT: {{ $}}
; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
+ ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
;
; CI-LABEL: name: test_load_local_v2p3_align8
; CI: liveins: $vgpr0
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
- ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
+ ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
;
; CI-DS128-LABEL: name: test_load_local_v2p3_align8
; CI-DS128: liveins: $vgpr0
; CI-DS128-NEXT: {{ $}}
; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
- ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
+ ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
;
; VI-LABEL: name: test_load_local_v2p3_align8
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
+ ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
;
; GFX9-LABEL: name: test_load_local_v2p3_align8
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
+ ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
;
; GFX9-UNALIGNED-LABEL: name: test_load_local_v2p3_align8
; GFX9-UNALIGNED: liveins: $vgpr0
; GFX9-UNALIGNED-NEXT: {{ $}}
; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
- ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
+ ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
;
; GFX10-LABEL: name: test_load_local_v2p3_align8
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
- ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
+ ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
;
; GFX10-UNALIGNED-LABEL: name: test_load_local_v2p3_align8
; GFX10-UNALIGNED: liveins: $vgpr0
; GFX10-UNALIGNED-NEXT: {{ $}}
; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
- ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
+ ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
;
; GFX11-LABEL: name: test_load_local_v2p3_align8
; GFX11: liveins: $vgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
- ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
+ ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
;
; GFX11-UNALIGNED-LABEL: name: test_load_local_v2p3_align8
; GFX11-UNALIGNED: liveins: $vgpr0
; GFX11-UNALIGNED-NEXT: {{ $}}
; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
- ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
+ ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
%0:_(p3) = COPY $vgpr0
%1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 3)
$vgpr0_vgpr1 = COPY %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir
index 472cbe559e56f4..741f878c86f8b6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir
@@ -16526,117 +16526,106 @@ body: |
; SI: liveins: $vgpr0
; SI-NEXT: {{ $}}
; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
- ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+ ; SI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), align 8, addrspace 5)
; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
- ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
- ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (p3) from unknown-address + 4, addrspace 5)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3)
+ ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
;
; CI-LABEL: name: test_load_private_v2p3_align8
; CI: liveins: $vgpr0
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
- ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), align 8, addrspace 5)
; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
- ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
- ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
- ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (p3) from unknown-address + 4, addrspace 5)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3)
+ ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
;
; VI-LABEL: name: test_load_private_v2p3_align8
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
- ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), align 8, addrspace 5)
; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
- ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (p3) from unknown-address + 4, addrspace 5)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3)
+ ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
;
; GFX9-LABEL: name: test_load_private_v2p3_align8
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
- ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+ ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), align 8, addrspace 5)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
- ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (p3) from unknown-address + 4, addrspace 5)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3)
+ ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
;
; GFX10-LABEL: name: test_load_private_v2p3_align8
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
- ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+ ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), align 8, addrspace 5)
; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
- ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
- ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
- ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (p3) from unknown-address + 4, addrspace 5)
+ ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3)
+ ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
;
; GFX11-LABEL: name: test_load_private_v2p3_align8
; GFX11: liveins: $vgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
- ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s32>), addrspace 5)
- ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p5) :: (load (<2 x p3>), addrspace 5)
+ ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
;
; GFX12-LABEL: name: test_load_private_v2p3_align8
; GFX12: liveins: $vgpr0
; GFX12-NEXT: {{ $}}
; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
- ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s32>), addrspace 5)
- ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p5) :: (load (<2 x p3>), addrspace 5)
+ ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
;
; UNALIGNED_GFX9-LABEL: name: test_load_private_v2p3_align8
; UNALIGNED_GFX9: liveins: $vgpr0
; UNALIGNED_GFX9-NEXT: {{ $}}
; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
- ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+ ; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), align 8, addrspace 5)
; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
- ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
- ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
- ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (p3) from unknown-address + 4, addrspace 5)
+ ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3)
+ ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
;
; UNALIGNED_GFX10-LABEL: name: test_load_private_v2p3_align8
; UNALIGNED_GFX10: liveins: $vgpr0
; UNALIGNED_GFX10-NEXT: {{ $}}
; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
- ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
+ ; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), align 8, addrspace 5)
; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
- ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
- ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
- ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (p3) from unknown-address + 4, addrspace 5)
+ ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3)
+ ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
;
; UNALIGNED_GFX11-LABEL: name: test_load_private_v2p3_align8
; UNALIGNED_GFX11: liveins: $vgpr0
; UNALIGNED_GFX11-NEXT: {{ $}}
; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
- ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s32>), addrspace 5)
- ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p5) :: (load (<2 x p3>), addrspace 5)
+ ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
;
; UNALIGNED_GFX12-LABEL: name: test_load_private_v2p3_align8
; UNALIGNED_GFX12: liveins: $vgpr0
; UNALIGNED_GFX12-NEXT: {{ $}}
; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
- ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s32>), addrspace 5)
- ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
- ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
+ ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p5) :: (load (<2 x p3>), addrspace 5)
+ ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
%0:_(p5) = COPY $vgpr0
%1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 5)
$vgpr0_vgpr1 = COPY %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
index b9c72d39ed45b6..f2a88a21a286ef 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
@@ -2310,9 +2310,9 @@ body: |
; SI-NEXT: {{ $}}
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
- ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>)
- ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY1]](<2 x p3>)
+ ; SI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3)
+ ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[PTRTOINT]](s32)
; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
@@ -2332,7 +2332,8 @@ body: |
; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
- ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+ ; SI-NEXT: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3)
+ ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[PTRTOINT1]](s32)
; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32)
; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
@@ -2352,17 +2353,16 @@ body: |
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
- ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
- ; CI-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 1, addrspace 1)
+ ; CI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 1, addrspace 1)
;
; VI-LABEL: name: test_store_global_v2p3_align1
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
- ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>)
- ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY1]](<2 x p3>)
+ ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3)
+ ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[PTRTOINT]](s32)
; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
@@ -2383,7 +2383,8 @@ body: |
; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
- ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+ ; VI-NEXT: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3)
+ ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[PTRTOINT1]](s32)
; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32)
; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
@@ -2404,8 +2405,7 @@ body: |
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
- ; GFX9-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 1, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 1, addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<2 x p3>) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store (<2 x p3>), align 1, addrspace 1)
@@ -2422,9 +2422,9 @@ body: |
; SI-NEXT: {{ $}}
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
- ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>)
- ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY1]](<2 x p3>)
+ ; SI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3)
+ ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[PTRTOINT]](s32)
; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
@@ -2433,7 +2433,8 @@ body: |
; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
- ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+ ; SI-NEXT: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3)
+ ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[PTRTOINT1]](s32)
; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32)
; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
@@ -2444,17 +2445,16 @@ body: |
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
- ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
- ; CI-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 2, addrspace 1)
+ ; CI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 2, addrspace 1)
;
; VI-LABEL: name: test_store_global_v2p3_align2
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
- ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>)
- ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY1]](<2 x p3>)
+ ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3)
+ ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[PTRTOINT]](s32)
; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
@@ -2463,7 +2463,8 @@ body: |
; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
- ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+ ; VI-NEXT: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3)
+ ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[PTRTOINT1]](s32)
; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32)
; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
@@ -2474,8 +2475,7 @@ body: |
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
- ; GFX9-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 2, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 2, addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<2 x p3>) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store (<2 x p3>), align 2, addrspace 1)
@@ -2492,32 +2492,28 @@ body: |
; SI-NEXT: {{ $}}
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
- ; SI-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1)
+ ; SI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 4, addrspace 1)
;
; CI-LABEL: name: test_store_global_v2p3_align4
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
- ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
- ; CI-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1)
+ ; CI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 4, addrspace 1)
;
; VI-LABEL: name: test_store_global_v2p3_align4
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
- ; VI-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1)
+ ; VI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 4, addrspace 1)
;
; GFX9-LABEL: name: test_store_global_v2p3_align4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
- ; GFX9-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 4, addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<2 x p3>) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store (<2 x p3>), align 4, addrspace 1)
@@ -2534,32 +2530,28 @@ body: |
; SI-NEXT: {{ $}}
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
- ; SI-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1)
+ ; SI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), addrspace 1)
;
; CI-LABEL: name: test_store_global_v2p3_align8
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
- ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
- ; CI-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1)
+ ; CI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), addrspace 1)
;
; VI-LABEL: name: test_store_global_v2p3_align8
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
- ; VI-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1)
+ ; VI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), addrspace 1)
;
; GFX9-LABEL: name: test_store_global_v2p3_align8
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
- ; GFX9-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1)
+ ; GFX9-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<2 x p3>) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store (<2 x p3>), align 8, addrspace 1)
@@ -2576,32 +2568,28 @@ body: |
; SI-NEXT: {{ $}}
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
- ; SI-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1)
+ ; SI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 16, addrspace 1)
;
; CI-LABEL: name: test_store_global_v2p3_align16
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
- ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
- ; CI-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1)
+ ; CI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 16, addrspace 1)
;
; VI-LABEL: name: test_store_global_v2p3_align16
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
- ; VI-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1)
+ ; VI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 16, addrspace 1)
;
; GFX9-LABEL: name: test_store_global_v2p3_align16
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
- ; GFX9-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 16, addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<2 x p3>) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store (<2 x p3>), align 16, addrspace 1)
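
For orientation, a minimal LLVM IR sketch — not taken from this commit or its tests; the function name and signature are illustrative only — of the kind of input whose GlobalISel legalization the <2 x p3> load/store checks above exercise:

define void @copy_v2p3(ptr addrspace(1) %src, ptr addrspace(1) %dst) {
  ; Loads and stores a <2 x ptr addrspace(3)> value; the IRTranslator lowers
  ; these to G_LOAD/G_STORE of <2 x p3>, which is what the MIR tests above
  ; feed into the legalizer.
  %v = load <2 x ptr addrspace(3)>, ptr addrspace(1) %src, align 8
  store <2 x ptr addrspace(3)> %v, ptr addrspace(1) %dst, align 8
  ret void
}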