[llvm] 86813e2 - AMDGPU/GlobalISel: Select llvm.amdgcn.s.buffer.load
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 17 08:03:46 PST 2020
Author: Matt Arsenault
Date: 2020-02-17T08:02:40-08:00
New Revision: 86813e2768e98bcfada2700d5e49494bbab60e42
URL: https://github.com/llvm/llvm-project/commit/86813e2768e98bcfada2700d5e49494bbab60e42
DIFF: https://github.com/llvm/llvm-project/commit/86813e2768e98bcfada2700d5e49494bbab60e42.diff
LOG: AMDGPU/GlobalISel: Select llvm.amdgcn.s.buffer.load
Unlike the DAG lowering, this does not try to reject a set dlc bit on
pre-gfx10 targets.
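
For reference, the intrinsic's final operand is the cachepolicy immediate;
the tests below pass 1 to set glc, and the selected instruction carries the
bit through (compare s_buffer_load_i32 with s_buffer_load_i32_glc). A minimal
sketch of what the extract_glc/extract_dlc pattern fragments compute, assuming
glc lives in bit 0 and dlc in bit 2 (the bit positions are an assumption, not
spelled out in this diff):

  // Sketch only: the glc/dlc bit positions are assumed, not taken from this patch.
  static unsigned extractGLC(unsigned CachePolicy) { return CachePolicy & 1; }
  static unsigned extractDLC(unsigned CachePolicy) { return (CachePolicy >> 2) & 1; }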
Added:
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
Modified:
llvm/lib/Target/AMDGPU/AMDGPUGISel.td
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
llvm/lib/Target/AMDGPU/SMInstructions.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index f8fee8621a51..63b1b48c651d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -108,6 +108,14 @@ def gi_mubuf_offset_atomic :
GIComplexOperandMatcher<s64, "selectMUBUFOffsetAtomic">,
GIComplexPatternEquiv<MUBUFOffsetAtomic>;
+def gi_smrd_buffer_imm :
+ GIComplexOperandMatcher<s64, "selectSMRDBufferImm">,
+ GIComplexPatternEquiv<SMRDBufferImm>;
+
+def gi_smrd_buffer_imm32 :
+ GIComplexOperandMatcher<s64, "selectSMRDBufferImm32">,
+ GIComplexPatternEquiv<SMRDBufferImm32>;
+
// Separate load nodes are defined to glue m0 initialization in
// SelectionDAG. The GISel selector can just insert m0 initialization
// directly before selecting a glue-less load, so hide this
@@ -182,6 +190,7 @@ def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_XOR, SIbuffer_atomic_xor>;
def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_INC, SIbuffer_atomic_inc>;
def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_DEC, SIbuffer_atomic_dec>;
def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_CMPSWAP, SIbuffer_atomic_cmpswap>;
+def : GINodeEquiv<G_AMDGPU_S_BUFFER_LOAD, SIsbuffer_load>;
class GISelSop2Pat <
SDPatternOperator node,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index b47685990960..606cd09f8d9e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2859,6 +2859,45 @@ AMDGPUInstructionSelector::selectMUBUFOffsetAtomic(MachineOperand &Root) const {
}};
}
+/// Get an immediate that must fit in 32 bits, and is treated as zero-extended.
+static Optional<uint64_t> getConstantZext32Val(Register Reg,
+ const MachineRegisterInfo &MRI) {
+ // getConstantVRegVal sign-extends the value, so check that it still fits in 32 bits.
+ Optional<int64_t> OffsetVal = getConstantVRegVal(Reg, MRI);
+ if (!OffsetVal || !isInt<32>(*OffsetVal))
+ return None;
+ return Lo_32(*OffsetVal);
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectSMRDBufferImm(MachineOperand &Root) const {
+ Optional<uint64_t> OffsetVal = getConstantZext32Val(Root.getReg(), *MRI);
+ if (!OffsetVal)
+ return {};
+
+ Optional<int64_t> EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, *OffsetVal);
+ if (!EncodedImm)
+ return {};
+
+ return {{ [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); } }};
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectSMRDBufferImm32(MachineOperand &Root) const {
+ assert(STI.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
+
+ Optional<uint64_t> OffsetVal = getConstantZext32Val(Root.getReg(), *MRI);
+ if (!OffsetVal)
+ return {};
+
+ Optional<int64_t> EncodedImm
+ = AMDGPU::getSMRDEncodedLiteralOffset32(STI, *OffsetVal);
+ if (!EncodedImm)
+ return {};
+
+ return {{ [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); } }};
+}
+
void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB,
const MachineInstr &MI,
int OpIdx) const {
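
The getConstantZext32Val helper above exists because getConstantVRegVal
returns the constant sign-extended to 64 bits, while a buffer offset is an
unsigned 32-bit quantity. A small standalone illustration of that round trip,
using isInt<32> and Lo_32 from llvm/Support/MathExtras.h (the -4 value is
chosen to match the neg4 test below):

  #include "llvm/Support/MathExtras.h"
  #include <cassert>
  #include <cstdint>

  int main() {
    // A source-level offset of -4 arrives from getConstantVRegVal as a
    // sign-extended 64-bit value: 0xFFFFFFFFFFFFFFFC.
    int64_t SExtVal = -4;
    assert(llvm::isInt<32>(SExtVal));       // fits in 32 bits, so it is accepted
    uint64_t ZExt32 = llvm::Lo_32(SExtVal); // low half only: 0xFFFFFFFC
    assert(ZExt32 == 4294967292u);          // the unsigned offset the tests expect
    return 0;
  }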
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index d440932c72dd..9422aafb0608 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -222,6 +222,9 @@ class AMDGPUInstructionSelector : public InstructionSelector {
InstructionSelector::ComplexRendererFns
selectMUBUFAddr64Atomic(MachineOperand &Root) const;
+ ComplexRendererFns selectSMRDBufferImm(MachineOperand &Root) const;
+ ComplexRendererFns selectSMRDBufferImm32(MachineOperand &Root) const;
+
void renderTruncImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx = -1) const;
diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td
index 43e41f562445..0e8e3f944f3d 100644
--- a/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -769,7 +769,7 @@ multiclass SMLoad_Pattern <string Instr, ValueType vt> {
// 1. Offset as an immediate
def : GCNPat <
(SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), timm:$cachepolicy),
- (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, (extract_glc $cachepolicy),
+ (vt (!cast<SM_Pseudo>(Instr#"_IMM") SReg_128:$sbase, i32imm:$offset, (extract_glc $cachepolicy),
(extract_dlc $cachepolicy)))> {
let AddedComplexity = 2;
}
@@ -777,7 +777,8 @@ multiclass SMLoad_Pattern <string Instr, ValueType vt> {
// 2. 32-bit IMM offset on CI
def : GCNPat <
(vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), timm:$cachepolicy)),
- (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, (extract_glc $cachepolicy), (extract_dlc $cachepolicy))> {
+ (!cast<InstSI>(Instr#"_IMM_ci") SReg_128:$sbase, smrd_literal_offset:$offset,
+ (extract_glc $cachepolicy), (extract_dlc $cachepolicy))> {
let OtherPredicates = [isGFX7Only];
let AddedComplexity = 1;
}
@@ -785,7 +786,7 @@ multiclass SMLoad_Pattern <string Instr, ValueType vt> {
// 3. Offset loaded in a 32-bit SGPR
def : GCNPat <
(SIsbuffer_load v4i32:$sbase, i32:$offset, timm:$cachepolicy),
- (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, (extract_glc $cachepolicy),
+ (vt (!cast<SM_Pseudo>(Instr#"_SGPR") SReg_128:$sbase, SReg_32:$offset, (extract_glc $cachepolicy),
(extract_dlc $cachepolicy)))
>;
}
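
The three patterns above are tried in decreasing AddedComplexity order: the
plain immediate form first, the GFX7-only 32-bit literal form second, and the
SGPR form as the fallback. The tests below pin down the per-generation
encodings: GFX6 holds a dword-scaled 8-bit immediate (1020 selects as 255,
while 1023 and 1024 fall back to the SGPR form), GFX7 additionally has a
dword-scaled 32-bit literal (1024 selects as S_BUFFER_LOAD_DWORD_IMM_ci with
256, and -4 as 1073741823), and GFX8 takes the byte offset directly (1020,
1024, 1025 select unchanged). A rough sketch of that encoding decision, as a
hypothetical stand-in for AMDGPU::getSMRDEncodedOffset (the field widths are
inferred from the test output, not from this patch):

  #include <cstdint>
  #include <optional>

  // Hypothetical stand-in for AMDGPU::getSMRDEncodedOffset; field widths are
  // inferred from the checked-in test expectations, not from this patch.
  std::optional<int64_t> encodeSMRDOffsetSketch(bool IsGFX8Plus,
                                                uint32_t ByteOffset) {
    if (IsGFX8Plus) {
      // GFX8 encodes the byte offset directly in a wider (20-bit) field.
      if (ByteOffset < (1u << 20))
        return ByteOffset;
      return std::nullopt;
    }
    // GFX6/GFX7 immediates are dword-scaled and limited to 8 bits, so the
    // offset must be dword-aligned and at most 1020 bytes.
    if (ByteOffset % 4 != 0 || ByteOffset / 4 > 255)
      return std::nullopt;
    return ByteOffset / 4;
  }

  // encodeSMRDOffsetSketch(false, 1020) == 255      (GFX6 test below)
  // encodeSMRDOffsetSketch(false, 1024) == nullopt  -> GFX7 literal / SGPR form
  // encodeSMRDOffsetSketch(true, 1020)  == 1020     (GFX8 test below)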
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
new file mode 100644
index 000000000000..9c3da4617310
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
@@ -0,0 +1,4281 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -simplify-mir -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX6 %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -simplify-mir -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX7 %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -simplify-mir -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX8 %s
+
+; FIXME: Merge with regbankselect, which mostly overlaps once all types are supported.
+
+; Natural mapping
+define amdgpu_ps i32 @s_buffer_load_i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) {
+ ; GFX6-LABEL: name: s_buffer_load_i32
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX6: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX7-LABEL: name: s_buffer_load_i32
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX7: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX8-LABEL: name: s_buffer_load_i32
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX8: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
+ %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
+ ret i32 %val
+}
+
+define amdgpu_ps i32 @s_buffer_load_i32_glc(<4 x i32> inreg %rsrc, i32 inreg %soffset) {
+ ; GFX6-LABEL: name: s_buffer_load_i32_glc
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1, 0 :: (dereferenceable invariant load 4)
+ ; GFX6: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX7-LABEL: name: s_buffer_load_i32_glc
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1, 0 :: (dereferenceable invariant load 4)
+ ; GFX7: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX8-LABEL: name: s_buffer_load_i32_glc
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1, 0 :: (dereferenceable invariant load 4)
+ ; GFX8: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
+ %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 %soffset, i32 1)
+ ret i32 %val
+}
+
+define amdgpu_ps <2 x i32> @s_buffer_load_v2i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) {
+ ; GFX6-LABEL: name: s_buffer_load_v2i32
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 8, align 4)
+ ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0
+ ; GFX6: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1
+ ; GFX6: $sgpr0 = COPY [[COPY5]]
+ ; GFX6: $sgpr1 = COPY [[COPY6]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
+ ; GFX7-LABEL: name: s_buffer_load_v2i32
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 8, align 4)
+ ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0
+ ; GFX7: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1
+ ; GFX7: $sgpr0 = COPY [[COPY5]]
+ ; GFX7: $sgpr1 = COPY [[COPY6]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
+ ; GFX8-LABEL: name: s_buffer_load_v2i32
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 8, align 4)
+ ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0
+ ; GFX8: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1
+ ; GFX8: $sgpr0 = COPY [[COPY5]]
+ ; GFX8: $sgpr1 = COPY [[COPY6]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
+ %val = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
+ ret <2 x i32> %val
+}
+
+define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) {
+ ; GFX6-LABEL: name: s_buffer_load_v3i32
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128_with_sub0_sub1_sub2 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 12, align 4)
+ ; GFX6: [[COPY5:%[0-9]+]]:sreg_96 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0_sub1_sub2
+ ; GFX6: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub0
+ ; GFX6: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub1
+ ; GFX6: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub2
+ ; GFX6: $sgpr0 = COPY [[COPY6]]
+ ; GFX6: $sgpr1 = COPY [[COPY7]]
+ ; GFX6: $sgpr2 = COPY [[COPY8]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2
+ ; GFX7-LABEL: name: s_buffer_load_v3i32
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128_with_sub0_sub1_sub2 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 12, align 4)
+ ; GFX7: [[COPY5:%[0-9]+]]:sreg_96 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0_sub1_sub2
+ ; GFX7: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub0
+ ; GFX7: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub1
+ ; GFX7: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub2
+ ; GFX7: $sgpr0 = COPY [[COPY6]]
+ ; GFX7: $sgpr1 = COPY [[COPY7]]
+ ; GFX7: $sgpr2 = COPY [[COPY8]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2
+ ; GFX8-LABEL: name: s_buffer_load_v3i32
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128_with_sub0_sub1_sub2 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 12, align 4)
+ ; GFX8: [[COPY5:%[0-9]+]]:sreg_96 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0_sub1_sub2
+ ; GFX8: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub0
+ ; GFX8: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub1
+ ; GFX8: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub2
+ ; GFX8: $sgpr0 = COPY [[COPY6]]
+ ; GFX8: $sgpr1 = COPY [[COPY7]]
+ ; GFX8: $sgpr2 = COPY [[COPY8]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2
+ %val = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
+ ret <3 x i32> %val
+}
+
+define amdgpu_ps <8 x i32> @s_buffer_load_v8i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) {
+ ; GFX6-LABEL: name: s_buffer_load_v8i32
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sreg_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 32, align 4)
+ ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0
+ ; GFX6: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1
+ ; GFX6: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2
+ ; GFX6: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3
+ ; GFX6: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4
+ ; GFX6: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5
+ ; GFX6: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6
+ ; GFX6: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7
+ ; GFX6: $sgpr0 = COPY [[COPY5]]
+ ; GFX6: $sgpr1 = COPY [[COPY6]]
+ ; GFX6: $sgpr2 = COPY [[COPY7]]
+ ; GFX6: $sgpr3 = COPY [[COPY8]]
+ ; GFX6: $sgpr4 = COPY [[COPY9]]
+ ; GFX6: $sgpr5 = COPY [[COPY10]]
+ ; GFX6: $sgpr6 = COPY [[COPY11]]
+ ; GFX6: $sgpr7 = COPY [[COPY12]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7
+ ; GFX7-LABEL: name: s_buffer_load_v8i32
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sreg_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 32, align 4)
+ ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0
+ ; GFX7: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1
+ ; GFX7: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2
+ ; GFX7: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3
+ ; GFX7: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4
+ ; GFX7: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5
+ ; GFX7: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6
+ ; GFX7: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7
+ ; GFX7: $sgpr0 = COPY [[COPY5]]
+ ; GFX7: $sgpr1 = COPY [[COPY6]]
+ ; GFX7: $sgpr2 = COPY [[COPY7]]
+ ; GFX7: $sgpr3 = COPY [[COPY8]]
+ ; GFX7: $sgpr4 = COPY [[COPY9]]
+ ; GFX7: $sgpr5 = COPY [[COPY10]]
+ ; GFX7: $sgpr6 = COPY [[COPY11]]
+ ; GFX7: $sgpr7 = COPY [[COPY12]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7
+ ; GFX8-LABEL: name: s_buffer_load_v8i32
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sreg_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 32, align 4)
+ ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0
+ ; GFX8: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1
+ ; GFX8: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2
+ ; GFX8: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3
+ ; GFX8: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4
+ ; GFX8: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5
+ ; GFX8: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6
+ ; GFX8: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7
+ ; GFX8: $sgpr0 = COPY [[COPY5]]
+ ; GFX8: $sgpr1 = COPY [[COPY6]]
+ ; GFX8: $sgpr2 = COPY [[COPY7]]
+ ; GFX8: $sgpr3 = COPY [[COPY8]]
+ ; GFX8: $sgpr4 = COPY [[COPY9]]
+ ; GFX8: $sgpr5 = COPY [[COPY10]]
+ ; GFX8: $sgpr6 = COPY [[COPY11]]
+ ; GFX8: $sgpr7 = COPY [[COPY12]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7
+ %val = call <8 x i32> @llvm.amdgcn.s.buffer.load.v8i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
+ ret <8 x i32> %val
+}
+
+define amdgpu_ps <16 x i32> @s_buffer_load_v16i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) {
+ ; GFX6-LABEL: name: s_buffer_load_v16i32
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sreg_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 64, align 4)
+ ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0
+ ; GFX6: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1
+ ; GFX6: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2
+ ; GFX6: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3
+ ; GFX6: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4
+ ; GFX6: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5
+ ; GFX6: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6
+ ; GFX6: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7
+ ; GFX6: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8
+ ; GFX6: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9
+ ; GFX6: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10
+ ; GFX6: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11
+ ; GFX6: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12
+ ; GFX6: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13
+ ; GFX6: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14
+ ; GFX6: [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15
+ ; GFX6: $sgpr0 = COPY [[COPY5]]
+ ; GFX6: $sgpr1 = COPY [[COPY6]]
+ ; GFX6: $sgpr2 = COPY [[COPY7]]
+ ; GFX6: $sgpr3 = COPY [[COPY8]]
+ ; GFX6: $sgpr4 = COPY [[COPY9]]
+ ; GFX6: $sgpr5 = COPY [[COPY10]]
+ ; GFX6: $sgpr6 = COPY [[COPY11]]
+ ; GFX6: $sgpr7 = COPY [[COPY12]]
+ ; GFX6: $sgpr8 = COPY [[COPY13]]
+ ; GFX6: $sgpr9 = COPY [[COPY14]]
+ ; GFX6: $sgpr10 = COPY [[COPY15]]
+ ; GFX6: $sgpr11 = COPY [[COPY16]]
+ ; GFX6: $sgpr12 = COPY [[COPY17]]
+ ; GFX6: $sgpr13 = COPY [[COPY18]]
+ ; GFX6: $sgpr14 = COPY [[COPY19]]
+ ; GFX6: $sgpr15 = COPY [[COPY20]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15
+ ; GFX7-LABEL: name: s_buffer_load_v16i32
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sreg_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 64, align 4)
+ ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0
+ ; GFX7: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1
+ ; GFX7: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2
+ ; GFX7: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3
+ ; GFX7: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4
+ ; GFX7: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5
+ ; GFX7: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6
+ ; GFX7: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7
+ ; GFX7: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8
+ ; GFX7: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9
+ ; GFX7: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10
+ ; GFX7: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11
+ ; GFX7: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12
+ ; GFX7: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13
+ ; GFX7: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14
+ ; GFX7: [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15
+ ; GFX7: $sgpr0 = COPY [[COPY5]]
+ ; GFX7: $sgpr1 = COPY [[COPY6]]
+ ; GFX7: $sgpr2 = COPY [[COPY7]]
+ ; GFX7: $sgpr3 = COPY [[COPY8]]
+ ; GFX7: $sgpr4 = COPY [[COPY9]]
+ ; GFX7: $sgpr5 = COPY [[COPY10]]
+ ; GFX7: $sgpr6 = COPY [[COPY11]]
+ ; GFX7: $sgpr7 = COPY [[COPY12]]
+ ; GFX7: $sgpr8 = COPY [[COPY13]]
+ ; GFX7: $sgpr9 = COPY [[COPY14]]
+ ; GFX7: $sgpr10 = COPY [[COPY15]]
+ ; GFX7: $sgpr11 = COPY [[COPY16]]
+ ; GFX7: $sgpr12 = COPY [[COPY17]]
+ ; GFX7: $sgpr13 = COPY [[COPY18]]
+ ; GFX7: $sgpr14 = COPY [[COPY19]]
+ ; GFX7: $sgpr15 = COPY [[COPY20]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15
+ ; GFX8-LABEL: name: s_buffer_load_v16i32
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sreg_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 64, align 4)
+ ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0
+ ; GFX8: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1
+ ; GFX8: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2
+ ; GFX8: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3
+ ; GFX8: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4
+ ; GFX8: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5
+ ; GFX8: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6
+ ; GFX8: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7
+ ; GFX8: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8
+ ; GFX8: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9
+ ; GFX8: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10
+ ; GFX8: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11
+ ; GFX8: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12
+ ; GFX8: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13
+ ; GFX8: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14
+ ; GFX8: [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15
+ ; GFX8: $sgpr0 = COPY [[COPY5]]
+ ; GFX8: $sgpr1 = COPY [[COPY6]]
+ ; GFX8: $sgpr2 = COPY [[COPY7]]
+ ; GFX8: $sgpr3 = COPY [[COPY8]]
+ ; GFX8: $sgpr4 = COPY [[COPY9]]
+ ; GFX8: $sgpr5 = COPY [[COPY10]]
+ ; GFX8: $sgpr6 = COPY [[COPY11]]
+ ; GFX8: $sgpr7 = COPY [[COPY12]]
+ ; GFX8: $sgpr8 = COPY [[COPY13]]
+ ; GFX8: $sgpr9 = COPY [[COPY14]]
+ ; GFX8: $sgpr10 = COPY [[COPY15]]
+ ; GFX8: $sgpr11 = COPY [[COPY16]]
+ ; GFX8: $sgpr12 = COPY [[COPY17]]
+ ; GFX8: $sgpr13 = COPY [[COPY18]]
+ ; GFX8: $sgpr14 = COPY [[COPY19]]
+ ; GFX8: $sgpr15 = COPY [[COPY20]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15
+ %val = call <16 x i32> @llvm.amdgcn.s.buffer.load.v16i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
+ ret <16 x i32> %val
+}
+
+define amdgpu_ps i32 @s_buffer_load_i32_offset_1(<4 x i32> inreg %rsrc) {
+ ; GFX6-LABEL: name: s_buffer_load_i32_offset_1
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX6: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX7-LABEL: name: s_buffer_load_i32_offset_1
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX7: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX8-LABEL: name: s_buffer_load_i32_offset_1
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX8: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
+ %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1, i32 0)
+ ret i32 %val
+}
+
+define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_4(<4 x i32> inreg %rsrc) {
+ ; GFX6-LABEL: name: s_buffer_load_i32_offset_glc_4
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 1, 0 :: (dereferenceable invariant load 4)
+ ; GFX6: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX7-LABEL: name: s_buffer_load_i32_offset_glc_4
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 1, 0 :: (dereferenceable invariant load 4)
+ ; GFX7: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX8-LABEL: name: s_buffer_load_i32_offset_glc_4
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 4, 1, 0 :: (dereferenceable invariant load 4)
+ ; GFX8: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
+ %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 4, i32 1)
+ ret i32 %val
+}
+
+define amdgpu_ps i32 @s_buffer_load_i32_offset_255(<4 x i32> inreg %rsrc) {
+ ; GFX6-LABEL: name: s_buffer_load_i32_offset_255
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 255
+ ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX6: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX7-LABEL: name: s_buffer_load_i32_offset_255
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 255
+ ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX7: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX8-LABEL: name: s_buffer_load_i32_offset_255
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX8: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
+ %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 255, i32 0)
+ ret i32 %val
+}
+
+define amdgpu_ps i32 @s_buffer_load_i32_offset_256(<4 x i32> inreg %rsrc) {
+ ; GFX6-LABEL: name: s_buffer_load_i32_offset_256
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX6: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX7-LABEL: name: s_buffer_load_i32_offset_256
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX7: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX8-LABEL: name: s_buffer_load_i32_offset_256
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 256, 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX8: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
+ %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 256, i32 0)
+ ret i32 %val
+}
+
+define amdgpu_ps i32 @s_buffer_load_i32_offset_1020(<4 x i32> inreg %rsrc) {
+ ; GFX6-LABEL: name: s_buffer_load_i32_offset_1020
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX6: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX7-LABEL: name: s_buffer_load_i32_offset_1020
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX7: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX8-LABEL: name: s_buffer_load_i32_offset_1020
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1020, 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX8: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
+ %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1020, i32 0)
+ ret i32 %val
+}
+
+define amdgpu_ps i32 @s_buffer_load_i32_offset_1023(<4 x i32> inreg %rsrc) {
+ ; GFX6-LABEL: name: s_buffer_load_i32_offset_1023
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1023
+ ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX6: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX7-LABEL: name: s_buffer_load_i32_offset_1023
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1023
+ ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX7: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX8-LABEL: name: s_buffer_load_i32_offset_1023
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1023, 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX8: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
+ %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1023, i32 0)
+ ret i32 %val
+}
+
+define amdgpu_ps i32 @s_buffer_load_i32_offset_1024(<4 x i32> inreg %rsrc) {
+ ; GFX6-LABEL: name: s_buffer_load_i32_offset_1024
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
+ ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX6: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX7-LABEL: name: s_buffer_load_i32_offset_1024
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 256, 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX7: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX8-LABEL: name: s_buffer_load_i32_offset_1024
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1024, 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX8: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
+ %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1024, i32 0)
+ ret i32 %val
+}
+
+define amdgpu_ps i32 @s_buffer_load_i32_offset_1025(<4 x i32> inreg %rsrc) {
+ ; GFX6-LABEL: name: s_buffer_load_i32_offset_1025
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1025
+ ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX6: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX7-LABEL: name: s_buffer_load_i32_offset_1025
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1025
+ ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX7: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX8-LABEL: name: s_buffer_load_i32_offset_1025
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1025, 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX8: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
+ %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1025, i32 0)
+ ret i32 %val
+}
+
+define amdgpu_ps i32 @s_buffer_load_i32_offset_neg1(<4 x i32> inreg %desc) {
+ ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg1
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
+ ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX6: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg1
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
+ ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX7: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg1
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
+ ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX8: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
+ %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1, i32 0)
+ ret i32 %load
+}
+
+define amdgpu_ps i32 @s_buffer_load_i32_offset_neg4(<4 x i32> inreg %desc) {
+ ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg4
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967292
+ ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX6: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg4
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741823, 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX7: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg4
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967292
+ ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX8: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
+ %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -4, i32 0)
+ ret i32 %load
+}
+
+define amdgpu_ps i32 @s_buffer_load_i32_offset_neg8(<4 x i32> inreg %desc) {
+ ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg8
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967288
+ ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX6: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg8
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741822, 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX7: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg8
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967288
+ ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX8: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
+ %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -8, i32 0)
+ ret i32 %load
+}
+
+define amdgpu_ps i32 @s_buffer_load_i32_offset_bit31(<4 x i32> inreg %desc) {
+ ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit31
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX6: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit31
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 536870912, 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX7: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit31
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX8: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
+ %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -2147483648, i32 0)
+ ret i32 %load
+}
+
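+; The cachepolicy argument of 1 (glc) appears as the first immediate
+; flag after the offset operand on the selected instruction.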
+define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_bit30(<4 x i32> inreg %desc) {
+ ; GFX6-LABEL: name: s_buffer_load_i32_offset_glc_bit30
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741824
+ ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1, 0 :: (dereferenceable invariant load 4)
+ ; GFX6: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX7-LABEL: name: s_buffer_load_i32_offset_glc_bit30
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 268435456, 1, 0 :: (dereferenceable invariant load 4)
+ ; GFX7: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX8-LABEL: name: s_buffer_load_i32_offset_glc_bit30
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741824
+ ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1, 0 :: (dereferenceable invariant load 4)
+ ; GFX8: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
+ %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1073741824, i32 1)
+ ret i32 %load
+}
+
+define amdgpu_ps i32 @s_buffer_load_i32_offset_bit29(<4 x i32> inreg %desc) {
+ ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit29
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 536870912
+ ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX6: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit29
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 134217728, 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX7: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit29
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 536870912
+ ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX8: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
+ %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 536870912, i32 0)
+ ret i32 %load
+}
+
+define amdgpu_ps i32 @s_buffer_load_i32_offset_bit21(<4 x i32> inreg %desc) {
+ ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit21
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2097152
+ ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX6: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit21
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 524288, 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX7: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit21
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2097152
+ ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX8: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
+ %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 2097152, i32 0)
+ ret i32 %load
+}
+
+define amdgpu_ps i32 @s_buffer_load_i32_offset_bit20(<4 x i32> inreg %desc) {
+ ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit20
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576
+ ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX6: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit20
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 262144, 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX7: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit20
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576
+ ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX8: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
+ %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1048576, i32 0)
+ ret i32 %load
+}
+
+define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit20(<4 x i32> inreg %desc) {
+ ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg_bit20
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4293918720
+ ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX6: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg_bit20
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073479680, 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX7: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg_bit20
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4293918720
+ ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX8: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
+ %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1048576, i32 0)
+ ret i32 %load
+}
+
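+; Bit 19 is the highest offset bit that still fits GFX8's 20-bit byte
+; offset field, so only the GFX8 run selects the IMM form here; GFX6's
+; 8-bit dword-scaled immediate is far too small for any of these offsets.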
+define amdgpu_ps i32 @s_buffer_load_i32_offset_bit19(<4 x i32> inreg %desc) {
+ ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit19
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 524288
+ ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX6: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit19
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 131072, 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX7: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit19
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 524288, 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX8: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
+ %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 524288, i32 0)
+ ret i32 %load
+}
+
+define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit19(<4 x i32> inreg %desc) {
+ ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg_bit19
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294443008
+ ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX6: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg_bit19
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073610752, 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX7: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg_bit19
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294443008
+ ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
+ ; GFX8: $sgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
+ %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -524288, i32 0)
+ ret i32 %load
+}
+
+; Check cases that must be converted to MUBUF because the offset is in a VGPR.
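+; S_BUFFER_LOAD executes on the scalar unit and only accepts a uniform
+; (SGPR or immediate) offset, so a divergent VGPR offset is selected to
+; BUFFER_LOAD_*_OFFEN with the VGPR as voffset and an soffset of 0.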
+define amdgpu_ps float @s_buffer_load_f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) {
+ ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_offset
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
+ %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
+ ret float %val
+}
+
+define amdgpu_ps <2 x float> @s_buffer_load_v2f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) {
+ ; GFX6-LABEL: name: s_buffer_load_v2f32_vgpr_offset
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 8, align 4)
+ ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0
+ ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1
+ ; GFX6: $vgpr0 = COPY [[COPY5]]
+ ; GFX6: $vgpr1 = COPY [[COPY6]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
+ ; GFX7-LABEL: name: s_buffer_load_v2f32_vgpr_offset
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 8, align 4)
+ ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0
+ ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1
+ ; GFX7: $vgpr0 = COPY [[COPY5]]
+ ; GFX7: $vgpr1 = COPY [[COPY6]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
+ ; GFX8-LABEL: name: s_buffer_load_v2f32_vgpr_offset
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 8, align 4)
+ ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0
+ ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1
+ ; GFX8: $vgpr0 = COPY [[COPY5]]
+ ; GFX8: $vgpr1 = COPY [[COPY6]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
+ %val = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
+ ret <2 x float> %val
+}
+
+define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) {
+ ; GFX6-LABEL: name: s_buffer_load_v3f32_vgpr_offset
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[COPY5:%[0-9]+]]:vreg_96 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0_sub1_sub2
+ ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]].sub0
+ ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]].sub1
+ ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]].sub2
+ ; GFX6: $vgpr0 = COPY [[COPY6]]
+ ; GFX6: $vgpr1 = COPY [[COPY7]]
+ ; GFX6: $vgpr2 = COPY [[COPY8]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+ ; GFX7-LABEL: name: s_buffer_load_v3f32_vgpr_offset
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[COPY5:%[0-9]+]]:vreg_96 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0_sub1_sub2
+ ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]].sub0
+ ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]].sub1
+ ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]].sub2
+ ; GFX7: $vgpr0 = COPY [[COPY6]]
+ ; GFX7: $vgpr1 = COPY [[COPY7]]
+ ; GFX7: $vgpr2 = COPY [[COPY8]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+ ; GFX8-LABEL: name: s_buffer_load_v3f32_vgpr_offset
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[COPY5:%[0-9]+]]:vreg_96 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0_sub1_sub2
+ ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]].sub0
+ ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]].sub1
+ ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]].sub2
+ ; GFX8: $vgpr0 = COPY [[COPY6]]
+ ; GFX8: $vgpr1 = COPY [[COPY7]]
+ ; GFX8: $vgpr2 = COPY [[COPY8]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+ %val = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
+ ret <3 x float> %val
+}
+
+define amdgpu_ps <4 x float> @s_buffer_load_v4f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) {
+ ; GFX6-LABEL: name: s_buffer_load_v4f32_vgpr_offset
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
+ ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
+ ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
+ ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3
+ ; GFX6: $vgpr0 = COPY [[COPY5]]
+ ; GFX6: $vgpr1 = COPY [[COPY6]]
+ ; GFX6: $vgpr2 = COPY [[COPY7]]
+ ; GFX6: $vgpr3 = COPY [[COPY8]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX7-LABEL: name: s_buffer_load_v4f32_vgpr_offset
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
+ ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
+ ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
+ ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3
+ ; GFX7: $vgpr0 = COPY [[COPY5]]
+ ; GFX7: $vgpr1 = COPY [[COPY6]]
+ ; GFX7: $vgpr2 = COPY [[COPY7]]
+ ; GFX7: $vgpr3 = COPY [[COPY8]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX8-LABEL: name: s_buffer_load_v4f32_vgpr_offset
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
+ ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
+ ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
+ ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3
+ ; GFX8: $vgpr0 = COPY [[COPY5]]
+ ; GFX8: $vgpr1 = COPY [[COPY6]]
+ ; GFX8: $vgpr2 = COPY [[COPY7]]
+ ; GFX8: $vgpr3 = COPY [[COPY8]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ %val = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
+ ret <4 x float> %val
+}
+
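+; dwordx4 is the widest MUBUF load, so the v8f32 and v16f32 results are
+; assembled from multiple 16-byte BUFFER_LOAD_DWORDX4_OFFEN pieces at
+; increasing immediate offsets.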
+define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) {
+ ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX6: $vgpr0 = COPY [[COPY5]]
+ ; GFX6: $vgpr1 = COPY [[COPY6]]
+ ; GFX6: $vgpr2 = COPY [[COPY7]]
+ ; GFX6: $vgpr3 = COPY [[COPY8]]
+ ; GFX6: $vgpr4 = COPY [[COPY9]]
+ ; GFX6: $vgpr5 = COPY [[COPY10]]
+ ; GFX6: $vgpr6 = COPY [[COPY11]]
+ ; GFX6: $vgpr7 = COPY [[COPY12]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX7: $vgpr0 = COPY [[COPY5]]
+ ; GFX7: $vgpr1 = COPY [[COPY6]]
+ ; GFX7: $vgpr2 = COPY [[COPY7]]
+ ; GFX7: $vgpr3 = COPY [[COPY8]]
+ ; GFX7: $vgpr4 = COPY [[COPY9]]
+ ; GFX7: $vgpr5 = COPY [[COPY10]]
+ ; GFX7: $vgpr6 = COPY [[COPY11]]
+ ; GFX7: $vgpr7 = COPY [[COPY12]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX8: $vgpr0 = COPY [[COPY5]]
+ ; GFX8: $vgpr1 = COPY [[COPY6]]
+ ; GFX8: $vgpr2 = COPY [[COPY7]]
+ ; GFX8: $vgpr3 = COPY [[COPY8]]
+ ; GFX8: $vgpr4 = COPY [[COPY9]]
+ ; GFX8: $vgpr5 = COPY [[COPY10]]
+ ; GFX8: $vgpr6 = COPY [[COPY11]]
+ ; GFX8: $vgpr7 = COPY [[COPY12]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
+ ret <8 x float> %val
+}
+
+define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) {
+ ; GFX6-LABEL: name: s_buffer_load_v16f32_vgpr_offset
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
+ ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
+ ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
+ ; GFX6: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
+ ; GFX6: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
+ ; GFX6: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
+ ; GFX6: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
+ ; GFX6: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
+ ; GFX6: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
+ ; GFX6: $vgpr0 = COPY [[COPY5]]
+ ; GFX6: $vgpr1 = COPY [[COPY6]]
+ ; GFX6: $vgpr2 = COPY [[COPY7]]
+ ; GFX6: $vgpr3 = COPY [[COPY8]]
+ ; GFX6: $vgpr4 = COPY [[COPY9]]
+ ; GFX6: $vgpr5 = COPY [[COPY10]]
+ ; GFX6: $vgpr6 = COPY [[COPY11]]
+ ; GFX6: $vgpr7 = COPY [[COPY12]]
+ ; GFX6: $vgpr8 = COPY [[COPY13]]
+ ; GFX6: $vgpr9 = COPY [[COPY14]]
+ ; GFX6: $vgpr10 = COPY [[COPY15]]
+ ; GFX6: $vgpr11 = COPY [[COPY16]]
+ ; GFX6: $vgpr12 = COPY [[COPY17]]
+ ; GFX6: $vgpr13 = COPY [[COPY18]]
+ ; GFX6: $vgpr14 = COPY [[COPY19]]
+ ; GFX6: $vgpr15 = COPY [[COPY20]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
+ ; GFX7-LABEL: name: s_buffer_load_v16f32_vgpr_offset
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
+ ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
+ ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
+ ; GFX7: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
+ ; GFX7: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
+ ; GFX7: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
+ ; GFX7: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
+ ; GFX7: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
+ ; GFX7: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
+ ; GFX7: $vgpr0 = COPY [[COPY5]]
+ ; GFX7: $vgpr1 = COPY [[COPY6]]
+ ; GFX7: $vgpr2 = COPY [[COPY7]]
+ ; GFX7: $vgpr3 = COPY [[COPY8]]
+ ; GFX7: $vgpr4 = COPY [[COPY9]]
+ ; GFX7: $vgpr5 = COPY [[COPY10]]
+ ; GFX7: $vgpr6 = COPY [[COPY11]]
+ ; GFX7: $vgpr7 = COPY [[COPY12]]
+ ; GFX7: $vgpr8 = COPY [[COPY13]]
+ ; GFX7: $vgpr9 = COPY [[COPY14]]
+ ; GFX7: $vgpr10 = COPY [[COPY15]]
+ ; GFX7: $vgpr11 = COPY [[COPY16]]
+ ; GFX7: $vgpr12 = COPY [[COPY17]]
+ ; GFX7: $vgpr13 = COPY [[COPY18]]
+ ; GFX7: $vgpr14 = COPY [[COPY19]]
+ ; GFX7: $vgpr15 = COPY [[COPY20]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
+ ; GFX8-LABEL: name: s_buffer_load_v16f32_vgpr_offset
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
+ ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
+ ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
+ ; GFX8: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
+ ; GFX8: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
+ ; GFX8: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
+ ; GFX8: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
+ ; GFX8: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
+ ; GFX8: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
+ ; GFX8: $vgpr0 = COPY [[COPY5]]
+ ; GFX8: $vgpr1 = COPY [[COPY6]]
+ ; GFX8: $vgpr2 = COPY [[COPY7]]
+ ; GFX8: $vgpr3 = COPY [[COPY8]]
+ ; GFX8: $vgpr4 = COPY [[COPY9]]
+ ; GFX8: $vgpr5 = COPY [[COPY10]]
+ ; GFX8: $vgpr6 = COPY [[COPY11]]
+ ; GFX8: $vgpr7 = COPY [[COPY12]]
+ ; GFX8: $vgpr8 = COPY [[COPY13]]
+ ; GFX8: $vgpr9 = COPY [[COPY14]]
+ ; GFX8: $vgpr10 = COPY [[COPY15]]
+ ; GFX8: $vgpr11 = COPY [[COPY16]]
+ ; GFX8: $vgpr12 = COPY [[COPY17]]
+ ; GFX8: $vgpr13 = COPY [[COPY18]]
+ ; GFX8: $vgpr14 = COPY [[COPY19]]
+ ; GFX8: $vgpr15 = COPY [[COPY20]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
+ %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
+ ret <16 x float> %val
+}
+
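+; An added offset of 4092 still fits in the 12-bit MUBUF immediate offset
+; field (maximum 4095), so it should fold directly into the instruction.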
+define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4092(<4 x i32> inreg %rsrc, i32 %soffset.base) {
+ ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
+ %soffset = add i32 %soffset.base, 4092
+ %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
+ ret float %val
+}
+
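+; 4095 is the largest value that fits in the MUBUF immediate offset field,
+; so this should still fold.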
+define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4095(<4 x i32> inreg %rsrc, i32 %soffset.base) {
+ ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
+ %soffset = add i32 %soffset.base, 4095
+ %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
+ ret float %val
+}
+
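+; 4096 does not fit in the immediate field. GFX6/7 materialize it into the
+; soffset register; GFX8 splits it into soffset = 1 plus immediate 4095.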
+define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4096(<4 x i32> inreg %rsrc, i32 %soffset.base) {
+ ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
+ ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
+ ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
+ %soffset = add i32 %soffset.base, 4096
+ %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
+ ret float %val
+}
+
+; Make sure the base offset is added to each split load.
+define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4064(<4 x i32> inreg %rsrc, i32 %soffset.base) {
+ ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX6: $vgpr0 = COPY [[COPY5]]
+ ; GFX6: $vgpr1 = COPY [[COPY6]]
+ ; GFX6: $vgpr2 = COPY [[COPY7]]
+ ; GFX6: $vgpr3 = COPY [[COPY8]]
+ ; GFX6: $vgpr4 = COPY [[COPY9]]
+ ; GFX6: $vgpr5 = COPY [[COPY10]]
+ ; GFX6: $vgpr6 = COPY [[COPY11]]
+ ; GFX6: $vgpr7 = COPY [[COPY12]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX7: $vgpr0 = COPY [[COPY5]]
+ ; GFX7: $vgpr1 = COPY [[COPY6]]
+ ; GFX7: $vgpr2 = COPY [[COPY7]]
+ ; GFX7: $vgpr3 = COPY [[COPY8]]
+ ; GFX7: $vgpr4 = COPY [[COPY9]]
+ ; GFX7: $vgpr5 = COPY [[COPY10]]
+ ; GFX7: $vgpr6 = COPY [[COPY11]]
+ ; GFX7: $vgpr7 = COPY [[COPY12]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX8: $vgpr0 = COPY [[COPY5]]
+ ; GFX8: $vgpr1 = COPY [[COPY6]]
+ ; GFX8: $vgpr2 = COPY [[COPY7]]
+ ; GFX8: $vgpr3 = COPY [[COPY8]]
+ ; GFX8: $vgpr4 = COPY [[COPY9]]
+ ; GFX8: $vgpr5 = COPY [[COPY10]]
+ ; GFX8: $vgpr6 = COPY [[COPY11]]
+ ; GFX8: $vgpr7 = COPY [[COPY12]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ %soffset = add i32 %soffset.base, 4064
+ %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
+ ret <8 x float> %val
+}
+
+; Make sure the maximum offset isn't exceeded when splitting this load.
+define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4068(<4 x i32> inreg %rsrc, i32 %soffset.base) {
+ ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX6: $vgpr0 = COPY [[COPY5]]
+ ; GFX6: $vgpr1 = COPY [[COPY6]]
+ ; GFX6: $vgpr2 = COPY [[COPY7]]
+ ; GFX6: $vgpr3 = COPY [[COPY8]]
+ ; GFX6: $vgpr4 = COPY [[COPY9]]
+ ; GFX6: $vgpr5 = COPY [[COPY10]]
+ ; GFX6: $vgpr6 = COPY [[COPY11]]
+ ; GFX6: $vgpr7 = COPY [[COPY12]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX7: $vgpr0 = COPY [[COPY5]]
+ ; GFX7: $vgpr1 = COPY [[COPY6]]
+ ; GFX7: $vgpr2 = COPY [[COPY7]]
+ ; GFX7: $vgpr3 = COPY [[COPY8]]
+ ; GFX7: $vgpr4 = COPY [[COPY9]]
+ ; GFX7: $vgpr5 = COPY [[COPY10]]
+ ; GFX7: $vgpr6 = COPY [[COPY11]]
+ ; GFX7: $vgpr7 = COPY [[COPY12]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX8: $vgpr0 = COPY [[COPY5]]
+ ; GFX8: $vgpr1 = COPY [[COPY6]]
+ ; GFX8: $vgpr2 = COPY [[COPY7]]
+ ; GFX8: $vgpr3 = COPY [[COPY8]]
+ ; GFX8: $vgpr4 = COPY [[COPY9]]
+ ; GFX8: $vgpr5 = COPY [[COPY10]]
+ ; GFX8: $vgpr6 = COPY [[COPY11]]
+ ; GFX8: $vgpr7 = COPY [[COPY12]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ %soffset = add i32 %soffset.base, 4068
+ %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
+ ret <8 x float> %val
+}
+
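+; The 64-byte load splits into four dwordx4 loads; the last piece lands at
+; immediate offset 4080, which still fits, so the base should stay folded.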
+define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4032(<4 x i32> inreg %rsrc, i32 %soffset.base) {
+ ; GFX6-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
+ ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
+ ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
+ ; GFX6: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
+ ; GFX6: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
+ ; GFX6: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
+ ; GFX6: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
+ ; GFX6: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
+ ; GFX6: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
+ ; GFX6: $vgpr0 = COPY [[COPY5]]
+ ; GFX6: $vgpr1 = COPY [[COPY6]]
+ ; GFX6: $vgpr2 = COPY [[COPY7]]
+ ; GFX6: $vgpr3 = COPY [[COPY8]]
+ ; GFX6: $vgpr4 = COPY [[COPY9]]
+ ; GFX6: $vgpr5 = COPY [[COPY10]]
+ ; GFX6: $vgpr6 = COPY [[COPY11]]
+ ; GFX6: $vgpr7 = COPY [[COPY12]]
+ ; GFX6: $vgpr8 = COPY [[COPY13]]
+ ; GFX6: $vgpr9 = COPY [[COPY14]]
+ ; GFX6: $vgpr10 = COPY [[COPY15]]
+ ; GFX6: $vgpr11 = COPY [[COPY16]]
+ ; GFX6: $vgpr12 = COPY [[COPY17]]
+ ; GFX6: $vgpr13 = COPY [[COPY18]]
+ ; GFX6: $vgpr14 = COPY [[COPY19]]
+ ; GFX6: $vgpr15 = COPY [[COPY20]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
+ ; GFX7-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
+ ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
+ ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
+ ; GFX7: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
+ ; GFX7: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
+ ; GFX7: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
+ ; GFX7: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
+ ; GFX7: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
+ ; GFX7: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
+ ; GFX7: $vgpr0 = COPY [[COPY5]]
+ ; GFX7: $vgpr1 = COPY [[COPY6]]
+ ; GFX7: $vgpr2 = COPY [[COPY7]]
+ ; GFX7: $vgpr3 = COPY [[COPY8]]
+ ; GFX7: $vgpr4 = COPY [[COPY9]]
+ ; GFX7: $vgpr5 = COPY [[COPY10]]
+ ; GFX7: $vgpr6 = COPY [[COPY11]]
+ ; GFX7: $vgpr7 = COPY [[COPY12]]
+ ; GFX7: $vgpr8 = COPY [[COPY13]]
+ ; GFX7: $vgpr9 = COPY [[COPY14]]
+ ; GFX7: $vgpr10 = COPY [[COPY15]]
+ ; GFX7: $vgpr11 = COPY [[COPY16]]
+ ; GFX7: $vgpr12 = COPY [[COPY17]]
+ ; GFX7: $vgpr13 = COPY [[COPY18]]
+ ; GFX7: $vgpr14 = COPY [[COPY19]]
+ ; GFX7: $vgpr15 = COPY [[COPY20]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
+ ; GFX8-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
+ ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
+ ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
+ ; GFX8: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
+ ; GFX8: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
+ ; GFX8: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
+ ; GFX8: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
+ ; GFX8: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
+ ; GFX8: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
+ ; GFX8: $vgpr0 = COPY [[COPY5]]
+ ; GFX8: $vgpr1 = COPY [[COPY6]]
+ ; GFX8: $vgpr2 = COPY [[COPY7]]
+ ; GFX8: $vgpr3 = COPY [[COPY8]]
+ ; GFX8: $vgpr4 = COPY [[COPY9]]
+ ; GFX8: $vgpr5 = COPY [[COPY10]]
+ ; GFX8: $vgpr6 = COPY [[COPY11]]
+ ; GFX8: $vgpr7 = COPY [[COPY12]]
+ ; GFX8: $vgpr8 = COPY [[COPY13]]
+ ; GFX8: $vgpr9 = COPY [[COPY14]]
+ ; GFX8: $vgpr10 = COPY [[COPY15]]
+ ; GFX8: $vgpr11 = COPY [[COPY16]]
+ ; GFX8: $vgpr12 = COPY [[COPY17]]
+ ; GFX8: $vgpr13 = COPY [[COPY18]]
+ ; GFX8: $vgpr14 = COPY [[COPY19]]
+ ; GFX8: $vgpr15 = COPY [[COPY20]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
+ %soffset = add i32 %soffset.base, 4032
+ %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
+ ret <16 x float> %val
+}
+
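+; With a base of 4036, the 64-byte load would run past the 4095 immediate
+; limit, so GFX6/7 move the base into soffset; GFX8 keeps the large
+; immediates and carries the remaining 4 in soffset.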
+define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4036(<4 x i32> inreg %rsrc, i32 %soffset.base) {
+ ; GFX6-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4036
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
+ ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
+ ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
+ ; GFX6: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
+ ; GFX6: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
+ ; GFX6: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
+ ; GFX6: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
+ ; GFX6: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
+ ; GFX6: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
+ ; GFX6: $vgpr0 = COPY [[COPY5]]
+ ; GFX6: $vgpr1 = COPY [[COPY6]]
+ ; GFX6: $vgpr2 = COPY [[COPY7]]
+ ; GFX6: $vgpr3 = COPY [[COPY8]]
+ ; GFX6: $vgpr4 = COPY [[COPY9]]
+ ; GFX6: $vgpr5 = COPY [[COPY10]]
+ ; GFX6: $vgpr6 = COPY [[COPY11]]
+ ; GFX6: $vgpr7 = COPY [[COPY12]]
+ ; GFX6: $vgpr8 = COPY [[COPY13]]
+ ; GFX6: $vgpr9 = COPY [[COPY14]]
+ ; GFX6: $vgpr10 = COPY [[COPY15]]
+ ; GFX6: $vgpr11 = COPY [[COPY16]]
+ ; GFX6: $vgpr12 = COPY [[COPY17]]
+ ; GFX6: $vgpr13 = COPY [[COPY18]]
+ ; GFX6: $vgpr14 = COPY [[COPY19]]
+ ; GFX6: $vgpr15 = COPY [[COPY20]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
+ ; GFX7-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4036
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
+ ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
+ ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
+ ; GFX7: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
+ ; GFX7: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
+ ; GFX7: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
+ ; GFX7: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
+ ; GFX7: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
+ ; GFX7: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
+ ; GFX7: $vgpr0 = COPY [[COPY5]]
+ ; GFX7: $vgpr1 = COPY [[COPY6]]
+ ; GFX7: $vgpr2 = COPY [[COPY7]]
+ ; GFX7: $vgpr3 = COPY [[COPY8]]
+ ; GFX7: $vgpr4 = COPY [[COPY9]]
+ ; GFX7: $vgpr5 = COPY [[COPY10]]
+ ; GFX7: $vgpr6 = COPY [[COPY11]]
+ ; GFX7: $vgpr7 = COPY [[COPY12]]
+ ; GFX7: $vgpr8 = COPY [[COPY13]]
+ ; GFX7: $vgpr9 = COPY [[COPY14]]
+ ; GFX7: $vgpr10 = COPY [[COPY15]]
+ ; GFX7: $vgpr11 = COPY [[COPY16]]
+ ; GFX7: $vgpr12 = COPY [[COPY17]]
+ ; GFX7: $vgpr13 = COPY [[COPY18]]
+ ; GFX7: $vgpr14 = COPY [[COPY19]]
+ ; GFX7: $vgpr15 = COPY [[COPY20]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
+ ; GFX8-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
+ ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
+ ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
+ ; GFX8: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
+ ; GFX8: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
+ ; GFX8: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
+ ; GFX8: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
+ ; GFX8: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
+ ; GFX8: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
+ ; GFX8: $vgpr0 = COPY [[COPY5]]
+ ; GFX8: $vgpr1 = COPY [[COPY6]]
+ ; GFX8: $vgpr2 = COPY [[COPY7]]
+ ; GFX8: $vgpr3 = COPY [[COPY8]]
+ ; GFX8: $vgpr4 = COPY [[COPY9]]
+ ; GFX8: $vgpr5 = COPY [[COPY10]]
+ ; GFX8: $vgpr6 = COPY [[COPY11]]
+ ; GFX8: $vgpr7 = COPY [[COPY12]]
+ ; GFX8: $vgpr8 = COPY [[COPY13]]
+ ; GFX8: $vgpr9 = COPY [[COPY14]]
+ ; GFX8: $vgpr10 = COPY [[COPY15]]
+ ; GFX8: $vgpr11 = COPY [[COPY16]]
+ ; GFX8: $vgpr12 = COPY [[COPY17]]
+ ; GFX8: $vgpr13 = COPY [[COPY18]]
+ ; GFX8: $vgpr14 = COPY [[COPY19]]
+ ; GFX8: $vgpr15 = COPY [[COPY20]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
+ %soffset = add i32 %soffset.base, 4036
+ %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
+ ret <16 x float> %val
+}
+
+; A waterfall loop is required because the resource descriptor is in VGPRs;
+; it must be read back to SGPRs with readfirstlane before the buffer load.
+define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg %soffset) {
+ ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
+ ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX6: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX6: bb.2:
+ ; GFX6: successors: %bb.3, %bb.2
+ ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
+ ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
+ ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
+ ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
+ ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX6: bb.3:
+ ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX6: bb.4:
+ ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
+ ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX7: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX7: bb.2:
+ ; GFX7: successors: %bb.3, %bb.2
+ ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
+ ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
+ ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
+ ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
+ ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX7: bb.3:
+ ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX7: bb.4:
+ ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
+ ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX8: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX8: bb.2:
+ ; GFX8: successors: %bb.3, %bb.2
+ ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
+ ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
+ ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
+ ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
+ ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX8: bb.3:
+ ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX8: bb.4:
+ ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
+ %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
+ ret float %val
+}
+
+; Make sure the folded offset is used inside the waterfall loop
+define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> %rsrc, i32 inreg %soffset.base) {
+ ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX6: bb.2:
+ ; GFX6: successors: %bb.3, %bb.2
+ ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
+ ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
+ ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX6: bb.3:
+ ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX6: bb.4:
+ ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX7: bb.2:
+ ; GFX7: successors: %bb.3, %bb.2
+ ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
+ ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
+ ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX7: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX7: bb.3:
+ ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX7: bb.4:
+ ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX8: bb.2:
+ ; GFX8: successors: %bb.3, %bb.2
+ ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
+ ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
+ ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX8: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX8: bb.3:
+ ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX8: bb.4:
+ ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
+ %soffset = add i32 %soffset.base, 4092
+ %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
+ ret float %val
+}
+
+; Scalar offset exceeds the MUBUF immediate limit, so keep the add out of the loop.
+define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> %rsrc, i32 inreg %soffset.base) {
+ ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
+ ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
+ ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX6: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX6: bb.2:
+ ; GFX6: successors: %bb.3, %bb.2
+ ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
+ ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
+ ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
+ ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
+ ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX6: bb.3:
+ ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX6: bb.4:
+ ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
+ ; GFX7: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
+ ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX7: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX7: bb.2:
+ ; GFX7: successors: %bb.3, %bb.2
+ ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
+ ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
+ ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
+ ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
+ ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX7: bb.3:
+ ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX7: bb.4:
+ ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
+ ; GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
+ ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX8: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX8: bb.2:
+ ; GFX8: successors: %bb.3, %bb.2
+ ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
+ ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
+ ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
+ ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
+ ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX8: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX8: bb.3:
+ ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX8: bb.4:
+ ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
+ %soffset = add i32 %soffset.base, 4096
+ %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
+ ret float %val
+}
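
Taken together, the 4092 and 4096 tests pin down the boundary of the immediate offset field: 4092 folds straight into the BUFFER_LOAD_DWORD_OFFSET immediate, while 4096 forces an S_MOV_B32/S_ADD_I32 outside the loop and the OFFEN form. A small sketch of that legality check, under the assumption that the field is a 12-bit unsigned immediate (the offset-4095 test further down is consistent with the same boundary):

  #include <cstdint>
  #include <cstdio>

  // Sketch only, not the in-tree helper: a constant soffset can fold into
  // the MUBUF immediate field when it fits in 12 unsigned bits.
  static bool foldsIntoMUBUFImm(uint32_t Offset) {
    return Offset <= 4095;
  }

  int main() {
    const uint32_t Offsets[] = {4092, 4095, 4096};
    for (uint32_t Off : Offsets)
      std::printf("offset %u: %s\n", Off,
                  foldsIntoMUBUFImm(Off) ? "folds into imm"
                                         : "keep the add outside");
  }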
+
+; Waterfall loop, but constant offset
+define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) {
+ ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX6: bb.2:
+ ; GFX6: successors: %bb.3, %bb.2
+ ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec
+ ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec
+ ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec
+ ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
+ ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4, align 1)
+ ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX6: bb.3:
+ ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX6: bb.4:
+ ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX7: bb.2:
+ ; GFX7: successors: %bb.3, %bb.2
+ ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec
+ ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec
+ ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec
+ ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
+ ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4, align 1)
+ ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX7: bb.3:
+ ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX7: bb.4:
+ ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX8: bb.2:
+ ; GFX8: successors: %bb.3, %bb.2
+ ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec
+ ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec
+ ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec
+ ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
+ ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4, align 1)
+ ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX8: bb.3:
+ ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX8: bb.4:
+ ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
+ %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 4095, i32 0)
+ ret float %val
+}
+
+; Waterfall loop, but constant offset
+define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) {
+ ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
+ ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+ ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX6: bb.2:
+ ; GFX6: successors: %bb.3, %bb.2
+ ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
+ ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
+ ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX6: bb.3:
+ ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX6: bb.4:
+ ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
+ ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+ ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX7: bb.2:
+ ; GFX7: successors: %bb.3, %bb.2
+ ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
+ ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
+ ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX7: bb.3:
+ ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX7: bb.4:
+ ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX8: bb.2:
+ ; GFX8: successors: %bb.3, %bb.2
+ ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec
+ ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec
+ ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec
+ ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
+ ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; GFX8: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX8: bb.3:
+ ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX8: bb.4:
+ ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
+ %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 4096, i32 0)
+ ret float %val
+}
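
Note the GFX8 checks just above: rather than materializing 4096 in a VGPR as GFX6/7 do, the constant is split as soffset = 1 plus immediate 4095, keeping the OFFSET form. A sketch of that decomposition (illustration only, assuming the split simply peels the overflow above 4095 into soffset):

  #include <cstdint>
  #include <cstdio>

  int main() {
    // Split a constant offset that just overflows the immediate field.
    uint32_t Offset = 4096;
    uint32_t Imm = Offset <= 4095 ? Offset : 4095;
    uint32_t SOffset = Offset - Imm; // ends up in an S_MOV_B32
    std::printf("imm=%u soffset=%u\n", Imm, SOffset); // imm=4095 soffset=1
  }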
+
+; Need a waterfall loop, but the offset is scalar.
+; Make sure the base offset is added to each split load.
+define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> %rsrc, i32 inreg %soffset.base) {
+ ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX6: bb.2:
+ ; GFX6: successors: %bb.3, %bb.2
+ ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
+ ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
+ ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX6: bb.3:
+ ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX6: bb.4:
+ ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
+ ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
+ ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
+ ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
+ ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
+ ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
+ ; GFX6: $vgpr0 = COPY [[COPY7]]
+ ; GFX6: $vgpr1 = COPY [[COPY8]]
+ ; GFX6: $vgpr2 = COPY [[COPY9]]
+ ; GFX6: $vgpr3 = COPY [[COPY10]]
+ ; GFX6: $vgpr4 = COPY [[COPY11]]
+ ; GFX6: $vgpr5 = COPY [[COPY12]]
+ ; GFX6: $vgpr6 = COPY [[COPY13]]
+ ; GFX6: $vgpr7 = COPY [[COPY14]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX7: bb.2:
+ ; GFX7: successors: %bb.3, %bb.2
+ ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
+ ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
+ ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX7: bb.3:
+ ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX7: bb.4:
+ ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
+ ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
+ ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
+ ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
+ ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
+ ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
+ ; GFX7: $vgpr0 = COPY [[COPY7]]
+ ; GFX7: $vgpr1 = COPY [[COPY8]]
+ ; GFX7: $vgpr2 = COPY [[COPY9]]
+ ; GFX7: $vgpr3 = COPY [[COPY10]]
+ ; GFX7: $vgpr4 = COPY [[COPY11]]
+ ; GFX7: $vgpr5 = COPY [[COPY12]]
+ ; GFX7: $vgpr6 = COPY [[COPY13]]
+ ; GFX7: $vgpr7 = COPY [[COPY14]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX8: bb.2:
+ ; GFX8: successors: %bb.3, %bb.2
+ ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
+ ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
+ ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX8: bb.3:
+ ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX8: bb.4:
+ ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
+ ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
+ ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
+ ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
+ ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
+ ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
+ ; GFX8: $vgpr0 = COPY [[COPY7]]
+ ; GFX8: $vgpr1 = COPY [[COPY8]]
+ ; GFX8: $vgpr2 = COPY [[COPY9]]
+ ; GFX8: $vgpr3 = COPY [[COPY10]]
+ ; GFX8: $vgpr4 = COPY [[COPY11]]
+ ; GFX8: $vgpr5 = COPY [[COPY12]]
+ ; GFX8: $vgpr6 = COPY [[COPY13]]
+ ; GFX8: $vgpr7 = COPY [[COPY14]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ %soffset = add i32 %soffset.base, 4064
+ %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
+ ret <8 x float> %val
+}
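
The v8f32 result is wider than one BUFFER_LOAD_DWORDX4, so the load is split in two and the 16-byte stride lands in the immediates: 4064 for the first part, 4080 for the second, with the scalar base reused in soffset. A sketch of the per-part offset computation (values taken from this test):

  #include <cstdint>
  #include <cstdio>

  int main() {
    const uint32_t BaseImm = 4064;   // base offset folded into the immediate
    const uint32_t TotalBytes = 32;  // v8f32
    const uint32_t PartBytes = 16;   // one BUFFER_LOAD_DWORDX4
    for (uint32_t B = 0; B < TotalBytes; B += PartBytes)
      std::printf("part immediate: %u\n", BaseImm + B); // 4064, 4080
  }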
+
+; Need a waterfall loop, but the offset is scalar.
+; Make sure the maximum offset isn't exceeded when splitting this load.
+define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> %rsrc, i32 inreg %soffset.base) {
+ ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068
+ ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
+ ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX6: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX6: bb.2:
+ ; GFX6: successors: %bb.3, %bb.2
+ ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
+ ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
+ ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
+ ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
+ ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX6: bb.3:
+ ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX6: bb.4:
+ ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
+ ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
+ ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
+ ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
+ ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
+ ; GFX6: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
+ ; GFX6: $vgpr0 = COPY [[COPY8]]
+ ; GFX6: $vgpr1 = COPY [[COPY9]]
+ ; GFX6: $vgpr2 = COPY [[COPY10]]
+ ; GFX6: $vgpr3 = COPY [[COPY11]]
+ ; GFX6: $vgpr4 = COPY [[COPY12]]
+ ; GFX6: $vgpr5 = COPY [[COPY13]]
+ ; GFX6: $vgpr6 = COPY [[COPY14]]
+ ; GFX6: $vgpr7 = COPY [[COPY15]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068
+ ; GFX7: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
+ ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX7: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX7: bb.2:
+ ; GFX7: successors: %bb.3, %bb.2
+ ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
+ ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
+ ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
+ ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
+ ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX7: bb.3:
+ ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX7: bb.4:
+ ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
+ ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
+ ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
+ ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
+ ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
+ ; GFX7: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
+ ; GFX7: $vgpr0 = COPY [[COPY8]]
+ ; GFX7: $vgpr1 = COPY [[COPY9]]
+ ; GFX7: $vgpr2 = COPY [[COPY10]]
+ ; GFX7: $vgpr3 = COPY [[COPY11]]
+ ; GFX7: $vgpr4 = COPY [[COPY12]]
+ ; GFX7: $vgpr5 = COPY [[COPY13]]
+ ; GFX7: $vgpr6 = COPY [[COPY14]]
+ ; GFX7: $vgpr7 = COPY [[COPY15]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068
+ ; GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
+ ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX8: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX8: bb.2:
+ ; GFX8: successors: %bb.3, %bb.2
+ ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
+ ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
+ ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
+ ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
+ ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX8: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX8: bb.3:
+ ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX8: bb.4:
+ ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
+ ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
+ ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
+ ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
+ ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
+ ; GFX8: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
+ ; GFX8: $vgpr0 = COPY [[COPY8]]
+ ; GFX8: $vgpr1 = COPY [[COPY9]]
+ ; GFX8: $vgpr2 = COPY [[COPY10]]
+ ; GFX8: $vgpr3 = COPY [[COPY11]]
+ ; GFX8: $vgpr4 = COPY [[COPY12]]
+ ; GFX8: $vgpr5 = COPY [[COPY13]]
+ ; GFX8: $vgpr6 = COPY [[COPY14]]
+ ; GFX8: $vgpr7 = COPY [[COPY15]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ %soffset = add i32 %soffset.base, 4068
+ %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
+ ret <8 x float> %val
+}
+
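+; As in the 4068 case above, the scalar S_ADD_I32 of the base and 4096 is done
+; once, before the waterfall loop over the VGPR resource, and the sum becomes
+; the VGPR offset of the two split 16-byte loads.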
+define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> %rsrc, i32 inreg %soffset.base) {
+ ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
+ ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
+ ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX6: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX6: bb.2:
+ ; GFX6: successors: %bb.3, %bb.2
+ ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
+ ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
+ ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
+ ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
+ ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX6: bb.3:
+ ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX6: bb.4:
+ ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
+ ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
+ ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
+ ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
+ ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
+ ; GFX6: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
+ ; GFX6: $vgpr0 = COPY [[COPY8]]
+ ; GFX6: $vgpr1 = COPY [[COPY9]]
+ ; GFX6: $vgpr2 = COPY [[COPY10]]
+ ; GFX6: $vgpr3 = COPY [[COPY11]]
+ ; GFX6: $vgpr4 = COPY [[COPY12]]
+ ; GFX6: $vgpr5 = COPY [[COPY13]]
+ ; GFX6: $vgpr6 = COPY [[COPY14]]
+ ; GFX6: $vgpr7 = COPY [[COPY15]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
+ ; GFX7: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
+ ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX7: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX7: bb.2:
+ ; GFX7: successors: %bb.3, %bb.2
+ ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
+ ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
+ ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
+ ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
+ ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX7: bb.3:
+ ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX7: bb.4:
+ ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
+ ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
+ ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
+ ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
+ ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
+ ; GFX7: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
+ ; GFX7: $vgpr0 = COPY [[COPY8]]
+ ; GFX7: $vgpr1 = COPY [[COPY9]]
+ ; GFX7: $vgpr2 = COPY [[COPY10]]
+ ; GFX7: $vgpr3 = COPY [[COPY11]]
+ ; GFX7: $vgpr4 = COPY [[COPY12]]
+ ; GFX7: $vgpr5 = COPY [[COPY13]]
+ ; GFX7: $vgpr6 = COPY [[COPY14]]
+ ; GFX7: $vgpr7 = COPY [[COPY15]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
+ ; GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
+ ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX8: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX8: bb.2:
+ ; GFX8: successors: %bb.3, %bb.2
+ ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
+ ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
+ ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
+ ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
+ ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX8: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX8: bb.3:
+ ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX8: bb.4:
+ ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
+ ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
+ ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
+ ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
+ ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
+ ; GFX8: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
+ ; GFX8: $vgpr0 = COPY [[COPY8]]
+ ; GFX8: $vgpr1 = COPY [[COPY9]]
+ ; GFX8: $vgpr2 = COPY [[COPY10]]
+ ; GFX8: $vgpr3 = COPY [[COPY11]]
+ ; GFX8: $vgpr4 = COPY [[COPY12]]
+ ; GFX8: $vgpr5 = COPY [[COPY13]]
+ ; GFX8: $vgpr6 = COPY [[COPY14]]
+ ; GFX8: $vgpr7 = COPY [[COPY15]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ %soffset = add i32 %soffset.base, 4096
+ %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
+ ret <8 x float> %val
+}
+
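+; 5000 is too large for the MUBUF immediate offset field. GFX6/7 materialize
+; the whole constant in the SGPR soffset (immediate offsets 0 and 16), while
+; GFX8 splits it into an SGPR base of 4064 plus immediate offsets 936 and 952.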
+define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000(<4 x i32> %rsrc, i32 %offset.base) {
+ ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX6: bb.2:
+ ; GFX6: successors: %bb.3, %bb.2
+ ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
+ ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
+ ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX6: bb.3:
+ ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX6: bb.4:
+ ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
+ ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
+ ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
+ ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
+ ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
+ ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
+ ; GFX6: $vgpr0 = COPY [[COPY7]]
+ ; GFX6: $vgpr1 = COPY [[COPY8]]
+ ; GFX6: $vgpr2 = COPY [[COPY9]]
+ ; GFX6: $vgpr3 = COPY [[COPY10]]
+ ; GFX6: $vgpr4 = COPY [[COPY11]]
+ ; GFX6: $vgpr5 = COPY [[COPY12]]
+ ; GFX6: $vgpr6 = COPY [[COPY13]]
+ ; GFX6: $vgpr7 = COPY [[COPY14]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX7: bb.2:
+ ; GFX7: successors: %bb.3, %bb.2
+ ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
+ ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
+ ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX7: bb.3:
+ ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX7: bb.4:
+ ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
+ ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
+ ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
+ ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
+ ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
+ ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
+ ; GFX7: $vgpr0 = COPY [[COPY7]]
+ ; GFX7: $vgpr1 = COPY [[COPY8]]
+ ; GFX7: $vgpr2 = COPY [[COPY9]]
+ ; GFX7: $vgpr3 = COPY [[COPY10]]
+ ; GFX7: $vgpr4 = COPY [[COPY11]]
+ ; GFX7: $vgpr5 = COPY [[COPY12]]
+ ; GFX7: $vgpr6 = COPY [[COPY13]]
+ ; GFX7: $vgpr7 = COPY [[COPY14]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX8: bb.2:
+ ; GFX8: successors: %bb.3, %bb.2
+ ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
+ ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
+ ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4064
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 936, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 952, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX8: bb.3:
+ ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX8: bb.4:
+ ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
+ ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
+ ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
+ ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
+ ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
+ ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
+ ; GFX8: $vgpr0 = COPY [[COPY7]]
+ ; GFX8: $vgpr1 = COPY [[COPY8]]
+ ; GFX8: $vgpr2 = COPY [[COPY9]]
+ ; GFX8: $vgpr3 = COPY [[COPY10]]
+ ; GFX8: $vgpr4 = COPY [[COPY11]]
+ ; GFX8: $vgpr5 = COPY [[COPY12]]
+ ; GFX8: $vgpr6 = COPY [[COPY13]]
+ ; GFX8: $vgpr7 = COPY [[COPY14]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ %soffset = add i32 %offset.base, 5000
+ %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
+ ret <8 x float> %val
+}
+
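+; GFX6/7 put the full constant 4076 in the SGPR soffset; GFX8 instead keeps
+; 4064 and 4080 as the immediate offsets and folds only the remaining 12 into
+; soffset.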
+define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076(<4 x i32> %rsrc, i32 %offset.base) {
+ ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX6: bb.2:
+ ; GFX6: successors: %bb.3, %bb.2
+ ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
+ ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
+ ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4076
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX6: bb.3:
+ ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX6: bb.4:
+ ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
+ ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
+ ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
+ ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
+ ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
+ ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
+ ; GFX6: $vgpr0 = COPY [[COPY7]]
+ ; GFX6: $vgpr1 = COPY [[COPY8]]
+ ; GFX6: $vgpr2 = COPY [[COPY9]]
+ ; GFX6: $vgpr3 = COPY [[COPY10]]
+ ; GFX6: $vgpr4 = COPY [[COPY11]]
+ ; GFX6: $vgpr5 = COPY [[COPY12]]
+ ; GFX6: $vgpr6 = COPY [[COPY13]]
+ ; GFX6: $vgpr7 = COPY [[COPY14]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX7: bb.2:
+ ; GFX7: successors: %bb.3, %bb.2
+ ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
+ ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
+ ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4076
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX7: bb.3:
+ ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX7: bb.4:
+ ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
+ ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
+ ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
+ ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
+ ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
+ ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
+ ; GFX7: $vgpr0 = COPY [[COPY7]]
+ ; GFX7: $vgpr1 = COPY [[COPY8]]
+ ; GFX7: $vgpr2 = COPY [[COPY9]]
+ ; GFX7: $vgpr3 = COPY [[COPY10]]
+ ; GFX7: $vgpr4 = COPY [[COPY11]]
+ ; GFX7: $vgpr5 = COPY [[COPY12]]
+ ; GFX7: $vgpr6 = COPY [[COPY13]]
+ ; GFX7: $vgpr7 = COPY [[COPY14]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX8: bb.2:
+ ; GFX8: successors: %bb.3, %bb.2
+ ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
+ ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
+ ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 12
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX8: bb.3:
+ ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX8: bb.4:
+ ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
+ ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
+ ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
+ ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
+ ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
+ ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
+ ; GFX8: $vgpr0 = COPY [[COPY7]]
+ ; GFX8: $vgpr1 = COPY [[COPY8]]
+ ; GFX8: $vgpr2 = COPY [[COPY9]]
+ ; GFX8: $vgpr3 = COPY [[COPY10]]
+ ; GFX8: $vgpr4 = COPY [[COPY11]]
+ ; GFX8: $vgpr5 = COPY [[COPY12]]
+ ; GFX8: $vgpr6 = COPY [[COPY13]]
+ ; GFX8: $vgpr7 = COPY [[COPY14]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ %soffset = add i32 %offset.base, 4076
+ %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
+ ret <8 x float> %val
+}
+
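+; Same pattern with 4080: the full constant goes in soffset on GFX6/7, while
+; GFX8 uses immediate offsets 4064 and 4080 with the remaining 16 in soffset.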
+define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080(<4 x i32> %rsrc, i32 %offset.base) {
+ ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX6: bb.2:
+ ; GFX6: successors: %bb.3, %bb.2
+ ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
+ ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
+ ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4080
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX6: bb.3:
+ ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX6: bb.4:
+ ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
+ ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
+ ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
+ ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
+ ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
+ ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
+ ; GFX6: $vgpr0 = COPY [[COPY7]]
+ ; GFX6: $vgpr1 = COPY [[COPY8]]
+ ; GFX6: $vgpr2 = COPY [[COPY9]]
+ ; GFX6: $vgpr3 = COPY [[COPY10]]
+ ; GFX6: $vgpr4 = COPY [[COPY11]]
+ ; GFX6: $vgpr5 = COPY [[COPY12]]
+ ; GFX6: $vgpr6 = COPY [[COPY13]]
+ ; GFX6: $vgpr7 = COPY [[COPY14]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX7: bb.2:
+ ; GFX7: successors: %bb.3, %bb.2
+ ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
+ ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
+ ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4080
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX7: bb.3:
+ ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX7: bb.4:
+ ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
+ ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
+ ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
+ ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
+ ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
+ ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
+ ; GFX7: $vgpr0 = COPY [[COPY7]]
+ ; GFX7: $vgpr1 = COPY [[COPY8]]
+ ; GFX7: $vgpr2 = COPY [[COPY9]]
+ ; GFX7: $vgpr3 = COPY [[COPY10]]
+ ; GFX7: $vgpr4 = COPY [[COPY11]]
+ ; GFX7: $vgpr5 = COPY [[COPY12]]
+ ; GFX7: $vgpr6 = COPY [[COPY13]]
+ ; GFX7: $vgpr7 = COPY [[COPY14]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX8: bb.2:
+ ; GFX8: successors: %bb.3, %bb.2
+ ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
+ ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
+ ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX8: bb.3:
+ ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX8: bb.4:
+ ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
+ ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
+ ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
+ ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
+ ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
+ ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
+ ; GFX8: $vgpr0 = COPY [[COPY7]]
+ ; GFX8: $vgpr1 = COPY [[COPY8]]
+ ; GFX8: $vgpr2 = COPY [[COPY9]]
+ ; GFX8: $vgpr3 = COPY [[COPY10]]
+ ; GFX8: $vgpr4 = COPY [[COPY11]]
+ ; GFX8: $vgpr5 = COPY [[COPY12]]
+ ; GFX8: $vgpr6 = COPY [[COPY13]]
+ ; GFX8: $vgpr7 = COPY [[COPY14]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ %soffset = add i32 %offset.base, 4080
+ %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
+ ret <8 x float> %val
+}
+
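+; A pure constant offset of 4064 needs no add: both halves of the load use the
+; immediate offsets 4064 and 4080 directly, with a zero soffset.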
+define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064(<4 x i32> %rsrc, i32 %offset.base) {
+ ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX6: bb.2:
+ ; GFX6: successors: %bb.3, %bb.2
+ ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec
+ ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec
+ ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec
+ ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
+ ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX6: bb.3:
+ ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX6: bb.4:
+ ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
+ ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
+ ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
+ ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
+ ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
+ ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
+ ; GFX6: $vgpr0 = COPY [[COPY6]]
+ ; GFX6: $vgpr1 = COPY [[COPY7]]
+ ; GFX6: $vgpr2 = COPY [[COPY8]]
+ ; GFX6: $vgpr3 = COPY [[COPY9]]
+ ; GFX6: $vgpr4 = COPY [[COPY10]]
+ ; GFX6: $vgpr5 = COPY [[COPY11]]
+ ; GFX6: $vgpr6 = COPY [[COPY12]]
+ ; GFX6: $vgpr7 = COPY [[COPY13]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX7: bb.2:
+ ; GFX7: successors: %bb.3, %bb.2
+ ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec
+ ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec
+ ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec
+ ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
+ ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX7: bb.3:
+ ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX7: bb.4:
+ ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
+ ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
+ ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
+ ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
+ ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
+ ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
+ ; GFX7: $vgpr0 = COPY [[COPY6]]
+ ; GFX7: $vgpr1 = COPY [[COPY7]]
+ ; GFX7: $vgpr2 = COPY [[COPY8]]
+ ; GFX7: $vgpr3 = COPY [[COPY9]]
+ ; GFX7: $vgpr4 = COPY [[COPY10]]
+ ; GFX7: $vgpr5 = COPY [[COPY11]]
+ ; GFX7: $vgpr6 = COPY [[COPY12]]
+ ; GFX7: $vgpr7 = COPY [[COPY13]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX8: bb.2:
+ ; GFX8: successors: %bb.3, %bb.2
+ ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec
+ ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec
+ ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec
+ ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
+ ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4)
+ ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX8: bb.3:
+ ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX8: bb.4:
+ ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
+ ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
+ ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
+ ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
+ ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
+ ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
+ ; GFX8: $vgpr0 = COPY [[COPY6]]
+ ; GFX8: $vgpr1 = COPY [[COPY7]]
+ ; GFX8: $vgpr2 = COPY [[COPY8]]
+ ; GFX8: $vgpr3 = COPY [[COPY9]]
+ ; GFX8: $vgpr4 = COPY [[COPY10]]
+ ; GFX8: $vgpr5 = COPY [[COPY11]]
+ ; GFX8: $vgpr6 = COPY [[COPY12]]
+ ; GFX8: $vgpr7 = COPY [[COPY13]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 4064, i32 0)
+ ret <8 x float> %val
+}
+
+define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) {
+ ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
+ %offset = add i32 %offset.v, %offset.s
+ %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
+ ret float %val
+}
+
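The test above (and its commuted twin below) show that a VGPR+SGPR add selects to a single BUFFER_LOAD_DWORD_OFFEN with no ADD at all, because MUBUF addressing already sums three components: the VGPR term routes to voffset and the SGPR term to soffset. A sketch of that address composition, with field names that are descriptive assumptions rather than the hardware's:

    #include <cstdint>

    // MUBUF effective-address composition assumed by the checks above: the
    // selector can drop the IR 'add' entirely by routing the VGPR operand to
    // VOffset and the SGPR operand to SOffset.
    struct MubufAddr {
      uint64_t RsrcBase;   // base address from the 128-bit descriptor
      uint32_t SOffset;    // scalar offset register (the SGPR operand)
      uint32_t VOffset;    // per-lane offset register (the VGPR operand)
      uint32_t InstOffset; // unsigned 12-bit immediate in the instruction
    };

    inline uint64_t effectiveAddress(const MubufAddr &A) {
      return A.RsrcBase + A.SOffset + A.VOffset + A.InstOffset;
    }

Because the sum is commutative, both `%offset.v + %offset.s` and `%offset.s + %offset.v` produce identical MIR, which is exactly what the two tests check.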
+define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) {
+ ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
+ %offset = add i32 %offset.s, %offset.v
+ %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
+ ret float %val
+}
+
+define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr_imm(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) {
+ ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
+ ; GFX6: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_I32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
+ ; GFX7: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_I32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
+ ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
+ ; GFX8: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_I32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
+ ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
+ %offset.base = add i32 %offset.v, %offset.s
+ %offset = add i32 %offset.base, 1024
+ %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
+ ret float %val
+}
+
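In the *_imm tests the trailing constant 1024 is encoded directly in the instruction's offset field (the same mechanism that folded 4064 earlier in the file), while the variable VGPR+SGPR part becomes voffset. A hedged sketch of that split, assuming the unsigned 12-bit MUBUF limit of 4095; the helper name is hypothetical:

    #include <cstdint>

    // Sketch of folding a known constant into the MUBUF 12-bit instruction
    // offset, as the 1024 and 4064 cases in this file do.
    struct SplitOffset {
      uint32_t InstOffset; // part encoded in the instruction
      uint32_t Remainder;  // part that must stay in a register
    };

    inline SplitOffset splitConstantOffset(uint32_t C) {
      constexpr uint32_t MaxImm = 4095; // unsigned 12-bit field limit
      if (C <= MaxImm)
        return {C, 0}; // whole constant folds (e.g. 1024, 4064)
      return {0, C};   // otherwise keep it in soffset/voffset
    }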
+define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr_imm(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) {
+ ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
+ ; GFX6: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_I32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
+ ; GFX7: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_I32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
+ ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
+ ; GFX8: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_I32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
+ ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
+ %offset.base = add i32 %offset.s, %offset.v
+ %offset = add i32 %offset.base, 1024
+ %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
+ ret float %val
+}
+
+; TODO: Ideally the adds below would be reassociated so the 1024 immediate
+; could fold into the load's offset field (see the sketch after this function).
+define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_sgpr_vgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) {
+ ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
+ ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
+ ; GFX7: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
+ ; GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
+ %offset.base = add i32 %offset.s, 1024
+ %offset = add i32 %offset.base, %offset.v
+ %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
+ ret float %val
+}
+
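The TODO above is about associativity: in this test and the next one the constant is added first, so `(%offset.s + 1024) + %offset.v` selects an S_ADD_I32 (or a materialized V_ADD) that hides the 1024, whereas the equivalent `(%offset.s + %offset.v) + 1024` shape from the earlier *_imm tests lets the constant fold. A toy rewrite showing the transformation that would be needed; the types and names are illustrative, and this mirrors only the IR shape, not an actual LLVM combine:

    #include <cstdint>
    #include <utility>

    // Toy reassociation for the TODO above: rewrite (C + reg) + reg or
    // (reg + C) + reg into (reg + reg) + C so the constant becomes foldable.
    struct Term { bool IsConst; uint32_t Value; /* reg id or constant */ };

    struct Sum3 { Term A, B, C; }; // ((A + B) + C) as written in the IR

    inline Sum3 reassociateConstantOut(Sum3 S) {
      if (S.A.IsConst)
        std::swap(S.A, S.C);      // ((C+B)+V) -> ((V+B)+C)
      else if (S.B.IsConst)
        std::swap(S.B, S.C);      // ((S+C)+V) -> ((S+V)+C)
      return S;                   // constant (if any) now sits in the outer add
    }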
+define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_vgpr_sgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) {
+ ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
+ ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+ ; GFX6: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_I32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
+ ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
+ ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+ ; GFX7: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_I32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
+ ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
+ ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+ ; GFX8: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_I32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
+ ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
+ ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
+ %offset.base = add i32 %offset.v, 1024
+ %offset = add i32 %offset.base, %offset.s
+ %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
+ ret float %val
+}
+
+declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg)
+declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32 immarg)
+declare <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32>, i32, i32 immarg)
+declare <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32>, i32, i32 immarg)
+declare <8 x i32> @llvm.amdgcn.s.buffer.load.v8i32(<4 x i32>, i32, i32 immarg)
+declare <16 x i32> @llvm.amdgcn.s.buffer.load.v16i32(<4 x i32>, i32, i32 immarg)
+
+declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32 immarg)
+declare <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32>, i32, i32 immarg)
+declare <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32>, i32, i32 immarg)
+declare <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32>, i32, i32 immarg)
+declare <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32>, i32, i32 immarg)
+declare <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32>, i32, i32 immarg)
+
+declare i96 @llvm.amdgcn.s.buffer.load.i96(<4 x i32>, i32, i32 immarg)
+declare i256 @llvm.amdgcn.s.buffer.load.i256(<4 x i32>, i32, i32 immarg)
+declare i512 @llvm.amdgcn.s.buffer.load.i512(<4 x i32>, i32, i32 immarg)
+
+declare <16 x i16> @llvm.amdgcn.s.buffer.load.v16i16(<4 x i32>, i32, i32 immarg)
+declare <32 x i16> @llvm.amdgcn.s.buffer.load.v32i16(<4 x i32>, i32, i32 immarg)
+
+declare <4 x i64> @llvm.amdgcn.s.buffer.load.v4i64(<4 x i32>, i32, i32 immarg)
+declare <8 x i64> @llvm.amdgcn.s.buffer.load.v8i64(<4 x i32>, i32, i32 immarg)
+
+declare <4 x i8 addrspace(1)*> @llvm.amdgcn.s.buffer.load.v4p1i8(<4 x i32>, i32, i32 immarg)
+declare <8 x i8 addrspace(1)*> @llvm.amdgcn.s.buffer.load.v8p1i8(<4 x i32>, i32, i32 immarg)