[llvm] 35c3d10 - AMDGPU/GlobalISel: Select G_EXTRACT_VECTOR_ELT
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 9 16:54:47 PST 2020
Author: Matt Arsenault
Date: 2020-01-09T19:52:24-05:00
New Revision: 35c3d101aee240f6c034f25ff6800fda22a89987
URL: https://github.com/llvm/llvm-project/commit/35c3d101aee240f6c034f25ff6800fda22a89987
DIFF: https://github.com/llvm/llvm-project/commit/35c3d101aee240f6c034f25ff6800fda22a89987.diff
LOG: AMDGPU/GlobalISel: Select G_EXTRACT_VECTOR_ELT
Unlike the SelectionDAG path, this does not yet fold a constant addend
in the index into the base register.
Added:
llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir
Modified:
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 72ccf0df4f33..5dacc0993fc9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1605,6 +1605,80 @@ bool AMDGPUInstructionSelector::selectG_PTR_MASK(MachineInstr &I) const {
return true;
}
+bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
+ MachineInstr &MI) const {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ Register IdxReg = MI.getOperand(2).getReg();
+
+ LLT DstTy = MRI->getType(DstReg);
+ LLT SrcTy = MRI->getType(SrcReg);
+
+ const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
+ const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
+ const RegisterBank *IdxRB = RBI.getRegBank(IdxReg, *MRI, TRI);
+
+ // The index must be scalar. If it wasn't RegBankSelect should have moved this
+ // into a waterfall loop.
+ if (IdxRB->getID() != AMDGPU::SGPRRegBankID)
+ return false;
+
+ const TargetRegisterClass *SrcRC = TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB,
+ *MRI);
+ const TargetRegisterClass *DstRC = TRI.getRegClassForTypeOnBank(DstTy, *DstRB,
+ *MRI);
+ if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
+ !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
+ !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI))
+ return false;
+
+ MachineBasicBlock *BB = MI.getParent();
+ const DebugLoc &DL = MI.getDebugLoc();
+ const bool Is64 = DstTy.getSizeInBits() == 64;
+
+ unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
+
+ if (SrcRB->getID() == AMDGPU::SGPRRegBankID) {
+ if (DstTy.getSizeInBits() != 32 && !Is64)
+ return false;
+
+ BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
+ .addReg(IdxReg);
+
+ unsigned Opc = Is64 ? AMDGPU::S_MOVRELS_B64 : AMDGPU::S_MOVRELS_B32;
+ BuildMI(*BB, &MI, DL, TII.get(Opc), DstReg)
+ .addReg(SrcReg, 0, SubReg)
+ .addReg(SrcReg, RegState::Implicit);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ if (SrcRB->getID() != AMDGPU::VGPRRegBankID || DstTy.getSizeInBits() != 32)
+ return false;
+
+ if (!STI.useVGPRIndexMode()) {
+ BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
+ .addReg(IdxReg);
+ BuildMI(*BB, &MI, DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg)
+ .addReg(SrcReg, RegState::Undef, SubReg)
+ .addReg(SrcReg, RegState::Implicit);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ BuildMI(*BB, MI, DL, TII.get(AMDGPU::S_SET_GPR_IDX_ON))
+ .addReg(IdxReg)
+ .addImm(AMDGPU::VGPRIndexMode::SRC0_ENABLE);
+ BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_MOV_B32_e32), DstReg)
+ .addReg(SrcReg, RegState::Undef, SubReg)
+ .addReg(SrcReg, RegState::Implicit)
+ .addReg(AMDGPU::M0, RegState::Implicit);
+ BuildMI(*BB, MI, DL, TII.get(AMDGPU::S_SET_GPR_IDX_OFF));
+
+ MI.eraseFromParent();
+ return true;
+}
+
bool AMDGPUInstructionSelector::select(MachineInstr &I) {
if (I.isPHI())
return selectPHI(I);
@@ -1693,6 +1767,8 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
return selectG_FRAME_INDEX(I);
case TargetOpcode::G_PTR_MASK:
return selectG_PTR_MASK(I);
+ case TargetOpcode::G_EXTRACT_VECTOR_ELT:
+ return selectG_EXTRACT_VECTOR_ELT(I);
default:
return selectImpl(I, *CoverageInfo);
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 633c4d35137e..d884afbe7707 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -116,6 +116,7 @@ class AMDGPUInstructionSelector : public InstructionSelector {
bool selectG_BRCOND(MachineInstr &I) const;
bool selectG_FRAME_INDEX(MachineInstr &I) const;
bool selectG_PTR_MASK(MachineInstr &I) const;
+ bool selectG_EXTRACT_VECTOR_ELT(MachineInstr &I) const;
std::pair<Register, unsigned>
selectVOP3ModsImpl(Register Src) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 46aea16a2bea..16bde062b1d0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -45,6 +45,11 @@ static cl::opt<bool> DisablePowerSched(
cl::desc("Disable scheduling to minimize mAI power bursts"),
cl::init(false));
+static cl::opt<bool> EnableVGPRIndexMode(
+ "amdgpu-vgpr-index-mode",
+ cl::desc("Use GPR indexing mode instead of movrel for vector indexing"),
+ cl::init(false));
+
GCNSubtarget::~GCNSubtarget() = default;
R600Subtarget &
@@ -561,6 +566,10 @@ bool GCNSubtarget::hasMadF16() const {
return InstrInfo.pseudoToMCOpcode(AMDGPU::V_MAD_F16) != -1;
}
+bool GCNSubtarget::useVGPRIndexMode() const {
+ return !hasMovrel() || (EnableVGPRIndexMode && hasVGPRIndexMode());
+}
+
unsigned GCNSubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
if (getGeneration() >= AMDGPUSubtarget::GFX10)
return getMaxWavesPerEU();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 08878d87fb09..b0188b003c7f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -941,9 +941,7 @@ class GCNSubtarget : public AMDGPUGenSubtargetInfo,
return HasVGPRIndexMode;
}
- bool useVGPRIndexMode(bool UserEnable) const {
- return !hasMovrel() || (UserEnable && hasVGPRIndexMode());
- }
+ bool useVGPRIndexMode() const;
bool hasScalarCompareEq64() const {
return getGeneration() >= VOLCANIC_ISLANDS;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 794959618705..e73d87cd66af 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -90,11 +90,6 @@ using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
-static cl::opt<bool> EnableVGPRIndexMode(
- "amdgpu-vgpr-index-mode",
- cl::desc("Use GPR indexing mode instead of movrel for vector indexing"),
- cl::init(false));
-
static cl::opt<bool> DisableLoopAlignment(
"amdgpu-disable-loop-alignment",
cl::desc("Do not align and prefetch loops"),
@@ -3415,7 +3410,7 @@ static MachineBasicBlock *emitIndirectSrc(MachineInstr &MI,
std::tie(SubReg, Offset)
= computeIndirectRegAndOffset(TRI, VecRC, SrcReg, Offset);
- bool UseGPRIdxMode = ST.useVGPRIndexMode(EnableVGPRIndexMode);
+ const bool UseGPRIdxMode = ST.useVGPRIndexMode();
if (setM0ToIndexFromSGPR(TII, MRI, MI, Offset, UseGPRIdxMode, true)) {
MachineBasicBlock::iterator I(&MI);
@@ -3510,7 +3505,7 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
std::tie(SubReg, Offset) = computeIndirectRegAndOffset(TRI, VecRC,
SrcVec->getReg(),
Offset);
- bool UseGPRIdxMode = ST.useVGPRIndexMode(EnableVGPRIndexMode);
+ const bool UseGPRIdxMode = ST.useVGPRIndexMode();
if (Idx->getReg() == AMDGPU::NoRegister) {
MachineBasicBlock::iterator I(&MI);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
new file mode 100644
index 000000000000..4f9d35dd905f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
@@ -0,0 +1,1289 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GPRIDX %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=MOVREL %s
+
+define float @dyn_extract_v8f32_const_s_v(i32 %sel) {
+; GPRIDX-LABEL: dyn_extract_v8f32_const_s_v:
+; GPRIDX: ; %bb.0: ; %entry
+; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GPRIDX-NEXT: s_mov_b32 s4, 1.0
+; GPRIDX-NEXT: s_mov_b32 s5, 2.0
+; GPRIDX-NEXT: s_mov_b32 s6, 0x40400000
+; GPRIDX-NEXT: s_mov_b32 s7, 4.0
+; GPRIDX-NEXT: s_mov_b32 s8, 0x40a00000
+; GPRIDX-NEXT: s_mov_b32 s9, 0x40c00000
+; GPRIDX-NEXT: s_mov_b32 s10, 0x40e00000
+; GPRIDX-NEXT: s_mov_b32 s11, 0x41000000
+; GPRIDX-NEXT: s_mov_b64 s[12:13], exec
+; GPRIDX-NEXT: BB0_1: ; =>This Inner Loop Header: Depth=1
+; GPRIDX-NEXT: v_readfirstlane_b32 s14, v0
+; GPRIDX-NEXT: s_mov_b32 m0, s14
+; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s14, v0
+; GPRIDX-NEXT: s_movrels_b32 s14, s4
+; GPRIDX-NEXT: v_mov_b32_e32 v1, s14
+; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
+; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
+; GPRIDX-NEXT: s_cbranch_execnz BB0_1
+; GPRIDX-NEXT: ; %bb.2:
+; GPRIDX-NEXT: s_mov_b64 exec, s[12:13]
+; GPRIDX-NEXT: v_mov_b32_e32 v0, v1
+; GPRIDX-NEXT: s_setpc_b64 s[30:31]
+;
+; MOVREL-LABEL: dyn_extract_v8f32_const_s_v:
+; MOVREL: ; %bb.0: ; %entry
+; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MOVREL-NEXT: s_mov_b32 s4, 1.0
+; MOVREL-NEXT: s_mov_b32 s5, 2.0
+; MOVREL-NEXT: s_mov_b32 s6, 0x40400000
+; MOVREL-NEXT: s_mov_b32 s7, 4.0
+; MOVREL-NEXT: s_mov_b32 s8, 0x40a00000
+; MOVREL-NEXT: s_mov_b32 s9, 0x40c00000
+; MOVREL-NEXT: s_mov_b32 s10, 0x40e00000
+; MOVREL-NEXT: s_mov_b32 s11, 0x41000000
+; MOVREL-NEXT: s_mov_b64 s[12:13], exec
+; MOVREL-NEXT: BB0_1: ; =>This Inner Loop Header: Depth=1
+; MOVREL-NEXT: v_readfirstlane_b32 s14, v0
+; MOVREL-NEXT: s_mov_b32 m0, s14
+; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s14, v0
+; MOVREL-NEXT: s_movrels_b32 s14, s4
+; MOVREL-NEXT: v_mov_b32_e32 v1, s14
+; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc
+; MOVREL-NEXT: s_xor_b64 exec, exec, vcc
+; MOVREL-NEXT: s_cbranch_execnz BB0_1
+; MOVREL-NEXT: ; %bb.2:
+; MOVREL-NEXT: s_mov_b64 exec, s[12:13]
+; MOVREL-NEXT: v_mov_b32_e32 v0, v1
+; MOVREL-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel
+ ret float %ext
+}
+
+define amdgpu_ps float @dyn_extract_v8f32_const_s_s(i32 inreg %sel) {
+; GPRIDX-LABEL: dyn_extract_v8f32_const_s_s:
+; GPRIDX: ; %bb.0: ; %entry
+; GPRIDX-NEXT: s_mov_b32 s4, 1.0
+; GPRIDX-NEXT: s_mov_b32 m0, s2
+; GPRIDX-NEXT: s_mov_b32 s5, 2.0
+; GPRIDX-NEXT: s_mov_b32 s6, 0x40400000
+; GPRIDX-NEXT: s_mov_b32 s7, 4.0
+; GPRIDX-NEXT: s_mov_b32 s8, 0x40a00000
+; GPRIDX-NEXT: s_mov_b32 s9, 0x40c00000
+; GPRIDX-NEXT: s_mov_b32 s10, 0x40e00000
+; GPRIDX-NEXT: s_mov_b32 s11, 0x41000000
+; GPRIDX-NEXT: s_movrels_b32 s0, s4
+; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
+; GPRIDX-NEXT: ; return to shader part epilog
+;
+; MOVREL-LABEL: dyn_extract_v8f32_const_s_s:
+; MOVREL: ; %bb.0: ; %entry
+; MOVREL-NEXT: s_mov_b32 s4, 1.0
+; MOVREL-NEXT: s_mov_b32 m0, s2
+; MOVREL-NEXT: s_mov_b32 s5, 2.0
+; MOVREL-NEXT: s_mov_b32 s6, 0x40400000
+; MOVREL-NEXT: s_mov_b32 s7, 4.0
+; MOVREL-NEXT: s_mov_b32 s8, 0x40a00000
+; MOVREL-NEXT: s_mov_b32 s9, 0x40c00000
+; MOVREL-NEXT: s_mov_b32 s10, 0x40e00000
+; MOVREL-NEXT: s_mov_b32 s11, 0x41000000
+; MOVREL-NEXT: s_movrels_b32 s0, s4
+; MOVREL-NEXT: v_mov_b32_e32 v0, s0
+; MOVREL-NEXT: ; return to shader part epilog
+entry:
+ %ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel
+ ret float %ext
+}
+
+define amdgpu_ps float @dyn_extract_v8f32_s_v(<8 x float> inreg %vec, i32 %sel) {
+; GPRIDX-LABEL: dyn_extract_v8f32_s_v:
+; GPRIDX: ; %bb.0: ; %entry
+; GPRIDX-NEXT: s_mov_b32 s0, s2
+; GPRIDX-NEXT: s_mov_b32 s1, s3
+; GPRIDX-NEXT: s_mov_b32 s2, s4
+; GPRIDX-NEXT: s_mov_b32 s3, s5
+; GPRIDX-NEXT: s_mov_b32 s4, s6
+; GPRIDX-NEXT: s_mov_b32 s5, s7
+; GPRIDX-NEXT: s_mov_b32 s6, s8
+; GPRIDX-NEXT: s_mov_b32 s7, s9
+; GPRIDX-NEXT: s_mov_b64 s[8:9], exec
+; GPRIDX-NEXT: BB2_1: ; =>This Inner Loop Header: Depth=1
+; GPRIDX-NEXT: v_readfirstlane_b32 s10, v0
+; GPRIDX-NEXT: s_mov_b32 m0, s10
+; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s10, v0
+; GPRIDX-NEXT: s_movrels_b32 s10, s0
+; GPRIDX-NEXT: v_mov_b32_e32 v1, s10
+; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
+; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
+; GPRIDX-NEXT: s_cbranch_execnz BB2_1
+; GPRIDX-NEXT: ; %bb.2:
+; GPRIDX-NEXT: s_mov_b64 exec, s[8:9]
+; GPRIDX-NEXT: v_mov_b32_e32 v0, v1
+; GPRIDX-NEXT: ; return to shader part epilog
+;
+; MOVREL-LABEL: dyn_extract_v8f32_s_v:
+; MOVREL: ; %bb.0: ; %entry
+; MOVREL-NEXT: s_mov_b32 s0, s2
+; MOVREL-NEXT: s_mov_b32 s1, s3
+; MOVREL-NEXT: s_mov_b32 s2, s4
+; MOVREL-NEXT: s_mov_b32 s3, s5
+; MOVREL-NEXT: s_mov_b32 s4, s6
+; MOVREL-NEXT: s_mov_b32 s5, s7
+; MOVREL-NEXT: s_mov_b32 s6, s8
+; MOVREL-NEXT: s_mov_b32 s7, s9
+; MOVREL-NEXT: s_mov_b64 s[8:9], exec
+; MOVREL-NEXT: BB2_1: ; =>This Inner Loop Header: Depth=1
+; MOVREL-NEXT: v_readfirstlane_b32 s10, v0
+; MOVREL-NEXT: s_mov_b32 m0, s10
+; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s10, v0
+; MOVREL-NEXT: s_movrels_b32 s10, s0
+; MOVREL-NEXT: v_mov_b32_e32 v1, s10
+; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc
+; MOVREL-NEXT: s_xor_b64 exec, exec, vcc
+; MOVREL-NEXT: s_cbranch_execnz BB2_1
+; MOVREL-NEXT: ; %bb.2:
+; MOVREL-NEXT: s_mov_b64 exec, s[8:9]
+; MOVREL-NEXT: v_mov_b32_e32 v0, v1
+; MOVREL-NEXT: ; return to shader part epilog
+entry:
+ %ext = extractelement <8 x float> %vec, i32 %sel
+ ret float %ext
+}
+
+define float @dyn_extract_v8f32_v_v(<8 x float> %vec, i32 %sel) {
+; GPRIDX-LABEL: dyn_extract_v8f32_v_v:
+; GPRIDX: ; %bb.0: ; %entry
+; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GPRIDX-NEXT: s_mov_b64 s[4:5], exec
+; GPRIDX-NEXT: BB3_1: ; =>This Inner Loop Header: Depth=1
+; GPRIDX-NEXT: v_readfirstlane_b32 s6, v8
+; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v8
+; GPRIDX-NEXT: s_set_gpr_idx_on s6, gpr_idx(SRC0)
+; GPRIDX-NEXT: v_mov_b32_e32 v9, v0
+; GPRIDX-NEXT: s_set_gpr_idx_off
+; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
+; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
+; GPRIDX-NEXT: s_cbranch_execnz BB3_1
+; GPRIDX-NEXT: ; %bb.2:
+; GPRIDX-NEXT: s_mov_b64 exec, s[4:5]
+; GPRIDX-NEXT: v_mov_b32_e32 v0, v9
+; GPRIDX-NEXT: s_setpc_b64 s[30:31]
+;
+; MOVREL-LABEL: dyn_extract_v8f32_v_v:
+; MOVREL: ; %bb.0: ; %entry
+; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MOVREL-NEXT: s_mov_b64 s[4:5], exec
+; MOVREL-NEXT: BB3_1: ; =>This Inner Loop Header: Depth=1
+; MOVREL-NEXT: v_readfirstlane_b32 s6, v8
+; MOVREL-NEXT: s_mov_b32 m0, s6
+; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v8
+; MOVREL-NEXT: v_movrels_b32_e32 v9, v0
+; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc
+; MOVREL-NEXT: s_xor_b64 exec, exec, vcc
+; MOVREL-NEXT: s_cbranch_execnz BB3_1
+; MOVREL-NEXT: ; %bb.2:
+; MOVREL-NEXT: s_mov_b64 exec, s[4:5]
+; MOVREL-NEXT: v_mov_b32_e32 v0, v9
+; MOVREL-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %ext = extractelement <8 x float> %vec, i32 %sel
+ ret float %ext
+}
+
+define amdgpu_ps float @dyn_extract_v8f32_v_s(<8 x float> %vec, i32 inreg %sel) {
+; GPRIDX-LABEL: dyn_extract_v8f32_v_s:
+; GPRIDX: ; %bb.0: ; %entry
+; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0)
+; GPRIDX-NEXT: v_mov_b32_e32 v0, v0
+; GPRIDX-NEXT: s_set_gpr_idx_off
+; GPRIDX-NEXT: ; return to shader part epilog
+;
+; MOVREL-LABEL: dyn_extract_v8f32_v_s:
+; MOVREL: ; %bb.0: ; %entry
+; MOVREL-NEXT: s_mov_b32 m0, s2
+; MOVREL-NEXT: v_movrels_b32_e32 v0, v0
+; MOVREL-NEXT: ; return to shader part epilog
+entry:
+ %ext = extractelement <8 x float> %vec, i32 %sel
+ ret float %ext
+}
+
+define amdgpu_ps float @dyn_extract_v8f32_s_s(<8 x float> inreg %vec, i32 inreg %sel) {
+; GPRIDX-LABEL: dyn_extract_v8f32_s_s:
+; GPRIDX: ; %bb.0: ; %entry
+; GPRIDX-NEXT: s_mov_b32 s0, s2
+; GPRIDX-NEXT: s_mov_b32 m0, s10
+; GPRIDX-NEXT: s_mov_b32 s1, s3
+; GPRIDX-NEXT: s_mov_b32 s2, s4
+; GPRIDX-NEXT: s_mov_b32 s3, s5
+; GPRIDX-NEXT: s_mov_b32 s4, s6
+; GPRIDX-NEXT: s_mov_b32 s5, s7
+; GPRIDX-NEXT: s_mov_b32 s6, s8
+; GPRIDX-NEXT: s_mov_b32 s7, s9
+; GPRIDX-NEXT: s_movrels_b32 s0, s0
+; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
+; GPRIDX-NEXT: ; return to shader part epilog
+;
+; MOVREL-LABEL: dyn_extract_v8f32_s_s:
+; MOVREL: ; %bb.0: ; %entry
+; MOVREL-NEXT: s_mov_b32 s0, s2
+; MOVREL-NEXT: s_mov_b32 m0, s10
+; MOVREL-NEXT: s_mov_b32 s1, s3
+; MOVREL-NEXT: s_mov_b32 s2, s4
+; MOVREL-NEXT: s_mov_b32 s3, s5
+; MOVREL-NEXT: s_mov_b32 s4, s6
+; MOVREL-NEXT: s_mov_b32 s5, s7
+; MOVREL-NEXT: s_mov_b32 s6, s8
+; MOVREL-NEXT: s_mov_b32 s7, s9
+; MOVREL-NEXT: s_movrels_b32 s0, s0
+; MOVREL-NEXT: v_mov_b32_e32 v0, s0
+; MOVREL-NEXT: ; return to shader part epilog
+entry:
+ %ext = extractelement <8 x float> %vec, i32 %sel
+ ret float %ext
+}
+
+define i64 @dyn_extract_v8i64_const_s_v(i32 %sel) {
+; GPRIDX-LABEL: dyn_extract_v8i64_const_s_v:
+; GPRIDX: ; %bb.0: ; %entry
+; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GPRIDX-NEXT: s_mov_b64 s[4:5], 1
+; GPRIDX-NEXT: s_mov_b64 s[6:7], 2
+; GPRIDX-NEXT: s_mov_b64 s[8:9], 3
+; GPRIDX-NEXT: s_mov_b64 s[10:11], 4
+; GPRIDX-NEXT: s_mov_b64 s[12:13], 5
+; GPRIDX-NEXT: s_mov_b64 s[14:15], 6
+; GPRIDX-NEXT: s_mov_b64 s[16:17], 7
+; GPRIDX-NEXT: s_mov_b64 s[18:19], 8
+; GPRIDX-NEXT: s_mov_b64 s[20:21], exec
+; GPRIDX-NEXT: BB6_1: ; =>This Inner Loop Header: Depth=1
+; GPRIDX-NEXT: v_readfirstlane_b32 s22, v0
+; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s22, v0
+; GPRIDX-NEXT: s_lshl_b32 s22, s22, 1
+; GPRIDX-NEXT: s_add_u32 s23, s22, 1
+; GPRIDX-NEXT: s_mov_b32 m0, s22
+; GPRIDX-NEXT: s_nop 0
+; GPRIDX-NEXT: s_movrels_b32 s22, s4
+; GPRIDX-NEXT: s_mov_b32 m0, s23
+; GPRIDX-NEXT: s_nop 0
+; GPRIDX-NEXT: s_movrels_b32 s23, s4
+; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
+; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
+; GPRIDX-NEXT: s_cbranch_execnz BB6_1
+; GPRIDX-NEXT: ; %bb.2:
+; GPRIDX-NEXT: s_mov_b64 exec, s[20:21]
+; GPRIDX-NEXT: v_mov_b32_e32 v0, s22
+; GPRIDX-NEXT: v_mov_b32_e32 v1, s23
+; GPRIDX-NEXT: s_setpc_b64 s[30:31]
+;
+; MOVREL-LABEL: dyn_extract_v8i64_const_s_v:
+; MOVREL: ; %bb.0: ; %entry
+; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MOVREL-NEXT: s_mov_b64 s[4:5], 1
+; MOVREL-NEXT: s_mov_b64 s[6:7], 2
+; MOVREL-NEXT: s_mov_b64 s[8:9], 3
+; MOVREL-NEXT: s_mov_b64 s[10:11], 4
+; MOVREL-NEXT: s_mov_b64 s[12:13], 5
+; MOVREL-NEXT: s_mov_b64 s[14:15], 6
+; MOVREL-NEXT: s_mov_b64 s[16:17], 7
+; MOVREL-NEXT: s_mov_b64 s[18:19], 8
+; MOVREL-NEXT: s_mov_b64 s[20:21], exec
+; MOVREL-NEXT: BB6_1: ; =>This Inner Loop Header: Depth=1
+; MOVREL-NEXT: v_readfirstlane_b32 s22, v0
+; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s22, v0
+; MOVREL-NEXT: s_lshl_b32 s22, s22, 1
+; MOVREL-NEXT: s_add_u32 s23, s22, 1
+; MOVREL-NEXT: s_mov_b32 m0, s22
+; MOVREL-NEXT: s_movrels_b32 s22, s4
+; MOVREL-NEXT: s_mov_b32 m0, s23
+; MOVREL-NEXT: s_movrels_b32 s23, s4
+; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc
+; MOVREL-NEXT: s_xor_b64 exec, exec, vcc
+; MOVREL-NEXT: s_cbranch_execnz BB6_1
+; MOVREL-NEXT: ; %bb.2:
+; MOVREL-NEXT: s_mov_b64 exec, s[20:21]
+; MOVREL-NEXT: v_mov_b32_e32 v0, s22
+; MOVREL-NEXT: v_mov_b32_e32 v1, s23
+; MOVREL-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %ext = extractelement <8 x i64> <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8>, i32 %sel
+ ret i64 %ext
+}
+
+define amdgpu_ps void @dyn_extract_v8i64_const_s_s(i32 inreg %sel) {
+; GPRIDX-LABEL: dyn_extract_v8i64_const_s_s:
+; GPRIDX: ; %bb.0: ; %entry
+; GPRIDX-NEXT: s_mov_b64 s[4:5], 1
+; GPRIDX-NEXT: s_mov_b32 m0, s2
+; GPRIDX-NEXT: s_mov_b64 s[6:7], 2
+; GPRIDX-NEXT: s_mov_b64 s[8:9], 3
+; GPRIDX-NEXT: s_mov_b64 s[10:11], 4
+; GPRIDX-NEXT: s_mov_b64 s[12:13], 5
+; GPRIDX-NEXT: s_mov_b64 s[14:15], 6
+; GPRIDX-NEXT: s_mov_b64 s[16:17], 7
+; GPRIDX-NEXT: s_mov_b64 s[18:19], 8
+; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[4:5]
+; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
+; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
+; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
+; GPRIDX-NEXT: s_endpgm
+;
+; MOVREL-LABEL: dyn_extract_v8i64_const_s_s:
+; MOVREL: ; %bb.0: ; %entry
+; MOVREL-NEXT: s_mov_b64 s[4:5], 1
+; MOVREL-NEXT: s_mov_b32 m0, s2
+; MOVREL-NEXT: s_mov_b64 s[6:7], 2
+; MOVREL-NEXT: s_mov_b64 s[8:9], 3
+; MOVREL-NEXT: s_mov_b64 s[10:11], 4
+; MOVREL-NEXT: s_mov_b64 s[12:13], 5
+; MOVREL-NEXT: s_mov_b64 s[14:15], 6
+; MOVREL-NEXT: s_mov_b64 s[16:17], 7
+; MOVREL-NEXT: s_mov_b64 s[18:19], 8
+; MOVREL-NEXT: s_movrels_b64 s[0:1], s[4:5]
+; MOVREL-NEXT: v_mov_b32_e32 v0, s0
+; MOVREL-NEXT: v_mov_b32_e32 v1, s1
+; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
+; MOVREL-NEXT: s_endpgm
+entry:
+ %ext = extractelement <8 x i64> <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8>, i32 %sel
+ store i64 %ext, i64 addrspace(1)* undef
+ ret void
+}
+
+define amdgpu_ps void @dyn_extract_v8i64_s_v(<8 x i64> inreg %vec, i32 %sel) {
+; GPRIDX-LABEL: dyn_extract_v8i64_s_v:
+; GPRIDX: ; %bb.0: ; %entry
+; GPRIDX-NEXT: s_mov_b32 s0, s2
+; GPRIDX-NEXT: s_mov_b32 s1, s3
+; GPRIDX-NEXT: s_mov_b32 s2, s4
+; GPRIDX-NEXT: s_mov_b32 s3, s5
+; GPRIDX-NEXT: s_mov_b32 s4, s6
+; GPRIDX-NEXT: s_mov_b32 s5, s7
+; GPRIDX-NEXT: s_mov_b32 s6, s8
+; GPRIDX-NEXT: s_mov_b32 s7, s9
+; GPRIDX-NEXT: s_mov_b32 s8, s10
+; GPRIDX-NEXT: s_mov_b32 s9, s11
+; GPRIDX-NEXT: s_mov_b32 s10, s12
+; GPRIDX-NEXT: s_mov_b32 s11, s13
+; GPRIDX-NEXT: s_mov_b32 s12, s14
+; GPRIDX-NEXT: s_mov_b32 s13, s15
+; GPRIDX-NEXT: s_mov_b32 s14, s16
+; GPRIDX-NEXT: s_mov_b32 s15, s17
+; GPRIDX-NEXT: s_mov_b64 s[16:17], exec
+; GPRIDX-NEXT: BB8_1: ; =>This Inner Loop Header: Depth=1
+; GPRIDX-NEXT: v_readfirstlane_b32 s18, v0
+; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s18, v0
+; GPRIDX-NEXT: s_lshl_b32 s18, s18, 1
+; GPRIDX-NEXT: s_add_u32 s19, s18, 1
+; GPRIDX-NEXT: s_mov_b32 m0, s18
+; GPRIDX-NEXT: s_nop 0
+; GPRIDX-NEXT: s_movrels_b32 s18, s0
+; GPRIDX-NEXT: s_mov_b32 m0, s19
+; GPRIDX-NEXT: v_mov_b32_e32 v1, s18
+; GPRIDX-NEXT: s_movrels_b32 s19, s0
+; GPRIDX-NEXT: v_mov_b32_e32 v2, s19
+; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
+; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
+; GPRIDX-NEXT: s_cbranch_execnz BB8_1
+; GPRIDX-NEXT: ; %bb.2:
+; GPRIDX-NEXT: s_mov_b64 exec, s[16:17]
+; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[1:2], off
+; GPRIDX-NEXT: s_endpgm
+;
+; MOVREL-LABEL: dyn_extract_v8i64_s_v:
+; MOVREL: ; %bb.0: ; %entry
+; MOVREL-NEXT: s_mov_b32 s0, s2
+; MOVREL-NEXT: s_mov_b32 s1, s3
+; MOVREL-NEXT: s_mov_b32 s2, s4
+; MOVREL-NEXT: s_mov_b32 s3, s5
+; MOVREL-NEXT: s_mov_b32 s4, s6
+; MOVREL-NEXT: s_mov_b32 s5, s7
+; MOVREL-NEXT: s_mov_b32 s6, s8
+; MOVREL-NEXT: s_mov_b32 s7, s9
+; MOVREL-NEXT: s_mov_b32 s8, s10
+; MOVREL-NEXT: s_mov_b32 s9, s11
+; MOVREL-NEXT: s_mov_b32 s10, s12
+; MOVREL-NEXT: s_mov_b32 s11, s13
+; MOVREL-NEXT: s_mov_b32 s12, s14
+; MOVREL-NEXT: s_mov_b32 s13, s15
+; MOVREL-NEXT: s_mov_b32 s14, s16
+; MOVREL-NEXT: s_mov_b32 s15, s17
+; MOVREL-NEXT: s_mov_b64 s[16:17], exec
+; MOVREL-NEXT: BB8_1: ; =>This Inner Loop Header: Depth=1
+; MOVREL-NEXT: v_readfirstlane_b32 s18, v0
+; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s18, v0
+; MOVREL-NEXT: s_lshl_b32 s18, s18, 1
+; MOVREL-NEXT: s_add_u32 s19, s18, 1
+; MOVREL-NEXT: s_mov_b32 m0, s18
+; MOVREL-NEXT: s_movrels_b32 s18, s0
+; MOVREL-NEXT: s_mov_b32 m0, s19
+; MOVREL-NEXT: s_movrels_b32 s19, s0
+; MOVREL-NEXT: v_mov_b32_e32 v1, s18
+; MOVREL-NEXT: v_mov_b32_e32 v2, s19
+; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc
+; MOVREL-NEXT: s_xor_b64 exec, exec, vcc
+; MOVREL-NEXT: s_cbranch_execnz BB8_1
+; MOVREL-NEXT: ; %bb.2:
+; MOVREL-NEXT: s_mov_b64 exec, s[16:17]
+; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[1:2]
+; MOVREL-NEXT: s_endpgm
+entry:
+ %ext = extractelement <8 x i64> %vec, i32 %sel
+ store i64 %ext, i64 addrspace(1)* undef
+ ret void
+}
+
+define i64 @dyn_extract_v8i64_v_v(<8 x i64> %vec, i32 %sel) {
+; GPRIDX-LABEL: dyn_extract_v8i64_v_v:
+; GPRIDX: ; %bb.0: ; %entry
+; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GPRIDX-NEXT: s_mov_b64 s[4:5], exec
+; GPRIDX-NEXT: BB9_1: ; =>This Inner Loop Header: Depth=1
+; GPRIDX-NEXT: v_readfirstlane_b32 s6, v16
+; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16
+; GPRIDX-NEXT: s_lshl_b32 s6, s6, 1
+; GPRIDX-NEXT: s_add_u32 s7, s6, 1
+; GPRIDX-NEXT: s_set_gpr_idx_on s6, gpr_idx(SRC0)
+; GPRIDX-NEXT: v_mov_b32_e32 v17, v0
+; GPRIDX-NEXT: s_set_gpr_idx_off
+; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0)
+; GPRIDX-NEXT: v_mov_b32_e32 v18, v0
+; GPRIDX-NEXT: s_set_gpr_idx_off
+; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
+; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
+; GPRIDX-NEXT: s_cbranch_execnz BB9_1
+; GPRIDX-NEXT: ; %bb.2:
+; GPRIDX-NEXT: s_mov_b64 exec, s[4:5]
+; GPRIDX-NEXT: v_mov_b32_e32 v0, v17
+; GPRIDX-NEXT: v_mov_b32_e32 v1, v18
+; GPRIDX-NEXT: s_setpc_b64 s[30:31]
+;
+; MOVREL-LABEL: dyn_extract_v8i64_v_v:
+; MOVREL: ; %bb.0: ; %entry
+; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MOVREL-NEXT: s_mov_b64 s[4:5], exec
+; MOVREL-NEXT: BB9_1: ; =>This Inner Loop Header: Depth=1
+; MOVREL-NEXT: v_readfirstlane_b32 s6, v16
+; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16
+; MOVREL-NEXT: s_lshl_b32 s6, s6, 1
+; MOVREL-NEXT: s_mov_b32 m0, s6
+; MOVREL-NEXT: s_add_u32 s7, s6, 1
+; MOVREL-NEXT: v_movrels_b32_e32 v17, v0
+; MOVREL-NEXT: s_mov_b32 m0, s7
+; MOVREL-NEXT: v_movrels_b32_e32 v18, v0
+; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc
+; MOVREL-NEXT: s_xor_b64 exec, exec, vcc
+; MOVREL-NEXT: s_cbranch_execnz BB9_1
+; MOVREL-NEXT: ; %bb.2:
+; MOVREL-NEXT: s_mov_b64 exec, s[4:5]
+; MOVREL-NEXT: v_mov_b32_e32 v0, v17
+; MOVREL-NEXT: v_mov_b32_e32 v1, v18
+; MOVREL-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %ext = extractelement <8 x i64> %vec, i32 %sel
+ ret i64 %ext
+}
+
+define amdgpu_ps void @dyn_extract_v8i64_v_s(<8 x i64> %vec, i32 inreg %sel) {
+; GPRIDX-LABEL: dyn_extract_v8i64_v_s:
+; GPRIDX: ; %bb.0: ; %entry
+; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1
+; GPRIDX-NEXT: s_add_u32 s1, s0, 1
+; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0)
+; GPRIDX-NEXT: v_mov_b32_e32 v16, v0
+; GPRIDX-NEXT: s_set_gpr_idx_off
+; GPRIDX-NEXT: s_set_gpr_idx_on s1, gpr_idx(SRC0)
+; GPRIDX-NEXT: v_mov_b32_e32 v17, v0
+; GPRIDX-NEXT: s_set_gpr_idx_off
+; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[16:17], off
+; GPRIDX-NEXT: s_endpgm
+;
+; MOVREL-LABEL: dyn_extract_v8i64_v_s:
+; MOVREL: ; %bb.0: ; %entry
+; MOVREL-NEXT: s_lshl_b32 s0, s2, 1
+; MOVREL-NEXT: s_mov_b32 m0, s0
+; MOVREL-NEXT: s_add_u32 s0, s0, 1
+; MOVREL-NEXT: v_movrels_b32_e32 v16, v0
+; MOVREL-NEXT: s_mov_b32 m0, s0
+; MOVREL-NEXT: v_movrels_b32_e32 v17, v0
+; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[16:17]
+; MOVREL-NEXT: s_endpgm
+entry:
+ %ext = extractelement <8 x i64> %vec, i32 %sel
+ store i64 %ext, i64 addrspace(1)* undef
+ ret void
+}
+
+define amdgpu_ps void @dyn_extract_v8i64_s_s(<8 x i64> inreg %vec, i32 inreg %sel) {
+; GPRIDX-LABEL: dyn_extract_v8i64_s_s:
+; GPRIDX: ; %bb.0: ; %entry
+; GPRIDX-NEXT: s_mov_b32 s0, s2
+; GPRIDX-NEXT: s_mov_b32 s1, s3
+; GPRIDX-NEXT: s_mov_b32 m0, s18
+; GPRIDX-NEXT: s_mov_b32 s2, s4
+; GPRIDX-NEXT: s_mov_b32 s3, s5
+; GPRIDX-NEXT: s_mov_b32 s4, s6
+; GPRIDX-NEXT: s_mov_b32 s5, s7
+; GPRIDX-NEXT: s_mov_b32 s6, s8
+; GPRIDX-NEXT: s_mov_b32 s7, s9
+; GPRIDX-NEXT: s_mov_b32 s8, s10
+; GPRIDX-NEXT: s_mov_b32 s9, s11
+; GPRIDX-NEXT: s_mov_b32 s10, s12
+; GPRIDX-NEXT: s_mov_b32 s11, s13
+; GPRIDX-NEXT: s_mov_b32 s12, s14
+; GPRIDX-NEXT: s_mov_b32 s13, s15
+; GPRIDX-NEXT: s_mov_b32 s14, s16
+; GPRIDX-NEXT: s_mov_b32 s15, s17
+; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1]
+; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
+; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
+; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
+; GPRIDX-NEXT: s_endpgm
+;
+; MOVREL-LABEL: dyn_extract_v8i64_s_s:
+; MOVREL: ; %bb.0: ; %entry
+; MOVREL-NEXT: s_mov_b32 s0, s2
+; MOVREL-NEXT: s_mov_b32 s1, s3
+; MOVREL-NEXT: s_mov_b32 m0, s18
+; MOVREL-NEXT: s_mov_b32 s2, s4
+; MOVREL-NEXT: s_mov_b32 s3, s5
+; MOVREL-NEXT: s_mov_b32 s4, s6
+; MOVREL-NEXT: s_mov_b32 s5, s7
+; MOVREL-NEXT: s_mov_b32 s6, s8
+; MOVREL-NEXT: s_mov_b32 s7, s9
+; MOVREL-NEXT: s_mov_b32 s8, s10
+; MOVREL-NEXT: s_mov_b32 s9, s11
+; MOVREL-NEXT: s_mov_b32 s10, s12
+; MOVREL-NEXT: s_mov_b32 s11, s13
+; MOVREL-NEXT: s_mov_b32 s12, s14
+; MOVREL-NEXT: s_mov_b32 s13, s15
+; MOVREL-NEXT: s_mov_b32 s14, s16
+; MOVREL-NEXT: s_mov_b32 s15, s17
+; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1]
+; MOVREL-NEXT: v_mov_b32_e32 v0, s0
+; MOVREL-NEXT: v_mov_b32_e32 v1, s1
+; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
+; MOVREL-NEXT: s_endpgm
+entry:
+ %ext = extractelement <8 x i64> %vec, i32 %sel
+ store i64 %ext, i64 addrspace(1)* undef
+ ret void
+}
+
+define amdgpu_ps float @dyn_extract_v8f32_s_s_offset3(<8 x float> inreg %vec, i32 inreg %sel) {
+; GPRIDX-LABEL: dyn_extract_v8f32_s_s_offset3:
+; GPRIDX: ; %bb.0: ; %entry
+; GPRIDX-NEXT: s_mov_b32 s0, s2
+; GPRIDX-NEXT: s_add_u32 m0, s10, 3
+; GPRIDX-NEXT: s_mov_b32 s1, s3
+; GPRIDX-NEXT: s_mov_b32 s2, s4
+; GPRIDX-NEXT: s_mov_b32 s3, s5
+; GPRIDX-NEXT: s_mov_b32 s4, s6
+; GPRIDX-NEXT: s_mov_b32 s5, s7
+; GPRIDX-NEXT: s_mov_b32 s6, s8
+; GPRIDX-NEXT: s_mov_b32 s7, s9
+; GPRIDX-NEXT: s_movrels_b32 s0, s0
+; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
+; GPRIDX-NEXT: ; return to shader part epilog
+;
+; MOVREL-LABEL: dyn_extract_v8f32_s_s_offset3:
+; MOVREL: ; %bb.0: ; %entry
+; MOVREL-NEXT: s_mov_b32 s0, s2
+; MOVREL-NEXT: s_add_u32 m0, s10, 3
+; MOVREL-NEXT: s_mov_b32 s1, s3
+; MOVREL-NEXT: s_mov_b32 s2, s4
+; MOVREL-NEXT: s_mov_b32 s3, s5
+; MOVREL-NEXT: s_mov_b32 s4, s6
+; MOVREL-NEXT: s_mov_b32 s5, s7
+; MOVREL-NEXT: s_mov_b32 s6, s8
+; MOVREL-NEXT: s_mov_b32 s7, s9
+; MOVREL-NEXT: s_movrels_b32 s0, s0
+; MOVREL-NEXT: v_mov_b32_e32 v0, s0
+; MOVREL-NEXT: ; return to shader part epilog
+entry:
+ %add = add i32 %sel, 3
+ %ext = extractelement <8 x float> %vec, i32 %add
+ ret float %ext
+}
+
+define float @dyn_extract_v8f32_v_v_offset3(<8 x float> %vec, i32 %sel) {
+; GPRIDX-LABEL: dyn_extract_v8f32_v_v_offset3:
+; GPRIDX: ; %bb.0: ; %entry
+; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GPRIDX-NEXT: v_add_u32_e32 v9, 3, v8
+; GPRIDX-NEXT: s_mov_b64 s[4:5], exec
+; GPRIDX-NEXT: BB13_1: ; =>This Inner Loop Header: Depth=1
+; GPRIDX-NEXT: v_readfirstlane_b32 s6, v9
+; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v9
+; GPRIDX-NEXT: s_set_gpr_idx_on s6, gpr_idx(SRC0)
+; GPRIDX-NEXT: v_mov_b32_e32 v8, v0
+; GPRIDX-NEXT: s_set_gpr_idx_off
+; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
+; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
+; GPRIDX-NEXT: s_cbranch_execnz BB13_1
+; GPRIDX-NEXT: ; %bb.2:
+; GPRIDX-NEXT: s_mov_b64 exec, s[4:5]
+; GPRIDX-NEXT: v_mov_b32_e32 v0, v8
+; GPRIDX-NEXT: s_setpc_b64 s[30:31]
+;
+; MOVREL-LABEL: dyn_extract_v8f32_v_v_offset3:
+; MOVREL: ; %bb.0: ; %entry
+; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MOVREL-NEXT: v_add_u32_e32 v9, vcc, 3, v8
+; MOVREL-NEXT: s_mov_b64 s[4:5], exec
+; MOVREL-NEXT: BB13_1: ; =>This Inner Loop Header: Depth=1
+; MOVREL-NEXT: v_readfirstlane_b32 s6, v9
+; MOVREL-NEXT: s_mov_b32 m0, s6
+; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v9
+; MOVREL-NEXT: v_movrels_b32_e32 v8, v0
+; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc
+; MOVREL-NEXT: s_xor_b64 exec, exec, vcc
+; MOVREL-NEXT: s_cbranch_execnz BB13_1
+; MOVREL-NEXT: ; %bb.2:
+; MOVREL-NEXT: s_mov_b64 exec, s[4:5]
+; MOVREL-NEXT: v_mov_b32_e32 v0, v8
+; MOVREL-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %add = add i32 %sel, 3
+ %ext = extractelement <8 x float> %vec, i32 %add
+ ret float %ext
+}
+
+define amdgpu_ps double @dyn_extract_v8f64_s_s_offset1(<8 x double> inreg %vec, i32 inreg %sel) {
+; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset1:
+; GPRIDX: ; %bb.0: ; %entry
+; GPRIDX-NEXT: s_mov_b32 s0, s2
+; GPRIDX-NEXT: s_mov_b32 s1, s3
+; GPRIDX-NEXT: s_add_u32 m0, s18, 1
+; GPRIDX-NEXT: s_mov_b32 s2, s4
+; GPRIDX-NEXT: s_mov_b32 s3, s5
+; GPRIDX-NEXT: s_mov_b32 s4, s6
+; GPRIDX-NEXT: s_mov_b32 s5, s7
+; GPRIDX-NEXT: s_mov_b32 s6, s8
+; GPRIDX-NEXT: s_mov_b32 s7, s9
+; GPRIDX-NEXT: s_mov_b32 s8, s10
+; GPRIDX-NEXT: s_mov_b32 s9, s11
+; GPRIDX-NEXT: s_mov_b32 s10, s12
+; GPRIDX-NEXT: s_mov_b32 s11, s13
+; GPRIDX-NEXT: s_mov_b32 s12, s14
+; GPRIDX-NEXT: s_mov_b32 s13, s15
+; GPRIDX-NEXT: s_mov_b32 s14, s16
+; GPRIDX-NEXT: s_mov_b32 s15, s17
+; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1]
+; GPRIDX-NEXT: ; return to shader part epilog
+;
+; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset1:
+; MOVREL: ; %bb.0: ; %entry
+; MOVREL-NEXT: s_mov_b32 s0, s2
+; MOVREL-NEXT: s_mov_b32 s1, s3
+; MOVREL-NEXT: s_add_u32 m0, s18, 1
+; MOVREL-NEXT: s_mov_b32 s2, s4
+; MOVREL-NEXT: s_mov_b32 s3, s5
+; MOVREL-NEXT: s_mov_b32 s4, s6
+; MOVREL-NEXT: s_mov_b32 s5, s7
+; MOVREL-NEXT: s_mov_b32 s6, s8
+; MOVREL-NEXT: s_mov_b32 s7, s9
+; MOVREL-NEXT: s_mov_b32 s8, s10
+; MOVREL-NEXT: s_mov_b32 s9, s11
+; MOVREL-NEXT: s_mov_b32 s10, s12
+; MOVREL-NEXT: s_mov_b32 s11, s13
+; MOVREL-NEXT: s_mov_b32 s12, s14
+; MOVREL-NEXT: s_mov_b32 s13, s15
+; MOVREL-NEXT: s_mov_b32 s14, s16
+; MOVREL-NEXT: s_mov_b32 s15, s17
+; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1]
+; MOVREL-NEXT: ; return to shader part epilog
+entry:
+ %add = add i32 %sel, 1
+ %ext = extractelement <8 x double> %vec, i32 %add
+ ret double %ext
+}
+
+define amdgpu_ps double @dyn_extract_v8f64_s_s_offset2(<8 x double> inreg %vec, i32 inreg %sel) {
+; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset2:
+; GPRIDX: ; %bb.0: ; %entry
+; GPRIDX-NEXT: s_mov_b32 s0, s2
+; GPRIDX-NEXT: s_mov_b32 s1, s3
+; GPRIDX-NEXT: s_add_u32 m0, s18, 2
+; GPRIDX-NEXT: s_mov_b32 s2, s4
+; GPRIDX-NEXT: s_mov_b32 s3, s5
+; GPRIDX-NEXT: s_mov_b32 s4, s6
+; GPRIDX-NEXT: s_mov_b32 s5, s7
+; GPRIDX-NEXT: s_mov_b32 s6, s8
+; GPRIDX-NEXT: s_mov_b32 s7, s9
+; GPRIDX-NEXT: s_mov_b32 s8, s10
+; GPRIDX-NEXT: s_mov_b32 s9, s11
+; GPRIDX-NEXT: s_mov_b32 s10, s12
+; GPRIDX-NEXT: s_mov_b32 s11, s13
+; GPRIDX-NEXT: s_mov_b32 s12, s14
+; GPRIDX-NEXT: s_mov_b32 s13, s15
+; GPRIDX-NEXT: s_mov_b32 s14, s16
+; GPRIDX-NEXT: s_mov_b32 s15, s17
+; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1]
+; GPRIDX-NEXT: ; return to shader part epilog
+;
+; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset2:
+; MOVREL: ; %bb.0: ; %entry
+; MOVREL-NEXT: s_mov_b32 s0, s2
+; MOVREL-NEXT: s_mov_b32 s1, s3
+; MOVREL-NEXT: s_add_u32 m0, s18, 2
+; MOVREL-NEXT: s_mov_b32 s2, s4
+; MOVREL-NEXT: s_mov_b32 s3, s5
+; MOVREL-NEXT: s_mov_b32 s4, s6
+; MOVREL-NEXT: s_mov_b32 s5, s7
+; MOVREL-NEXT: s_mov_b32 s6, s8
+; MOVREL-NEXT: s_mov_b32 s7, s9
+; MOVREL-NEXT: s_mov_b32 s8, s10
+; MOVREL-NEXT: s_mov_b32 s9, s11
+; MOVREL-NEXT: s_mov_b32 s10, s12
+; MOVREL-NEXT: s_mov_b32 s11, s13
+; MOVREL-NEXT: s_mov_b32 s12, s14
+; MOVREL-NEXT: s_mov_b32 s13, s15
+; MOVREL-NEXT: s_mov_b32 s14, s16
+; MOVREL-NEXT: s_mov_b32 s15, s17
+; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1]
+; MOVREL-NEXT: ; return to shader part epilog
+entry:
+ %add = add i32 %sel, 2
+ %ext = extractelement <8 x double> %vec, i32 %add
+ ret double %ext
+}
+
+define amdgpu_ps double @dyn_extract_v8f64_s_s_offset3(<8 x double> inreg %vec, i32 inreg %sel) {
+; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset3:
+; GPRIDX: ; %bb.0: ; %entry
+; GPRIDX-NEXT: s_mov_b32 s0, s2
+; GPRIDX-NEXT: s_mov_b32 s1, s3
+; GPRIDX-NEXT: s_add_u32 m0, s18, 3
+; GPRIDX-NEXT: s_mov_b32 s2, s4
+; GPRIDX-NEXT: s_mov_b32 s3, s5
+; GPRIDX-NEXT: s_mov_b32 s4, s6
+; GPRIDX-NEXT: s_mov_b32 s5, s7
+; GPRIDX-NEXT: s_mov_b32 s6, s8
+; GPRIDX-NEXT: s_mov_b32 s7, s9
+; GPRIDX-NEXT: s_mov_b32 s8, s10
+; GPRIDX-NEXT: s_mov_b32 s9, s11
+; GPRIDX-NEXT: s_mov_b32 s10, s12
+; GPRIDX-NEXT: s_mov_b32 s11, s13
+; GPRIDX-NEXT: s_mov_b32 s12, s14
+; GPRIDX-NEXT: s_mov_b32 s13, s15
+; GPRIDX-NEXT: s_mov_b32 s14, s16
+; GPRIDX-NEXT: s_mov_b32 s15, s17
+; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1]
+; GPRIDX-NEXT: ; return to shader part epilog
+;
+; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset3:
+; MOVREL: ; %bb.0: ; %entry
+; MOVREL-NEXT: s_mov_b32 s0, s2
+; MOVREL-NEXT: s_mov_b32 s1, s3
+; MOVREL-NEXT: s_add_u32 m0, s18, 3
+; MOVREL-NEXT: s_mov_b32 s2, s4
+; MOVREL-NEXT: s_mov_b32 s3, s5
+; MOVREL-NEXT: s_mov_b32 s4, s6
+; MOVREL-NEXT: s_mov_b32 s5, s7
+; MOVREL-NEXT: s_mov_b32 s6, s8
+; MOVREL-NEXT: s_mov_b32 s7, s9
+; MOVREL-NEXT: s_mov_b32 s8, s10
+; MOVREL-NEXT: s_mov_b32 s9, s11
+; MOVREL-NEXT: s_mov_b32 s10, s12
+; MOVREL-NEXT: s_mov_b32 s11, s13
+; MOVREL-NEXT: s_mov_b32 s12, s14
+; MOVREL-NEXT: s_mov_b32 s13, s15
+; MOVREL-NEXT: s_mov_b32 s14, s16
+; MOVREL-NEXT: s_mov_b32 s15, s17
+; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1]
+; MOVREL-NEXT: ; return to shader part epilog
+entry:
+ %add = add i32 %sel, 3
+ %ext = extractelement <8 x double> %vec, i32 %add
+ ret double %ext
+}
+
+define amdgpu_ps double @dyn_extract_v8f64_s_s_offset4(<8 x double> inreg %vec, i32 inreg %sel) {
+; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset4:
+; GPRIDX: ; %bb.0: ; %entry
+; GPRIDX-NEXT: s_mov_b32 s0, s2
+; GPRIDX-NEXT: s_mov_b32 s1, s3
+; GPRIDX-NEXT: s_add_u32 m0, s18, 4
+; GPRIDX-NEXT: s_mov_b32 s2, s4
+; GPRIDX-NEXT: s_mov_b32 s3, s5
+; GPRIDX-NEXT: s_mov_b32 s4, s6
+; GPRIDX-NEXT: s_mov_b32 s5, s7
+; GPRIDX-NEXT: s_mov_b32 s6, s8
+; GPRIDX-NEXT: s_mov_b32 s7, s9
+; GPRIDX-NEXT: s_mov_b32 s8, s10
+; GPRIDX-NEXT: s_mov_b32 s9, s11
+; GPRIDX-NEXT: s_mov_b32 s10, s12
+; GPRIDX-NEXT: s_mov_b32 s11, s13
+; GPRIDX-NEXT: s_mov_b32 s12, s14
+; GPRIDX-NEXT: s_mov_b32 s13, s15
+; GPRIDX-NEXT: s_mov_b32 s14, s16
+; GPRIDX-NEXT: s_mov_b32 s15, s17
+; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1]
+; GPRIDX-NEXT: ; return to shader part epilog
+;
+; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset4:
+; MOVREL: ; %bb.0: ; %entry
+; MOVREL-NEXT: s_mov_b32 s0, s2
+; MOVREL-NEXT: s_mov_b32 s1, s3
+; MOVREL-NEXT: s_add_u32 m0, s18, 4
+; MOVREL-NEXT: s_mov_b32 s2, s4
+; MOVREL-NEXT: s_mov_b32 s3, s5
+; MOVREL-NEXT: s_mov_b32 s4, s6
+; MOVREL-NEXT: s_mov_b32 s5, s7
+; MOVREL-NEXT: s_mov_b32 s6, s8
+; MOVREL-NEXT: s_mov_b32 s7, s9
+; MOVREL-NEXT: s_mov_b32 s8, s10
+; MOVREL-NEXT: s_mov_b32 s9, s11
+; MOVREL-NEXT: s_mov_b32 s10, s12
+; MOVREL-NEXT: s_mov_b32 s11, s13
+; MOVREL-NEXT: s_mov_b32 s12, s14
+; MOVREL-NEXT: s_mov_b32 s13, s15
+; MOVREL-NEXT: s_mov_b32 s14, s16
+; MOVREL-NEXT: s_mov_b32 s15, s17
+; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1]
+; MOVREL-NEXT: ; return to shader part epilog
+entry:
+ %add = add i32 %sel, 4
+ %ext = extractelement <8 x double> %vec, i32 %add
+ ret double %ext
+}
+
+define amdgpu_ps double @dyn_extract_v8f64_s_s_offset5(<8 x double> inreg %vec, i32 inreg %sel) {
+; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset5:
+; GPRIDX: ; %bb.0: ; %entry
+; GPRIDX-NEXT: s_mov_b32 s0, s2
+; GPRIDX-NEXT: s_mov_b32 s1, s3
+; GPRIDX-NEXT: s_add_u32 m0, s18, 5
+; GPRIDX-NEXT: s_mov_b32 s2, s4
+; GPRIDX-NEXT: s_mov_b32 s3, s5
+; GPRIDX-NEXT: s_mov_b32 s4, s6
+; GPRIDX-NEXT: s_mov_b32 s5, s7
+; GPRIDX-NEXT: s_mov_b32 s6, s8
+; GPRIDX-NEXT: s_mov_b32 s7, s9
+; GPRIDX-NEXT: s_mov_b32 s8, s10
+; GPRIDX-NEXT: s_mov_b32 s9, s11
+; GPRIDX-NEXT: s_mov_b32 s10, s12
+; GPRIDX-NEXT: s_mov_b32 s11, s13
+; GPRIDX-NEXT: s_mov_b32 s12, s14
+; GPRIDX-NEXT: s_mov_b32 s13, s15
+; GPRIDX-NEXT: s_mov_b32 s14, s16
+; GPRIDX-NEXT: s_mov_b32 s15, s17
+; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1]
+; GPRIDX-NEXT: ; return to shader part epilog
+;
+; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset5:
+; MOVREL: ; %bb.0: ; %entry
+; MOVREL-NEXT: s_mov_b32 s0, s2
+; MOVREL-NEXT: s_mov_b32 s1, s3
+; MOVREL-NEXT: s_add_u32 m0, s18, 5
+; MOVREL-NEXT: s_mov_b32 s2, s4
+; MOVREL-NEXT: s_mov_b32 s3, s5
+; MOVREL-NEXT: s_mov_b32 s4, s6
+; MOVREL-NEXT: s_mov_b32 s5, s7
+; MOVREL-NEXT: s_mov_b32 s6, s8
+; MOVREL-NEXT: s_mov_b32 s7, s9
+; MOVREL-NEXT: s_mov_b32 s8, s10
+; MOVREL-NEXT: s_mov_b32 s9, s11
+; MOVREL-NEXT: s_mov_b32 s10, s12
+; MOVREL-NEXT: s_mov_b32 s11, s13
+; MOVREL-NEXT: s_mov_b32 s12, s14
+; MOVREL-NEXT: s_mov_b32 s13, s15
+; MOVREL-NEXT: s_mov_b32 s14, s16
+; MOVREL-NEXT: s_mov_b32 s15, s17
+; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1]
+; MOVREL-NEXT: ; return to shader part epilog
+entry:
+ %add = add i32 %sel, 5
+ %ext = extractelement <8 x double> %vec, i32 %add
+ ret double %ext
+}
+
+define amdgpu_ps double @dyn_extract_v8f64_s_s_offset6(<8 x double> inreg %vec, i32 inreg %sel) {
+; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset6:
+; GPRIDX: ; %bb.0: ; %entry
+; GPRIDX-NEXT: s_mov_b32 s0, s2
+; GPRIDX-NEXT: s_mov_b32 s1, s3
+; GPRIDX-NEXT: s_add_u32 m0, s18, 6
+; GPRIDX-NEXT: s_mov_b32 s2, s4
+; GPRIDX-NEXT: s_mov_b32 s3, s5
+; GPRIDX-NEXT: s_mov_b32 s4, s6
+; GPRIDX-NEXT: s_mov_b32 s5, s7
+; GPRIDX-NEXT: s_mov_b32 s6, s8
+; GPRIDX-NEXT: s_mov_b32 s7, s9
+; GPRIDX-NEXT: s_mov_b32 s8, s10
+; GPRIDX-NEXT: s_mov_b32 s9, s11
+; GPRIDX-NEXT: s_mov_b32 s10, s12
+; GPRIDX-NEXT: s_mov_b32 s11, s13
+; GPRIDX-NEXT: s_mov_b32 s12, s14
+; GPRIDX-NEXT: s_mov_b32 s13, s15
+; GPRIDX-NEXT: s_mov_b32 s14, s16
+; GPRIDX-NEXT: s_mov_b32 s15, s17
+; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1]
+; GPRIDX-NEXT: ; return to shader part epilog
+;
+; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset6:
+; MOVREL: ; %bb.0: ; %entry
+; MOVREL-NEXT: s_mov_b32 s0, s2
+; MOVREL-NEXT: s_mov_b32 s1, s3
+; MOVREL-NEXT: s_add_u32 m0, s18, 6
+; MOVREL-NEXT: s_mov_b32 s2, s4
+; MOVREL-NEXT: s_mov_b32 s3, s5
+; MOVREL-NEXT: s_mov_b32 s4, s6
+; MOVREL-NEXT: s_mov_b32 s5, s7
+; MOVREL-NEXT: s_mov_b32 s6, s8
+; MOVREL-NEXT: s_mov_b32 s7, s9
+; MOVREL-NEXT: s_mov_b32 s8, s10
+; MOVREL-NEXT: s_mov_b32 s9, s11
+; MOVREL-NEXT: s_mov_b32 s10, s12
+; MOVREL-NEXT: s_mov_b32 s11, s13
+; MOVREL-NEXT: s_mov_b32 s12, s14
+; MOVREL-NEXT: s_mov_b32 s13, s15
+; MOVREL-NEXT: s_mov_b32 s14, s16
+; MOVREL-NEXT: s_mov_b32 s15, s17
+; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1]
+; MOVREL-NEXT: ; return to shader part epilog
+entry:
+ %add = add i32 %sel, 6
+ %ext = extractelement <8 x double> %vec, i32 %add
+ ret double %ext
+}
+
+define amdgpu_ps double @dyn_extract_v8f64_s_s_offset7(<8 x double> inreg %vec, i32 inreg %sel) {
+; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset7:
+; GPRIDX: ; %bb.0: ; %entry
+; GPRIDX-NEXT: s_mov_b32 s0, s2
+; GPRIDX-NEXT: s_mov_b32 s1, s3
+; GPRIDX-NEXT: s_add_u32 m0, s18, 7
+; GPRIDX-NEXT: s_mov_b32 s2, s4
+; GPRIDX-NEXT: s_mov_b32 s3, s5
+; GPRIDX-NEXT: s_mov_b32 s4, s6
+; GPRIDX-NEXT: s_mov_b32 s5, s7
+; GPRIDX-NEXT: s_mov_b32 s6, s8
+; GPRIDX-NEXT: s_mov_b32 s7, s9
+; GPRIDX-NEXT: s_mov_b32 s8, s10
+; GPRIDX-NEXT: s_mov_b32 s9, s11
+; GPRIDX-NEXT: s_mov_b32 s10, s12
+; GPRIDX-NEXT: s_mov_b32 s11, s13
+; GPRIDX-NEXT: s_mov_b32 s12, s14
+; GPRIDX-NEXT: s_mov_b32 s13, s15
+; GPRIDX-NEXT: s_mov_b32 s14, s16
+; GPRIDX-NEXT: s_mov_b32 s15, s17
+; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1]
+; GPRIDX-NEXT: ; return to shader part epilog
+;
+; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset7:
+; MOVREL: ; %bb.0: ; %entry
+; MOVREL-NEXT: s_mov_b32 s0, s2
+; MOVREL-NEXT: s_mov_b32 s1, s3
+; MOVREL-NEXT: s_add_u32 m0, s18, 7
+; MOVREL-NEXT: s_mov_b32 s2, s4
+; MOVREL-NEXT: s_mov_b32 s3, s5
+; MOVREL-NEXT: s_mov_b32 s4, s6
+; MOVREL-NEXT: s_mov_b32 s5, s7
+; MOVREL-NEXT: s_mov_b32 s6, s8
+; MOVREL-NEXT: s_mov_b32 s7, s9
+; MOVREL-NEXT: s_mov_b32 s8, s10
+; MOVREL-NEXT: s_mov_b32 s9, s11
+; MOVREL-NEXT: s_mov_b32 s10, s12
+; MOVREL-NEXT: s_mov_b32 s11, s13
+; MOVREL-NEXT: s_mov_b32 s12, s14
+; MOVREL-NEXT: s_mov_b32 s13, s15
+; MOVREL-NEXT: s_mov_b32 s14, s16
+; MOVREL-NEXT: s_mov_b32 s15, s17
+; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1]
+; MOVREL-NEXT: ; return to shader part epilog
+entry:
+ %add = add i32 %sel, 7
+ %ext = extractelement <8 x double> %vec, i32 %add
+ ret double %ext
+}
+
+define amdgpu_ps double @dyn_extract_v8f64_s_s_offsetm1(<8 x double> inreg %vec, i32 inreg %sel) {
+; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offsetm1:
+; GPRIDX: ; %bb.0: ; %entry
+; GPRIDX-NEXT: s_mov_b32 s0, s2
+; GPRIDX-NEXT: s_mov_b32 s1, s3
+; GPRIDX-NEXT: s_add_u32 m0, s18, -1
+; GPRIDX-NEXT: s_mov_b32 s2, s4
+; GPRIDX-NEXT: s_mov_b32 s3, s5
+; GPRIDX-NEXT: s_mov_b32 s4, s6
+; GPRIDX-NEXT: s_mov_b32 s5, s7
+; GPRIDX-NEXT: s_mov_b32 s6, s8
+; GPRIDX-NEXT: s_mov_b32 s7, s9
+; GPRIDX-NEXT: s_mov_b32 s8, s10
+; GPRIDX-NEXT: s_mov_b32 s9, s11
+; GPRIDX-NEXT: s_mov_b32 s10, s12
+; GPRIDX-NEXT: s_mov_b32 s11, s13
+; GPRIDX-NEXT: s_mov_b32 s12, s14
+; GPRIDX-NEXT: s_mov_b32 s13, s15
+; GPRIDX-NEXT: s_mov_b32 s14, s16
+; GPRIDX-NEXT: s_mov_b32 s15, s17
+; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1]
+; GPRIDX-NEXT: ; return to shader part epilog
+;
+; MOVREL-LABEL: dyn_extract_v8f64_s_s_offsetm1:
+; MOVREL: ; %bb.0: ; %entry
+; MOVREL-NEXT: s_mov_b32 s0, s2
+; MOVREL-NEXT: s_mov_b32 s1, s3
+; MOVREL-NEXT: s_add_u32 m0, s18, -1
+; MOVREL-NEXT: s_mov_b32 s2, s4
+; MOVREL-NEXT: s_mov_b32 s3, s5
+; MOVREL-NEXT: s_mov_b32 s4, s6
+; MOVREL-NEXT: s_mov_b32 s5, s7
+; MOVREL-NEXT: s_mov_b32 s6, s8
+; MOVREL-NEXT: s_mov_b32 s7, s9
+; MOVREL-NEXT: s_mov_b32 s8, s10
+; MOVREL-NEXT: s_mov_b32 s9, s11
+; MOVREL-NEXT: s_mov_b32 s10, s12
+; MOVREL-NEXT: s_mov_b32 s11, s13
+; MOVREL-NEXT: s_mov_b32 s12, s14
+; MOVREL-NEXT: s_mov_b32 s13, s15
+; MOVREL-NEXT: s_mov_b32 s14, s16
+; MOVREL-NEXT: s_mov_b32 s15, s17
+; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1]
+; MOVREL-NEXT: ; return to shader part epilog
+entry:
+ %add = add i32 %sel, -1
+ %ext = extractelement <8 x double> %vec, i32 %add
+ ret double %ext
+}
+
+define double @dyn_extract_v8f64_v_v_offset3(<8 x double> %vec, i32 %sel) {
+; GPRIDX-LABEL: dyn_extract_v8f64_v_v_offset3:
+; GPRIDX: ; %bb.0: ; %entry
+; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GPRIDX-NEXT: v_add_u32_e32 v18, 3, v16
+; GPRIDX-NEXT: s_mov_b64 s[4:5], exec
+; GPRIDX-NEXT: BB22_1: ; =>This Inner Loop Header: Depth=1
+; GPRIDX-NEXT: v_readfirstlane_b32 s6, v18
+; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v18
+; GPRIDX-NEXT: s_lshl_b32 s6, s6, 1
+; GPRIDX-NEXT: s_add_u32 s7, s6, 1
+; GPRIDX-NEXT: s_set_gpr_idx_on s6, gpr_idx(SRC0)
+; GPRIDX-NEXT: v_mov_b32_e32 v16, v0
+; GPRIDX-NEXT: s_set_gpr_idx_off
+; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0)
+; GPRIDX-NEXT: v_mov_b32_e32 v17, v0
+; GPRIDX-NEXT: s_set_gpr_idx_off
+; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
+; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
+; GPRIDX-NEXT: s_cbranch_execnz BB22_1
+; GPRIDX-NEXT: ; %bb.2:
+; GPRIDX-NEXT: s_mov_b64 exec, s[4:5]
+; GPRIDX-NEXT: v_mov_b32_e32 v0, v16
+; GPRIDX-NEXT: v_mov_b32_e32 v1, v17
+; GPRIDX-NEXT: s_setpc_b64 s[30:31]
+;
+; MOVREL-LABEL: dyn_extract_v8f64_v_v_offset3:
+; MOVREL: ; %bb.0: ; %entry
+; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MOVREL-NEXT: v_add_u32_e32 v18, vcc, 3, v16
+; MOVREL-NEXT: s_mov_b64 s[4:5], exec
+; MOVREL-NEXT: BB22_1: ; =>This Inner Loop Header: Depth=1
+; MOVREL-NEXT: v_readfirstlane_b32 s6, v18
+; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v18
+; MOVREL-NEXT: s_lshl_b32 s6, s6, 1
+; MOVREL-NEXT: s_mov_b32 m0, s6
+; MOVREL-NEXT: s_add_u32 s7, s6, 1
+; MOVREL-NEXT: v_movrels_b32_e32 v16, v0
+; MOVREL-NEXT: s_mov_b32 m0, s7
+; MOVREL-NEXT: v_movrels_b32_e32 v17, v0
+; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc
+; MOVREL-NEXT: s_xor_b64 exec, exec, vcc
+; MOVREL-NEXT: s_cbranch_execnz BB22_1
+; MOVREL-NEXT: ; %bb.2:
+; MOVREL-NEXT: s_mov_b64 exec, s[4:5]
+; MOVREL-NEXT: v_mov_b32_e32 v0, v16
+; MOVREL-NEXT: v_mov_b32_e32 v1, v17
+; MOVREL-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %add = add i32 %sel, 3
+ %ext = extractelement <8 x double> %vec, i32 %add
+ ret double %ext
+}
+
+define i8 addrspace(3)* @dyn_extract_v8p3_v_v(<8 x i8 addrspace(3)*> %vec, i32 %idx) {
+; GPRIDX-LABEL: dyn_extract_v8p3_v_v:
+; GPRIDX: ; %bb.0: ; %entry
+; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GPRIDX-NEXT: s_mov_b64 s[4:5], exec
+; GPRIDX-NEXT: BB23_1: ; =>This Inner Loop Header: Depth=1
+; GPRIDX-NEXT: v_readfirstlane_b32 s6, v8
+; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v8
+; GPRIDX-NEXT: s_set_gpr_idx_on s6, gpr_idx(SRC0)
+; GPRIDX-NEXT: v_mov_b32_e32 v9, v0
+; GPRIDX-NEXT: s_set_gpr_idx_off
+; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
+; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
+; GPRIDX-NEXT: s_cbranch_execnz BB23_1
+; GPRIDX-NEXT: ; %bb.2:
+; GPRIDX-NEXT: s_mov_b64 exec, s[4:5]
+; GPRIDX-NEXT: v_mov_b32_e32 v0, v9
+; GPRIDX-NEXT: s_setpc_b64 s[30:31]
+;
+; MOVREL-LABEL: dyn_extract_v8p3_v_v:
+; MOVREL: ; %bb.0: ; %entry
+; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MOVREL-NEXT: s_mov_b64 s[4:5], exec
+; MOVREL-NEXT: BB23_1: ; =>This Inner Loop Header: Depth=1
+; MOVREL-NEXT: v_readfirstlane_b32 s6, v8
+; MOVREL-NEXT: s_mov_b32 m0, s6
+; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v8
+; MOVREL-NEXT: v_movrels_b32_e32 v9, v0
+; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc
+; MOVREL-NEXT: s_xor_b64 exec, exec, vcc
+; MOVREL-NEXT: s_cbranch_execnz BB23_1
+; MOVREL-NEXT: ; %bb.2:
+; MOVREL-NEXT: s_mov_b64 exec, s[4:5]
+; MOVREL-NEXT: v_mov_b32_e32 v0, v9
+; MOVREL-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %ext = extractelement <8 x i8 addrspace(3)*> %vec, i32 %idx
+ ret i8 addrspace(3)* %ext
+}
+
+define amdgpu_ps void @dyn_extract_v8p3_s_s(<8 x i8 addrspace(3)*> inreg %vec, i32 inreg %idx) {
+; GPRIDX-LABEL: dyn_extract_v8p3_s_s:
+; GPRIDX: ; %bb.0: ; %entry
+; GPRIDX-NEXT: s_mov_b32 s0, s2
+; GPRIDX-NEXT: s_mov_b32 m0, s10
+; GPRIDX-NEXT: s_mov_b32 s1, s3
+; GPRIDX-NEXT: s_mov_b32 s2, s4
+; GPRIDX-NEXT: s_mov_b32 s3, s5
+; GPRIDX-NEXT: s_mov_b32 s4, s6
+; GPRIDX-NEXT: s_mov_b32 s5, s7
+; GPRIDX-NEXT: s_mov_b32 s6, s8
+; GPRIDX-NEXT: s_mov_b32 s7, s9
+; GPRIDX-NEXT: s_movrels_b32 s0, s0
+; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
+; GPRIDX-NEXT: ds_write_b32 v0, v0
+; GPRIDX-NEXT: s_endpgm
+;
+; MOVREL-LABEL: dyn_extract_v8p3_s_s:
+; MOVREL: ; %bb.0: ; %entry
+; MOVREL-NEXT: s_mov_b32 s0, s2
+; MOVREL-NEXT: s_mov_b32 m0, s10
+; MOVREL-NEXT: s_mov_b32 s1, s3
+; MOVREL-NEXT: s_mov_b32 s2, s4
+; MOVREL-NEXT: s_mov_b32 s3, s5
+; MOVREL-NEXT: s_mov_b32 s4, s6
+; MOVREL-NEXT: s_mov_b32 s5, s7
+; MOVREL-NEXT: s_mov_b32 s6, s8
+; MOVREL-NEXT: s_mov_b32 s7, s9
+; MOVREL-NEXT: s_movrels_b32 s0, s0
+; MOVREL-NEXT: v_mov_b32_e32 v0, s0
+; MOVREL-NEXT: s_mov_b32 m0, -1
+; MOVREL-NEXT: ds_write_b32 v0, v0
+; MOVREL-NEXT: s_endpgm
+entry:
+ %ext = extractelement <8 x i8 addrspace(3)*> %vec, i32 %idx
+ store i8 addrspace(3)* %ext, i8 addrspace(3)* addrspace(3)* undef
+ ret void
+}
+
+define i8 addrspace(1)* @dyn_extract_v8p1_v_v(<8 x i8 addrspace(1)*> %vec, i32 %idx) {
+; GPRIDX-LABEL: dyn_extract_v8p1_v_v:
+; GPRIDX: ; %bb.0: ; %entry
+; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GPRIDX-NEXT: s_mov_b64 s[4:5], exec
+; GPRIDX-NEXT: BB25_1: ; =>This Inner Loop Header: Depth=1
+; GPRIDX-NEXT: v_readfirstlane_b32 s6, v16
+; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16
+; GPRIDX-NEXT: s_lshl_b32 s6, s6, 1
+; GPRIDX-NEXT: s_add_u32 s7, s6, 1
+; GPRIDX-NEXT: s_set_gpr_idx_on s6, gpr_idx(SRC0)
+; GPRIDX-NEXT: v_mov_b32_e32 v17, v0
+; GPRIDX-NEXT: s_set_gpr_idx_off
+; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0)
+; GPRIDX-NEXT: v_mov_b32_e32 v18, v0
+; GPRIDX-NEXT: s_set_gpr_idx_off
+; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
+; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
+; GPRIDX-NEXT: s_cbranch_execnz BB25_1
+; GPRIDX-NEXT: ; %bb.2:
+; GPRIDX-NEXT: s_mov_b64 exec, s[4:5]
+; GPRIDX-NEXT: v_mov_b32_e32 v0, v17
+; GPRIDX-NEXT: v_mov_b32_e32 v1, v18
+; GPRIDX-NEXT: s_setpc_b64 s[30:31]
+;
+; MOVREL-LABEL: dyn_extract_v8p1_v_v:
+; MOVREL: ; %bb.0: ; %entry
+; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MOVREL-NEXT: s_mov_b64 s[4:5], exec
+; MOVREL-NEXT: BB25_1: ; =>This Inner Loop Header: Depth=1
+; MOVREL-NEXT: v_readfirstlane_b32 s6, v16
+; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16
+; MOVREL-NEXT: s_lshl_b32 s6, s6, 1
+; MOVREL-NEXT: s_mov_b32 m0, s6
+; MOVREL-NEXT: s_add_u32 s7, s6, 1
+; MOVREL-NEXT: v_movrels_b32_e32 v17, v0
+; MOVREL-NEXT: s_mov_b32 m0, s7
+; MOVREL-NEXT: v_movrels_b32_e32 v18, v0
+; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc
+; MOVREL-NEXT: s_xor_b64 exec, exec, vcc
+; MOVREL-NEXT: s_cbranch_execnz BB25_1
+; MOVREL-NEXT: ; %bb.2:
+; MOVREL-NEXT: s_mov_b64 exec, s[4:5]
+; MOVREL-NEXT: v_mov_b32_e32 v0, v17
+; MOVREL-NEXT: v_mov_b32_e32 v1, v18
+; MOVREL-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %ext = extractelement <8 x i8 addrspace(1)*> %vec, i32 %idx
+ ret i8 addrspace(1)* %ext
+}
+
+define amdgpu_ps void @dyn_extract_v8p1_s_s(<8 x i8 addrspace(1)*> inreg %vec, i32 inreg %idx) {
+; GPRIDX-LABEL: dyn_extract_v8p1_s_s:
+; GPRIDX: ; %bb.0: ; %entry
+; GPRIDX-NEXT: s_mov_b32 s0, s2
+; GPRIDX-NEXT: s_mov_b32 s1, s3
+; GPRIDX-NEXT: s_mov_b32 m0, s18
+; GPRIDX-NEXT: s_mov_b32 s2, s4
+; GPRIDX-NEXT: s_mov_b32 s3, s5
+; GPRIDX-NEXT: s_mov_b32 s4, s6
+; GPRIDX-NEXT: s_mov_b32 s5, s7
+; GPRIDX-NEXT: s_mov_b32 s6, s8
+; GPRIDX-NEXT: s_mov_b32 s7, s9
+; GPRIDX-NEXT: s_mov_b32 s8, s10
+; GPRIDX-NEXT: s_mov_b32 s9, s11
+; GPRIDX-NEXT: s_mov_b32 s10, s12
+; GPRIDX-NEXT: s_mov_b32 s11, s13
+; GPRIDX-NEXT: s_mov_b32 s12, s14
+; GPRIDX-NEXT: s_mov_b32 s13, s15
+; GPRIDX-NEXT: s_mov_b32 s14, s16
+; GPRIDX-NEXT: s_mov_b32 s15, s17
+; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1]
+; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
+; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
+; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
+; GPRIDX-NEXT: s_endpgm
+;
+; MOVREL-LABEL: dyn_extract_v8p1_s_s:
+; MOVREL: ; %bb.0: ; %entry
+; MOVREL-NEXT: s_mov_b32 s0, s2
+; MOVREL-NEXT: s_mov_b32 s1, s3
+; MOVREL-NEXT: s_mov_b32 m0, s18
+; MOVREL-NEXT: s_mov_b32 s2, s4
+; MOVREL-NEXT: s_mov_b32 s3, s5
+; MOVREL-NEXT: s_mov_b32 s4, s6
+; MOVREL-NEXT: s_mov_b32 s5, s7
+; MOVREL-NEXT: s_mov_b32 s6, s8
+; MOVREL-NEXT: s_mov_b32 s7, s9
+; MOVREL-NEXT: s_mov_b32 s8, s10
+; MOVREL-NEXT: s_mov_b32 s9, s11
+; MOVREL-NEXT: s_mov_b32 s10, s12
+; MOVREL-NEXT: s_mov_b32 s11, s13
+; MOVREL-NEXT: s_mov_b32 s12, s14
+; MOVREL-NEXT: s_mov_b32 s13, s15
+; MOVREL-NEXT: s_mov_b32 s14, s16
+; MOVREL-NEXT: s_mov_b32 s15, s17
+; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1]
+; MOVREL-NEXT: v_mov_b32_e32 v0, s0
+; MOVREL-NEXT: v_mov_b32_e32 v1, s1
+; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
+; MOVREL-NEXT: s_endpgm
+entry:
+ %ext = extractelement <8 x i8 addrspace(1)*> %vec, i32 %idx
+ store i8 addrspace(1)* %ext, i8 addrspace(1)* addrspace(1)* undef
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir
new file mode 100644
index 000000000000..f2d53090f875
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir
@@ -0,0 +1,810 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=MOVREL %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=MOVREL %s
+# RUN: llc -march=amdgcn -mcpu=fiji -amdgpu-vgpr-index-mode -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GPRIDX %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GPRIDX %s
+
+---
+name: extract_vector_elt_s_s32_v2s32
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr2
+
+ ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v2s32
+ ; MOVREL: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; MOVREL: $m0 = COPY [[COPY1]]
+ ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
+ ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
+ ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v2s32
+ ; GPRIDX: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+ ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GPRIDX: $m0 = COPY [[COPY1]]
+ ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
+ ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
+ %0:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1
+ %1:sgpr(s32) = COPY $sgpr2
+ %2:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+ S_ENDPGM 0, implicit %2
+...
+
+---
+name: extract_vector_elt_s_s32_v3s32
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2, $sgpr3
+
+ ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v3s32
+ ; MOVREL: [[COPY:%[0-9]+]]:sreg_96 = COPY $sgpr0_sgpr1_sgpr2
+ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; MOVREL: $m0 = COPY [[COPY1]]
+ ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
+ ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
+ ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v3s32
+ ; GPRIDX: [[COPY:%[0-9]+]]:sreg_96 = COPY $sgpr0_sgpr1_sgpr2
+ ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GPRIDX: $m0 = COPY [[COPY1]]
+ ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
+ ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
+ %0:sgpr(<3 x s32>) = COPY $sgpr0_sgpr1_sgpr2
+ %1:sgpr(s32) = COPY $sgpr2
+ %2:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+ S_ENDPGM 0, implicit %2
+...
+
+---
+name: extract_vector_elt_s_s32_v4s32
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4
+
+ ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v4s32
+ ; MOVREL: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; MOVREL: $m0 = COPY [[COPY1]]
+ ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
+ ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
+ ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v4s32
+ ; GPRIDX: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GPRIDX: $m0 = COPY [[COPY1]]
+ ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
+ ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
+ %0:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ %1:sgpr(s32) = COPY $sgpr4
+ %2:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+ S_ENDPGM 0, implicit %2
+...
+
+---
+name: extract_vector_elt_s_s32_v8s32
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8
+
+ ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v8s32
+ ; MOVREL: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+ ; MOVREL: $m0 = COPY [[COPY1]]
+ ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
+ ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
+ ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v8s32
+ ; GPRIDX: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+ ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+ ; GPRIDX: $m0 = COPY [[COPY1]]
+ ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
+ ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
+ %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+ %1:sgpr(s32) = COPY $sgpr8
+ %2:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+ S_ENDPGM 0, implicit %2
+...
+
+---
+name: extract_vector_elt_s_s32_v16s32
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+
+ ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v16s32
+ ; MOVREL: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+ ; MOVREL: $m0 = COPY [[COPY1]]
+ ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
+ ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
+ ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v16s32
+ ; GPRIDX: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+ ; GPRIDX: $m0 = COPY [[COPY1]]
+ ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
+ ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
+ %0:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ %1:sgpr(s32) = COPY $sgpr8
+ %2:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+ S_ENDPGM 0, implicit %2
+...
+
+---
+name: extract_vector_elt_s_s32_v32s32
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, $sgpr40
+
+ ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v32s32
+ ; MOVREL: [[COPY:%[0-9]+]]:sreg_1024 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
+ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40
+ ; MOVREL: $m0 = COPY [[COPY1]]
+ ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
+ ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
+ ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v32s32
+ ; GPRIDX: [[COPY:%[0-9]+]]:sreg_1024 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
+ ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40
+ ; GPRIDX: $m0 = COPY [[COPY1]]
+ ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
+ ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
+ %0:sgpr(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
+ %1:sgpr(s32) = COPY $sgpr40
+ %2:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+ S_ENDPGM 0, implicit %2
+...
+
+---
+name: extract_vector_elt_s_s64_v2s64
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4
+
+ ; MOVREL-LABEL: name: extract_vector_elt_s_s64_v2s64
+ ; MOVREL: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; MOVREL: $m0 = COPY [[COPY1]]
+ ; MOVREL: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]]
+ ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]]
+ ; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v2s64
+ ; GPRIDX: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GPRIDX: $m0 = COPY [[COPY1]]
+ ; GPRIDX: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]]
+ ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]]
+ %0:sgpr(<2 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ %1:sgpr(s32) = COPY $sgpr4
+ %2:sgpr(s64) = G_EXTRACT_VECTOR_ELT %0, %1
+ S_ENDPGM 0, implicit %2
+...
+
+---
+# Same as the v2s64 case but with a 256-bit source: still a single
+# S_MOVRELS_B64 relative to sub0_sub1, on both subtargets.
+name: extract_vector_elt_s_s64_v4s64
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8
+
+    ; MOVREL-LABEL: name: extract_vector_elt_s_s64_v4s64
+    ; MOVREL: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+    ; MOVREL: $m0 = COPY [[COPY1]]
+    ; MOVREL: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]]
+    ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]]
+    ; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v4s64
+    ; GPRIDX: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+    ; GPRIDX: $m0 = COPY [[COPY1]]
+    ; GPRIDX: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]]
+    ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]]
+    %0:sgpr(<4 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    %1:sgpr(s32) = COPY $sgpr8
+    %2:sgpr(s64) = G_EXTRACT_VECTOR_ELT %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+# 512-bit scalar source. Note the index register $sgpr8 is already live-in as
+# part of the sgpr0-sgpr15 source tuple, so it is not listed separately.
+name: extract_vector_elt_s_s64_v8s64
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+
+    ; MOVREL-LABEL: name: extract_vector_elt_s_s64_v8s64
+    ; MOVREL: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+    ; MOVREL: $m0 = COPY [[COPY1]]
+    ; MOVREL: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]]
+    ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]]
+    ; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v8s64
+    ; GPRIDX: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+    ; GPRIDX: $m0 = COPY [[COPY1]]
+    ; GPRIDX: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]]
+    ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]]
+    %0:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    %1:sgpr(s32) = COPY $sgpr8
+    %2:sgpr(s64) = G_EXTRACT_VECTOR_ELT %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+# Largest (1024-bit) scalar source: selection is unchanged — one
+# S_MOVRELS_B64 of sub0_sub1 with the index in $m0.
+name: extract_vector_elt_s_s64_v16s64
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, $sgpr40
+
+    ; MOVREL-LABEL: name: extract_vector_elt_s_s64_v16s64
+    ; MOVREL: [[COPY:%[0-9]+]]:sreg_1024 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
+    ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40
+    ; MOVREL: $m0 = COPY [[COPY1]]
+    ; MOVREL: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]]
+    ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]]
+    ; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v16s64
+    ; GPRIDX: [[COPY:%[0-9]+]]:sreg_1024 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
+    ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40
+    ; GPRIDX: $m0 = COPY [[COPY1]]
+    ; GPRIDX: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]]
+    ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]]
+    %0:sgpr(<16 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
+    %1:sgpr(s32) = COPY $sgpr40
+    %2:sgpr(s64) = G_EXTRACT_VECTOR_ELT %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+# Index is (variable + 1). Per the commit message, the selector does not fold
+# the constant offset into the subregister index, so the S_ADD_U32 survives
+# and the movrels still reads sub0.
+name: extract_vector_elt_s_s32_v8s32_idx_offset_1
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8
+
+    ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_1
+    ; MOVREL: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+    ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+    ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
+    ; MOVREL: $m0 = COPY [[S_ADD_U32_]]
+    ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
+    ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
+    ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_1
+    ; GPRIDX: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+    ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+    ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
+    ; GPRIDX: $m0 = COPY [[S_ADD_U32_]]
+    ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
+    ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
+    %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    %1:sgpr(s32) = COPY $sgpr8
+    %2:sgpr(s32) = G_CONSTANT i32 1
+    %3:sgpr(s32) = G_ADD %1, %2
+    %4:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %3
+    S_ENDPGM 0, implicit %4
+...
+
+---
+# Negative constant offset: -1 is materialized as 4294967295 (unsigned print)
+# and added to the index; no folding into the movrels subregister.
+name: extract_vector_elt_s_s32_v8s32_idx_offset_m1
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8
+
+    ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_m1
+    ; MOVREL: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+    ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
+    ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
+    ; MOVREL: $m0 = COPY [[S_ADD_U32_]]
+    ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
+    ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
+    ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_m1
+    ; GPRIDX: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+    ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
+    ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
+    ; GPRIDX: $m0 = COPY [[S_ADD_U32_]]
+    ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
+    ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
+    %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    %1:sgpr(s32) = COPY $sgpr8
+    %2:sgpr(s32) = G_CONSTANT i32 -1
+    %3:sgpr(s32) = G_ADD %1, %2
+    %4:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %3
+    S_ENDPGM 0, implicit %4
+...
+
+---
+# Offset 7: the largest in-range constant offset for an 8-element vector.
+# Still emitted as an explicit add, not folded to a sub7 read.
+name: extract_vector_elt_s_s32_v8s32_idx_offset_7
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8
+
+    ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_7
+    ; MOVREL: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+    ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 7
+    ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
+    ; MOVREL: $m0 = COPY [[S_ADD_U32_]]
+    ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
+    ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
+    ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_7
+    ; GPRIDX: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+    ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 7
+    ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
+    ; GPRIDX: $m0 = COPY [[S_ADD_U32_]]
+    ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
+    ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
+    %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    %1:sgpr(s32) = COPY $sgpr8
+    %2:sgpr(s32) = G_CONSTANT i32 7
+    %3:sgpr(s32) = G_ADD %1, %2
+    %4:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %3
+    S_ENDPGM 0, implicit %4
+...
+
+---
+# Offset 8 is out of range for an 8-element vector; the selector still emits
+# the add and a sub0-relative movrels rather than special-casing it.
+name: extract_vector_elt_s_s32_v8s32_idx_offset_8
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8
+
+    ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_8
+    ; MOVREL: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+    ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8
+    ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
+    ; MOVREL: $m0 = COPY [[S_ADD_U32_]]
+    ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
+    ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
+    ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_8
+    ; GPRIDX: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+    ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8
+    ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
+    ; GPRIDX: $m0 = COPY [[S_ADD_U32_]]
+    ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
+    ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
+    %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    %1:sgpr(s32) = COPY $sgpr8
+    %2:sgpr(s32) = G_CONSTANT i32 8
+    %3:sgpr(s32) = G_ADD %1, %2
+    %4:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %3
+    S_ENDPGM 0, implicit %4
+...
+
+---
+# 64-bit element with a +1 index offset. $sgpr8 (the index) is live-in via
+# the sgpr0-sgpr15 source tuple.
+name: extract_vector_elt_s_s64_v8s64_idx_offset_1
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+
+    ; MOVREL-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_1
+    ; MOVREL: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+    ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+    ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
+    ; MOVREL: $m0 = COPY [[S_ADD_U32_]]
+    ; MOVREL: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]]
+    ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]]
+    ; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_1
+    ; GPRIDX: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+    ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+    ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
+    ; GPRIDX: $m0 = COPY [[S_ADD_U32_]]
+    ; GPRIDX: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]]
+    ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]]
+    %0:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    %1:sgpr(s32) = COPY $sgpr8
+    %2:sgpr(s32) = G_CONSTANT i32 1
+    %3:sgpr(s32) = G_ADD %1, %2
+    %4:sgpr(s64) = G_EXTRACT_VECTOR_ELT %0, %3
+    S_ENDPGM 0, implicit %4
+...
+
+---
+# 64-bit element with a +2 index offset; same add-then-movrels pattern.
+name: extract_vector_elt_s_s64_v8s64_idx_offset_2
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+
+    ; MOVREL-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_2
+    ; MOVREL: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+    ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2
+    ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
+    ; MOVREL: $m0 = COPY [[S_ADD_U32_]]
+    ; MOVREL: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]]
+    ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]]
+    ; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_2
+    ; GPRIDX: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+    ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2
+    ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
+    ; GPRIDX: $m0 = COPY [[S_ADD_U32_]]
+    ; GPRIDX: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]]
+    ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]]
+    %0:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    %1:sgpr(s32) = COPY $sgpr8
+    %2:sgpr(s32) = G_CONSTANT i32 2
+    %3:sgpr(s32) = G_ADD %1, %2
+    %4:sgpr(s64) = G_EXTRACT_VECTOR_ELT %0, %3
+    S_ENDPGM 0, implicit %4
+...
+
+---
+# 64-bit element with a -1 index offset (printed as 4294967295).
+name: extract_vector_elt_s_s64_v8s64_idx_offset_m1
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+
+    ; MOVREL-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_m1
+    ; MOVREL: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+    ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
+    ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
+    ; MOVREL: $m0 = COPY [[S_ADD_U32_]]
+    ; MOVREL: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]]
+    ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]]
+    ; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_m1
+    ; GPRIDX: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+    ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
+    ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
+    ; GPRIDX: $m0 = COPY [[S_ADD_U32_]]
+    ; GPRIDX: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]]
+    ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]]
+    %0:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    %1:sgpr(s32) = COPY $sgpr8
+    %2:sgpr(s32) = G_CONSTANT i32 -1
+    %3:sgpr(s32) = G_ADD %1, %2
+    %4:sgpr(s64) = G_EXTRACT_VECTOR_ELT %0, %3
+    S_ENDPGM 0, implicit %4
+...
+
+---
+# VGPR source: MOVREL targets use V_MOVRELS_B32_e32 via $m0; GPRIDX targets
+# bracket a V_MOV_B32_e32 with S_SET_GPR_IDX_ON/OFF instead.
+name: extract_vector_elt_v_s32_v2s32
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $sgpr2
+
+    ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v2s32
+    ; MOVREL: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
+    ; MOVREL: $m0 = COPY [[COPY1]]
+    ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
+    ; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
+    ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v2s32
+    ; GPRIDX: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
+    ; GPRIDX: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit $m0
+    ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
+    ; GPRIDX: S_SET_GPR_IDX_OFF
+    ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
+    %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1
+    %1:sgpr(s32) = COPY $sgpr2
+    %2:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+# VGPR <3 x s32> source with an SGPR index. The body (and the generated
+# checks) read the index from $sgpr2, but liveins previously declared $sgpr3;
+# corrected to $sgpr2 so the read register is actually live into the block.
+name: extract_vector_elt_v_s32_v3s32
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2, $sgpr2
+
+    ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v3s32
+    ; MOVREL: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2
+    ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
+    ; MOVREL: $m0 = COPY [[COPY1]]
+    ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
+    ; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
+    ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v3s32
+    ; GPRIDX: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2
+    ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
+    ; GPRIDX: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit $m0
+    ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
+    ; GPRIDX: S_SET_GPR_IDX_OFF
+    ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
+    %0:vgpr(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    %1:sgpr(s32) = COPY $sgpr2
+    %2:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+# 128-bit VGPR source, variable SGPR index; same movrels / gpr-idx split.
+name: extract_vector_elt_v_s32_v4s32
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr4
+
+    ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v4s32
+    ; MOVREL: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4
+    ; MOVREL: $m0 = COPY [[COPY1]]
+    ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
+    ; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
+    ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v4s32
+    ; GPRIDX: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4
+    ; GPRIDX: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit $m0
+    ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
+    ; GPRIDX: S_SET_GPR_IDX_OFF
+    ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
+    %0:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    %1:sgpr(s32) = COPY $sgpr4
+    %2:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+# 256-bit VGPR source. The body reads the index from $sgpr8, but liveins
+# previously declared $vgpr8 (apparent typo); corrected to $sgpr8.
+name: extract_vector_elt_v_s32_v8s32
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $sgpr8
+
+    ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v8s32
+    ; MOVREL: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+    ; MOVREL: $m0 = COPY [[COPY1]]
+    ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
+    ; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
+    ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32
+    ; GPRIDX: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+    ; GPRIDX: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit $m0
+    ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
+    ; GPRIDX: S_SET_GPR_IDX_OFF
+    ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
+    %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    %1:sgpr(s32) = COPY $sgpr8
+    %2:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+# 512-bit VGPR source. The body reads the index from $sgpr8, which was missing
+# from liveins entirely (the vgpr tuple does not cover it); added here.
+name: extract_vector_elt_v_s32_v16s32
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $sgpr8
+
+    ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v16s32
+    ; MOVREL: [[COPY:%[0-9]+]]:vreg_512 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+    ; MOVREL: $m0 = COPY [[COPY1]]
+    ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
+    ; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
+    ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v16s32
+    ; GPRIDX: [[COPY:%[0-9]+]]:vreg_512 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+    ; GPRIDX: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit $m0
+    ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
+    ; GPRIDX: S_SET_GPR_IDX_OFF
+    ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
+    %0:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    %1:sgpr(s32) = COPY $sgpr8
+    %2:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+# Largest (1024-bit) VGPR source; index in $sgpr40, declared live-in.
+name: extract_vector_elt_v_s32_v32s32
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $sgpr40
+
+    ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v32s32
+    ; MOVREL: [[COPY:%[0-9]+]]:vreg_1024 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+    ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40
+    ; MOVREL: $m0 = COPY [[COPY1]]
+    ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
+    ; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
+    ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v32s32
+    ; GPRIDX: [[COPY:%[0-9]+]]:vreg_1024 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+    ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40
+    ; GPRIDX: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit $m0
+    ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
+    ; GPRIDX: S_SET_GPR_IDX_OFF
+    ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
+    %0:vgpr(<32 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+    %1:sgpr(s32) = COPY $sgpr40
+    %2:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+# VGPR source with (idx + 1): the scalar add is kept, and on GPRIDX the sum
+# feeds S_SET_GPR_IDX_ON directly instead of a $m0 copy.
+name: extract_vector_elt_v_s32_v8s32_idx_offset_1
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $sgpr8
+
+    ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_1
+    ; MOVREL: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+    ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+    ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
+    ; MOVREL: $m0 = COPY [[S_ADD_U32_]]
+    ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
+    ; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
+    ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_1
+    ; GPRIDX: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+    ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+    ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
+    ; GPRIDX: S_SET_GPR_IDX_ON [[S_ADD_U32_]], 1, implicit-def $m0, implicit $m0
+    ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
+    ; GPRIDX: S_SET_GPR_IDX_OFF
+    ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
+    %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    %1:sgpr(s32) = COPY $sgpr8
+    %2:sgpr(s32) = G_CONSTANT i32 1
+    %3:sgpr(s32) = G_ADD %1, %2
+    %4:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %3
+    S_ENDPGM 0, implicit %4
+...
+
+---
+# VGPR source with (idx - 1); -1 materialized as 4294967295 and added.
+name: extract_vector_elt_v_s32_v8s32_idx_offset_m1
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $sgpr8
+
+    ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_m1
+    ; MOVREL: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+    ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
+    ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
+    ; MOVREL: $m0 = COPY [[S_ADD_U32_]]
+    ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
+    ; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
+    ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_m1
+    ; GPRIDX: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+    ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
+    ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
+    ; GPRIDX: S_SET_GPR_IDX_ON [[S_ADD_U32_]], 1, implicit-def $m0, implicit $m0
+    ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
+    ; GPRIDX: S_SET_GPR_IDX_OFF
+    ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
+    %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    %1:sgpr(s32) = COPY $sgpr8
+    %2:sgpr(s32) = G_CONSTANT i32 -1
+    %3:sgpr(s32) = G_ADD %1, %2
+    %4:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %3
+    S_ENDPGM 0, implicit %4
+...
+
+---
+# VGPR source with (idx + 7): largest in-range constant offset.
+name: extract_vector_elt_v_s32_v8s32_idx_offset_7
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $sgpr8
+
+    ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_7
+    ; MOVREL: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+    ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 7
+    ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
+    ; MOVREL: $m0 = COPY [[S_ADD_U32_]]
+    ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
+    ; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
+    ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_7
+    ; GPRIDX: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+    ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 7
+    ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
+    ; GPRIDX: S_SET_GPR_IDX_ON [[S_ADD_U32_]], 1, implicit-def $m0, implicit $m0
+    ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
+    ; GPRIDX: S_SET_GPR_IDX_OFF
+    ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
+    %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    %1:sgpr(s32) = COPY $sgpr8
+    %2:sgpr(s32) = G_CONSTANT i32 7
+    %3:sgpr(s32) = G_ADD %1, %2
+    %4:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %3
+    S_ENDPGM 0, implicit %4
+...
+
+---
+# VGPR source with (idx + 8): out-of-range offset, still selected the same
+# way with an explicit add feeding the index.
+name: extract_vector_elt_v_s32_v8s32_idx_offset_8
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $sgpr8
+
+    ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_8
+    ; MOVREL: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+    ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8
+    ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
+    ; MOVREL: $m0 = COPY [[S_ADD_U32_]]
+    ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
+    ; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
+    ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_8
+    ; GPRIDX: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+    ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8
+    ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
+    ; GPRIDX: S_SET_GPR_IDX_ON [[S_ADD_U32_]], 1, implicit-def $m0, implicit $m0
+    ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
+    ; GPRIDX: S_SET_GPR_IDX_OFF
+    ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
+    %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    %1:sgpr(s32) = COPY $sgpr8
+    %2:sgpr(s32) = G_CONSTANT i32 8
+    %3:sgpr(s32) = G_ADD %1, %2
+    %4:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %3
+    S_ENDPGM 0, implicit %4
+...
More information about the llvm-commits
mailing list