[llvm] e3d352c - AMDGPU/GlobalISel: Fold constant offset vector extract indexes
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 22 07:51:19 PST 2020
Author: Matt Arsenault
Date: 2020-01-22T10:50:59-05:00
New Revision: e3d352c54119b5d30821b4857dec77ac0af688c8
URL: https://github.com/llvm/llvm-project/commit/e3d352c54119b5d30821b4857dec77ac0af688c8
DIFF: https://github.com/llvm/llvm-project/commit/e3d352c54119b5d30821b4857dec77ac0af688c8.diff
LOG: AMDGPU/GlobalISel: Fold constant offset vector extract indexes
Handle dynamic vector extracts whose index is an add of a constant offset
by folding the offset into the choice of base subregister for the indexing
operation.
In regbankselect, force the add back into the waterfall loop so that it can
be recognized when the instruction is selected.
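As a minimal illustration (not part of the patch; the function name below is
made up, but the pattern mirrors the dyn_extract_v8f32_v_v_offset3 test
updated here), the fold targets extracts whose index is a base value plus a
known constant:

define float @extract_base_plus_3(<8 x float> %vec, i32 %base) {
entry:
  %idx = add i32 %base, 3
  %elt = extractelement <8 x float> %vec, i32 %idx
  ret float %elt
}

With this change the waterfall loop runs v_readfirstlane on %base directly and
the indexed move starts from the sub3 subregister, instead of materializing
the +3 add before the loop.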
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index ded4e1b400d0..e9aeefd30602 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1792,6 +1792,30 @@ bool AMDGPUInstructionSelector::selectG_PTR_MASK(MachineInstr &I) const {
return true;
}
+/// Return the register to use for the index value, and the subregister to use
+/// for the indirectly accessed register.
+static std::pair<Register, unsigned>
+computeIndirectRegIndex(MachineRegisterInfo &MRI,
+ const SIRegisterInfo &TRI,
+ const TargetRegisterClass *SuperRC,
+ Register IdxReg,
+ unsigned EltSize) {
+ Register IdxBaseReg;
+ int Offset;
+ MachineInstr *Unused;
+
+ std::tie(IdxBaseReg, Offset, Unused)
+ = AMDGPU::getBaseWithConstantOffset(MRI, IdxReg);
+
+ ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SuperRC, EltSize);
+
+ // Skip out of bounds offsets, or else we would end up using an undefined
+ // register.
+ if (static_cast<unsigned>(Offset) >= SubRegs.size())
+ return std::make_pair(IdxReg, SubRegs[0]);
+ return std::make_pair(IdxBaseReg, SubRegs[Offset]);
+}
+
bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
MachineInstr &MI) const {
Register DstReg = MI.getOperand(0).getReg();
@@ -1823,7 +1847,9 @@ bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
const DebugLoc &DL = MI.getDebugLoc();
const bool Is64 = DstTy.getSizeInBits() == 64;
- unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
+ unsigned SubReg;
+ std::tie(IdxReg, SubReg) = computeIndirectRegIndex(*MRI, TRI, SrcRC, IdxReg,
+ DstTy.getSizeInBits() / 8);
if (SrcRB->getID() == AMDGPU::SGPRRegBankID) {
if (DstTy.getSizeInBits() != 32 && !Is64)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index bf25ed0c9471..9f222eea33e4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -69,6 +69,8 @@
//===----------------------------------------------------------------------===//
#include "AMDGPURegisterBankInfo.h"
+
+#include "AMDGPUGlobalISelUtils.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPUSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -76,8 +78,8 @@
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
-#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -1975,7 +1977,13 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
assert(OpdMapper.getVRegs(1).empty() && OpdMapper.getVRegs(2).empty());
- LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+
+ const LLT S32 = LLT::scalar(32);
+ LLT DstTy = MRI.getType(DstReg);
+ LLT SrcTy = MRI.getType(SrcReg);
+
MachineIRBuilder B(MI);
const ValueMapping &DstMapping
@@ -1983,10 +1991,40 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
const RegisterBank *DstBank = DstMapping.BreakDown[0].RegBank;
const RegisterBank *SrcBank =
OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;
+ const RegisterBank *IdxBank =
+ OpdMapper.getInstrMapping().getOperandMapping(2).BreakDown[0].RegBank;
+
+ Register BaseIdxReg;
+ unsigned ConstOffset;
+ MachineInstr *OffsetDef;
+ std::tie(BaseIdxReg, ConstOffset, OffsetDef) =
+ AMDGPU::getBaseWithConstantOffset(MRI, MI.getOperand(2).getReg());
+
+ // See if the index is an add of a constant which will be foldable by moving
+ // the base register of the index later if this is going to be executed in a
+ // waterfall loop. This is essentially to reassociate the add of a constant
+ // with the readfirstlane.
+ bool ShouldMoveIndexIntoLoop = IdxBank != &AMDGPU::SGPRRegBank &&
+ ConstOffset > 0 &&
+ ConstOffset < SrcTy.getNumElements();
+
+ // Re-insert the constant offset add inside the waterfall loop.
+ auto ReinsertIndexAdd = [=, &B, &MRI](MachineInstr &IdxUseInstr,
+ unsigned OpIdx) {
+ Register WaterfallIdx = IdxUseInstr.getOperand(OpIdx).getReg();
+ B.setInsertPt(*IdxUseInstr.getParent(), IdxUseInstr.getIterator());
+
+ auto MaterializedOffset = B.buildConstant(S32, ConstOffset);
+
+ auto Add = B.buildAdd(S32, WaterfallIdx, MaterializedOffset);
+ MRI.setRegBank(MaterializedOffset.getReg(0), AMDGPU::SGPRRegBank);
+ MRI.setRegBank(Add.getReg(0), AMDGPU::SGPRRegBank);
+ IdxUseInstr.getOperand(OpIdx).setReg(Add.getReg(0));
+ };
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- Register IdxReg = MI.getOperand(2).getReg();
+ // Move the base register. We'll re-insert the add later.
+ if (ShouldMoveIndexIntoLoop)
+ MI.getOperand(2).setReg(BaseIdxReg);
// If this is a VGPR result only because the index was a VGPR result, the
// actual indexing will be done on the SGPR source vector, which will
@@ -2010,13 +2048,14 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
buildVCopy(B, DstReg, TmpReg);
}
+ if (ShouldMoveIndexIntoLoop)
+ ReinsertIndexAdd(MI, 2);
+
return;
}
assert(DstTy.getSizeInBits() == 64);
- LLT SrcTy = MRI.getType(SrcReg);
- const LLT S32 = LLT::scalar(32);
LLT Vec32 = LLT::vector(2 * SrcTy.getNumElements(), 32);
auto CastSrc = B.buildBitcast(Vec32, SrcReg);
@@ -2029,7 +2068,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
MachineInstrSpan Span(MachineBasicBlock::iterator(&MI), &B.getMBB());
// Compute 32-bit element indices, (2 * OrigIdx, 2 * OrigIdx + 1).
- auto IdxLo = B.buildShl(S32, IdxReg, One);
+ auto IdxLo = B.buildShl(S32, BaseIdxReg, One);
auto IdxHi = B.buildAdd(S32, IdxLo, One);
auto Extract0 = B.buildExtractVectorElement(DstRegs[0], CastSrc, IdxLo);
@@ -2070,6 +2109,9 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
buildVCopy(B, DstRegs[1], TmpReg1);
}
+ if (ShouldMoveIndexIntoLoop)
+ ReinsertIndexAdd(*IdxLo, 1);
+
return;
}
case AMDGPU::G_INSERT_VECTOR_ELT: {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
index 4f9d35dd905f..c47d10e64283 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
@@ -257,15 +257,10 @@ define i64 @dyn_extract_v8i64_const_s_v(i32 %sel) {
; GPRIDX-NEXT: s_mov_b64 s[20:21], exec
; GPRIDX-NEXT: BB6_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT: v_readfirstlane_b32 s22, v0
+; GPRIDX-NEXT: s_lshl_b32 m0, s22, 1
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s22, v0
-; GPRIDX-NEXT: s_lshl_b32 s22, s22, 1
-; GPRIDX-NEXT: s_add_u32 s23, s22, 1
-; GPRIDX-NEXT: s_mov_b32 m0, s22
-; GPRIDX-NEXT: s_nop 0
; GPRIDX-NEXT: s_movrels_b32 s22, s4
-; GPRIDX-NEXT: s_mov_b32 m0, s23
-; GPRIDX-NEXT: s_nop 0
-; GPRIDX-NEXT: s_movrels_b32 s23, s4
+; GPRIDX-NEXT: s_movrels_b32 s23, s5
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
; GPRIDX-NEXT: s_cbranch_execnz BB6_1
@@ -289,13 +284,10 @@ define i64 @dyn_extract_v8i64_const_s_v(i32 %sel) {
; MOVREL-NEXT: s_mov_b64 s[20:21], exec
; MOVREL-NEXT: BB6_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT: v_readfirstlane_b32 s22, v0
+; MOVREL-NEXT: s_lshl_b32 m0, s22, 1
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s22, v0
-; MOVREL-NEXT: s_lshl_b32 s22, s22, 1
-; MOVREL-NEXT: s_add_u32 s23, s22, 1
-; MOVREL-NEXT: s_mov_b32 m0, s22
; MOVREL-NEXT: s_movrels_b32 s22, s4
-; MOVREL-NEXT: s_mov_b32 m0, s23
-; MOVREL-NEXT: s_movrels_b32 s23, s4
+; MOVREL-NEXT: s_movrels_b32 s23, s5
; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc
; MOVREL-NEXT: s_xor_b64 exec, exec, vcc
; MOVREL-NEXT: s_cbranch_execnz BB6_1
@@ -371,15 +363,11 @@ define amdgpu_ps void @dyn_extract_v8i64_s_v(<8 x i64> inreg %vec, i32 %sel) {
; GPRIDX-NEXT: s_mov_b64 s[16:17], exec
; GPRIDX-NEXT: BB8_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT: v_readfirstlane_b32 s18, v0
+; GPRIDX-NEXT: s_lshl_b32 m0, s18, 1
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s18, v0
-; GPRIDX-NEXT: s_lshl_b32 s18, s18, 1
-; GPRIDX-NEXT: s_add_u32 s19, s18, 1
-; GPRIDX-NEXT: s_mov_b32 m0, s18
-; GPRIDX-NEXT: s_nop 0
; GPRIDX-NEXT: s_movrels_b32 s18, s0
-; GPRIDX-NEXT: s_mov_b32 m0, s19
+; GPRIDX-NEXT: s_movrels_b32 s19, s1
; GPRIDX-NEXT: v_mov_b32_e32 v1, s18
-; GPRIDX-NEXT: s_movrels_b32 s19, s0
; GPRIDX-NEXT: v_mov_b32_e32 v2, s19
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
@@ -410,13 +398,10 @@ define amdgpu_ps void @dyn_extract_v8i64_s_v(<8 x i64> inreg %vec, i32 %sel) {
; MOVREL-NEXT: s_mov_b64 s[16:17], exec
; MOVREL-NEXT: BB8_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT: v_readfirstlane_b32 s18, v0
+; MOVREL-NEXT: s_lshl_b32 m0, s18, 1
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s18, v0
-; MOVREL-NEXT: s_lshl_b32 s18, s18, 1
-; MOVREL-NEXT: s_add_u32 s19, s18, 1
-; MOVREL-NEXT: s_mov_b32 m0, s18
; MOVREL-NEXT: s_movrels_b32 s18, s0
-; MOVREL-NEXT: s_mov_b32 m0, s19
-; MOVREL-NEXT: s_movrels_b32 s19, s0
+; MOVREL-NEXT: s_movrels_b32 s19, s1
; MOVREL-NEXT: v_mov_b32_e32 v1, s18
; MOVREL-NEXT: v_mov_b32_e32 v2, s19
; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc
@@ -439,14 +424,13 @@ define i64 @dyn_extract_v8i64_v_v(<8 x i64> %vec, i32 %sel) {
; GPRIDX-NEXT: s_mov_b64 s[4:5], exec
; GPRIDX-NEXT: BB9_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT: v_readfirstlane_b32 s6, v16
+; GPRIDX-NEXT: s_lshl_b32 s7, s6, 1
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16
-; GPRIDX-NEXT: s_lshl_b32 s6, s6, 1
-; GPRIDX-NEXT: s_add_u32 s7, s6, 1
-; GPRIDX-NEXT: s_set_gpr_idx_on s6, gpr_idx(SRC0)
+; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v17, v0
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0)
-; GPRIDX-NEXT: v_mov_b32_e32 v18, v0
+; GPRIDX-NEXT: v_mov_b32_e32 v18, v1
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
@@ -463,13 +447,10 @@ define i64 @dyn_extract_v8i64_v_v(<8 x i64> %vec, i32 %sel) {
; MOVREL-NEXT: s_mov_b64 s[4:5], exec
; MOVREL-NEXT: BB9_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT: v_readfirstlane_b32 s6, v16
+; MOVREL-NEXT: s_lshl_b32 m0, s6, 1
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16
-; MOVREL-NEXT: s_lshl_b32 s6, s6, 1
-; MOVREL-NEXT: s_mov_b32 m0, s6
-; MOVREL-NEXT: s_add_u32 s7, s6, 1
; MOVREL-NEXT: v_movrels_b32_e32 v17, v0
-; MOVREL-NEXT: s_mov_b32 m0, s7
-; MOVREL-NEXT: v_movrels_b32_e32 v18, v0
+; MOVREL-NEXT: v_movrels_b32_e32 v18, v1
; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc
; MOVREL-NEXT: s_xor_b64 exec, exec, vcc
; MOVREL-NEXT: s_cbranch_execnz BB9_1
@@ -487,24 +468,20 @@ define amdgpu_ps void @dyn_extract_v8i64_v_s(<8 x i64> %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8i64_v_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1
-; GPRIDX-NEXT: s_add_u32 s1, s0, 1
; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v16, v0
; GPRIDX-NEXT: s_set_gpr_idx_off
-; GPRIDX-NEXT: s_set_gpr_idx_on s1, gpr_idx(SRC0)
-; GPRIDX-NEXT: v_mov_b32_e32 v17, v0
+; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0)
+; GPRIDX-NEXT: v_mov_b32_e32 v17, v1
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[16:17], off
; GPRIDX-NEXT: s_endpgm
;
; MOVREL-LABEL: dyn_extract_v8i64_v_s:
; MOVREL: ; %bb.0: ; %entry
-; MOVREL-NEXT: s_lshl_b32 s0, s2, 1
-; MOVREL-NEXT: s_mov_b32 m0, s0
-; MOVREL-NEXT: s_add_u32 s0, s0, 1
+; MOVREL-NEXT: s_lshl_b32 m0, s2, 1
; MOVREL-NEXT: v_movrels_b32_e32 v16, v0
-; MOVREL-NEXT: s_mov_b32 m0, s0
-; MOVREL-NEXT: v_movrels_b32_e32 v17, v0
+; MOVREL-NEXT: v_movrels_b32_e32 v17, v1
; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[16:17]
; MOVREL-NEXT: s_endpgm
entry:
@@ -573,30 +550,30 @@ define amdgpu_ps float @dyn_extract_v8f32_s_s_offset3(<8 x float> inreg %vec, i3
; GPRIDX-LABEL: dyn_extract_v8f32_s_s_offset3:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
-; GPRIDX-NEXT: s_add_u32 m0, s10, 3
; GPRIDX-NEXT: s_mov_b32 s1, s3
-; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
+; GPRIDX-NEXT: s_mov_b32 m0, s10
+; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
-; GPRIDX-NEXT: s_movrels_b32 s0, s0
+; GPRIDX-NEXT: s_movrels_b32 s0, s3
; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f32_s_s_offset3:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
-; MOVREL-NEXT: s_add_u32 m0, s10, 3
; MOVREL-NEXT: s_mov_b32 s1, s3
-; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
+; MOVREL-NEXT: s_mov_b32 m0, s10
+; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
-; MOVREL-NEXT: s_movrels_b32 s0, s0
+; MOVREL-NEXT: s_movrels_b32 s0, s3
; MOVREL-NEXT: v_mov_b32_e32 v0, s0
; MOVREL-NEXT: ; return to shader part epilog
entry:
@@ -609,38 +586,36 @@ define float @dyn_extract_v8f32_v_v_offset3(<8 x float> %vec, i32 %sel) {
; GPRIDX-LABEL: dyn_extract_v8f32_v_v_offset3:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GPRIDX-NEXT: v_add_u32_e32 v9, 3, v8
; GPRIDX-NEXT: s_mov_b64 s[4:5], exec
; GPRIDX-NEXT: BB13_1: ; =>This Inner Loop Header: Depth=1
-; GPRIDX-NEXT: v_readfirstlane_b32 s6, v9
-; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v9
+; GPRIDX-NEXT: v_readfirstlane_b32 s6, v8
+; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v8
; GPRIDX-NEXT: s_set_gpr_idx_on s6, gpr_idx(SRC0)
-; GPRIDX-NEXT: v_mov_b32_e32 v8, v0
+; GPRIDX-NEXT: v_mov_b32_e32 v9, v3
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
; GPRIDX-NEXT: s_cbranch_execnz BB13_1
; GPRIDX-NEXT: ; %bb.2:
; GPRIDX-NEXT: s_mov_b64 exec, s[4:5]
-; GPRIDX-NEXT: v_mov_b32_e32 v0, v8
+; GPRIDX-NEXT: v_mov_b32_e32 v0, v9
; GPRIDX-NEXT: s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: dyn_extract_v8f32_v_v_offset3:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; MOVREL-NEXT: v_add_u32_e32 v9, vcc, 3, v8
; MOVREL-NEXT: s_mov_b64 s[4:5], exec
; MOVREL-NEXT: BB13_1: ; =>This Inner Loop Header: Depth=1
-; MOVREL-NEXT: v_readfirstlane_b32 s6, v9
+; MOVREL-NEXT: v_readfirstlane_b32 s6, v8
; MOVREL-NEXT: s_mov_b32 m0, s6
-; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v9
-; MOVREL-NEXT: v_movrels_b32_e32 v8, v0
+; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v8
+; MOVREL-NEXT: v_movrels_b32_e32 v9, v3
; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc
; MOVREL-NEXT: s_xor_b64 exec, exec, vcc
; MOVREL-NEXT: s_cbranch_execnz BB13_1
; MOVREL-NEXT: ; %bb.2:
; MOVREL-NEXT: s_mov_b64 exec, s[4:5]
-; MOVREL-NEXT: v_mov_b32_e32 v0, v8
+; MOVREL-NEXT: v_mov_b32_e32 v0, v9
; MOVREL-NEXT: s_setpc_b64 s[30:31]
entry:
%add = add i32 %sel, 3
@@ -653,9 +628,9 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset1(<8 x double> inreg %vec,
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
-; GPRIDX-NEXT: s_add_u32 m0, s18, 1
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
+; GPRIDX-NEXT: s_mov_b32 m0, s18
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
@@ -668,16 +643,16 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset1(<8 x double> inreg %vec,
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
-; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1]
+; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[2:3]
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset1:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
-; MOVREL-NEXT: s_add_u32 m0, s18, 1
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
+; MOVREL-NEXT: s_mov_b32 m0, s18
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
@@ -690,7 +665,7 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset1(<8 x double> inreg %vec,
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
-; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1]
+; MOVREL-NEXT: s_movrels_b64 s[0:1], s[2:3]
; MOVREL-NEXT: ; return to shader part epilog
entry:
%add = add i32 %sel, 1
@@ -703,11 +678,11 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset2(<8 x double> inreg %vec,
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
-; GPRIDX-NEXT: s_add_u32 m0, s18, 2
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
+; GPRIDX-NEXT: s_mov_b32 m0, s18
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 s8, s10
@@ -718,18 +693,18 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset2(<8 x double> inreg %vec,
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
-; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1]
+; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[4:5]
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset2:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
-; MOVREL-NEXT: s_add_u32 m0, s18, 2
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
+; MOVREL-NEXT: s_mov_b32 m0, s18
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b32 s8, s10
@@ -740,7 +715,7 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset2(<8 x double> inreg %vec,
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
-; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1]
+; MOVREL-NEXT: s_movrels_b64 s[0:1], s[4:5]
; MOVREL-NEXT: ; return to shader part epilog
entry:
%add = add i32 %sel, 2
@@ -753,13 +728,13 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset3(<8 x double> inreg %vec,
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
-; GPRIDX-NEXT: s_add_u32 m0, s18, 3
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
+; GPRIDX-NEXT: s_mov_b32 m0, s18
; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: s_mov_b32 s10, s12
@@ -768,20 +743,20 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset3(<8 x double> inreg %vec,
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
-; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1]
+; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[6:7]
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset3:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
-; MOVREL-NEXT: s_add_u32 m0, s18, 3
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
+; MOVREL-NEXT: s_mov_b32 m0, s18
; MOVREL-NEXT: s_mov_b32 s8, s10
; MOVREL-NEXT: s_mov_b32 s9, s11
; MOVREL-NEXT: s_mov_b32 s10, s12
@@ -790,7 +765,7 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset3(<8 x double> inreg %vec,
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
-; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1]
+; MOVREL-NEXT: s_movrels_b64 s[0:1], s[6:7]
; MOVREL-NEXT: ; return to shader part epilog
entry:
%add = add i32 %sel, 3
@@ -803,7 +778,6 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset4(<8 x double> inreg %vec,
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
-; GPRIDX-NEXT: s_add_u32 m0, s18, 4
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
@@ -812,20 +786,20 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset4(<8 x double> inreg %vec,
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: s_mov_b32 s9, s11
+; GPRIDX-NEXT: s_mov_b32 m0, s18
; GPRIDX-NEXT: s_mov_b32 s10, s12
; GPRIDX-NEXT: s_mov_b32 s11, s13
; GPRIDX-NEXT: s_mov_b32 s12, s14
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
-; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1]
+; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[8:9]
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset4:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
-; MOVREL-NEXT: s_add_u32 m0, s18, 4
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
@@ -834,13 +808,14 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset4(<8 x double> inreg %vec,
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b32 s8, s10
; MOVREL-NEXT: s_mov_b32 s9, s11
+; MOVREL-NEXT: s_mov_b32 m0, s18
; MOVREL-NEXT: s_mov_b32 s10, s12
; MOVREL-NEXT: s_mov_b32 s11, s13
; MOVREL-NEXT: s_mov_b32 s12, s14
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
-; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1]
+; MOVREL-NEXT: s_movrels_b64 s[0:1], s[8:9]
; MOVREL-NEXT: ; return to shader part epilog
entry:
%add = add i32 %sel, 4
@@ -853,7 +828,6 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset5(<8 x double> inreg %vec,
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
-; GPRIDX-NEXT: s_add_u32 m0, s18, 5
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
@@ -864,18 +838,18 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset5(<8 x double> inreg %vec,
; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: s_mov_b32 s10, s12
; GPRIDX-NEXT: s_mov_b32 s11, s13
+; GPRIDX-NEXT: s_mov_b32 m0, s18
; GPRIDX-NEXT: s_mov_b32 s12, s14
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
-; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1]
+; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[10:11]
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset5:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
-; MOVREL-NEXT: s_add_u32 m0, s18, 5
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
@@ -886,11 +860,12 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset5(<8 x double> inreg %vec,
; MOVREL-NEXT: s_mov_b32 s9, s11
; MOVREL-NEXT: s_mov_b32 s10, s12
; MOVREL-NEXT: s_mov_b32 s11, s13
+; MOVREL-NEXT: s_mov_b32 m0, s18
; MOVREL-NEXT: s_mov_b32 s12, s14
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
-; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1]
+; MOVREL-NEXT: s_movrels_b64 s[0:1], s[10:11]
; MOVREL-NEXT: ; return to shader part epilog
entry:
%add = add i32 %sel, 5
@@ -903,7 +878,6 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset6(<8 x double> inreg %vec,
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
-; GPRIDX-NEXT: s_add_u32 m0, s18, 6
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
@@ -916,16 +890,16 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset6(<8 x double> inreg %vec,
; GPRIDX-NEXT: s_mov_b32 s11, s13
; GPRIDX-NEXT: s_mov_b32 s12, s14
; GPRIDX-NEXT: s_mov_b32 s13, s15
+; GPRIDX-NEXT: s_mov_b32 m0, s18
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
-; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1]
+; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[12:13]
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset6:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
-; MOVREL-NEXT: s_add_u32 m0, s18, 6
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
@@ -938,9 +912,10 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset6(<8 x double> inreg %vec,
; MOVREL-NEXT: s_mov_b32 s11, s13
; MOVREL-NEXT: s_mov_b32 s12, s14
; MOVREL-NEXT: s_mov_b32 s13, s15
+; MOVREL-NEXT: s_mov_b32 m0, s18
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
-; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1]
+; MOVREL-NEXT: s_movrels_b64 s[0:1], s[12:13]
; MOVREL-NEXT: ; return to shader part epilog
entry:
%add = add i32 %sel, 6
@@ -953,7 +928,6 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset7(<8 x double> inreg %vec,
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
-; GPRIDX-NEXT: s_add_u32 m0, s18, 7
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
@@ -968,14 +942,15 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset7(<8 x double> inreg %vec,
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
-; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1]
+; GPRIDX-NEXT: s_mov_b32 m0, s18
+; GPRIDX-NEXT: s_nop 0
+; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[14:15]
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset7:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
-; MOVREL-NEXT: s_add_u32 m0, s18, 7
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
@@ -990,7 +965,8 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offset7(<8 x double> inreg %vec,
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
-; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1]
+; MOVREL-NEXT: s_mov_b32 m0, s18
+; MOVREL-NEXT: s_movrels_b64 s[0:1], s[14:15]
; MOVREL-NEXT: ; return to shader part epilog
entry:
%add = add i32 %sel, 7
@@ -1052,49 +1028,45 @@ define double @dyn_extract_v8f64_v_v_offset3(<8 x double> %vec, i32 %sel) {
; GPRIDX-LABEL: dyn_extract_v8f64_v_v_offset3:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GPRIDX-NEXT: v_add_u32_e32 v18, 3, v16
; GPRIDX-NEXT: s_mov_b64 s[4:5], exec
; GPRIDX-NEXT: BB22_1: ; =>This Inner Loop Header: Depth=1
-; GPRIDX-NEXT: v_readfirstlane_b32 s6, v18
-; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v18
-; GPRIDX-NEXT: s_lshl_b32 s6, s6, 1
-; GPRIDX-NEXT: s_add_u32 s7, s6, 1
-; GPRIDX-NEXT: s_set_gpr_idx_on s6, gpr_idx(SRC0)
-; GPRIDX-NEXT: v_mov_b32_e32 v16, v0
-; GPRIDX-NEXT: s_set_gpr_idx_off
+; GPRIDX-NEXT: v_readfirstlane_b32 s6, v16
+; GPRIDX-NEXT: s_add_u32 s7, s6, 3
+; GPRIDX-NEXT: s_lshl_b32 s7, s7, 1
+; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16
; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v17, v0
; GPRIDX-NEXT: s_set_gpr_idx_off
+; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0)
+; GPRIDX-NEXT: v_mov_b32_e32 v18, v1
+; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
; GPRIDX-NEXT: s_cbranch_execnz BB22_1
; GPRIDX-NEXT: ; %bb.2:
; GPRIDX-NEXT: s_mov_b64 exec, s[4:5]
-; GPRIDX-NEXT: v_mov_b32_e32 v0, v16
-; GPRIDX-NEXT: v_mov_b32_e32 v1, v17
+; GPRIDX-NEXT: v_mov_b32_e32 v0, v17
+; GPRIDX-NEXT: v_mov_b32_e32 v1, v18
; GPRIDX-NEXT: s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: dyn_extract_v8f64_v_v_offset3:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; MOVREL-NEXT: v_add_u32_e32 v18, vcc, 3, v16
; MOVREL-NEXT: s_mov_b64 s[4:5], exec
; MOVREL-NEXT: BB22_1: ; =>This Inner Loop Header: Depth=1
-; MOVREL-NEXT: v_readfirstlane_b32 s6, v18
-; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v18
-; MOVREL-NEXT: s_lshl_b32 s6, s6, 1
-; MOVREL-NEXT: s_mov_b32 m0, s6
-; MOVREL-NEXT: s_add_u32 s7, s6, 1
-; MOVREL-NEXT: v_movrels_b32_e32 v16, v0
-; MOVREL-NEXT: s_mov_b32 m0, s7
+; MOVREL-NEXT: v_readfirstlane_b32 s6, v16
+; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16
+; MOVREL-NEXT: s_add_u32 s6, s6, 3
+; MOVREL-NEXT: s_lshl_b32 m0, s6, 1
; MOVREL-NEXT: v_movrels_b32_e32 v17, v0
+; MOVREL-NEXT: v_movrels_b32_e32 v18, v1
; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc
; MOVREL-NEXT: s_xor_b64 exec, exec, vcc
; MOVREL-NEXT: s_cbranch_execnz BB22_1
; MOVREL-NEXT: ; %bb.2:
; MOVREL-NEXT: s_mov_b64 exec, s[4:5]
-; MOVREL-NEXT: v_mov_b32_e32 v0, v16
-; MOVREL-NEXT: v_mov_b32_e32 v1, v17
+; MOVREL-NEXT: v_mov_b32_e32 v0, v17
+; MOVREL-NEXT: v_mov_b32_e32 v1, v18
; MOVREL-NEXT: s_setpc_b64 s[30:31]
entry:
%add = add i32 %sel, 3
@@ -1188,14 +1160,13 @@ define i8 addrspace(1)* @dyn_extract_v8p1_v_v(<8 x i8 addrspace(1)*> %vec, i32 %
; GPRIDX-NEXT: s_mov_b64 s[4:5], exec
; GPRIDX-NEXT: BB25_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT: v_readfirstlane_b32 s6, v16
+; GPRIDX-NEXT: s_lshl_b32 s7, s6, 1
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16
-; GPRIDX-NEXT: s_lshl_b32 s6, s6, 1
-; GPRIDX-NEXT: s_add_u32 s7, s6, 1
-; GPRIDX-NEXT: s_set_gpr_idx_on s6, gpr_idx(SRC0)
+; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0)
; GPRIDX-NEXT: v_mov_b32_e32 v17, v0
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0)
-; GPRIDX-NEXT: v_mov_b32_e32 v18, v0
+; GPRIDX-NEXT: v_mov_b32_e32 v18, v1
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc
; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc
@@ -1212,13 +1183,10 @@ define i8 addrspace(1)* @dyn_extract_v8p1_v_v(<8 x i8 addrspace(1)*> %vec, i32 %
; MOVREL-NEXT: s_mov_b64 s[4:5], exec
; MOVREL-NEXT: BB25_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT: v_readfirstlane_b32 s6, v16
+; MOVREL-NEXT: s_lshl_b32 m0, s6, 1
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16
-; MOVREL-NEXT: s_lshl_b32 s6, s6, 1
-; MOVREL-NEXT: s_mov_b32 m0, s6
-; MOVREL-NEXT: s_add_u32 s7, s6, 1
; MOVREL-NEXT: v_movrels_b32_e32 v17, v0
-; MOVREL-NEXT: s_mov_b32 m0, s7
-; MOVREL-NEXT: v_movrels_b32_e32 v18, v0
+; MOVREL-NEXT: v_movrels_b32_e32 v18, v1
; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc
; MOVREL-NEXT: s_xor_b64 exec, exec, vcc
; MOVREL-NEXT: s_cbranch_execnz BB25_1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir
index e76927138f45..774d8bc20afe 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir
@@ -286,18 +286,14 @@ body: |
; MOVREL-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_1
; MOVREL: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
- ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
- ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
- ; MOVREL: $m0 = COPY [[S_ADD_U32_]]
- ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
+ ; MOVREL: $m0 = COPY [[COPY1]]
+ ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub1, implicit $m0, implicit [[COPY]]
; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_1
; GPRIDX: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
- ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
- ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
- ; GPRIDX: $m0 = COPY [[S_ADD_U32_]]
- ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
+ ; GPRIDX: $m0 = COPY [[COPY1]]
+ ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub1, implicit $m0, implicit [[COPY]]
; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
%0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
%1:sgpr(s32) = COPY $sgpr8
@@ -352,18 +348,14 @@ body: |
; MOVREL-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_7
; MOVREL: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
- ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 7
- ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
- ; MOVREL: $m0 = COPY [[S_ADD_U32_]]
- ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
+ ; MOVREL: $m0 = COPY [[COPY1]]
+ ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub7, implicit $m0, implicit [[COPY]]
; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_7
; GPRIDX: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
- ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 7
- ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
- ; GPRIDX: $m0 = COPY [[S_ADD_U32_]]
- ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
+ ; GPRIDX: $m0 = COPY [[COPY1]]
+ ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub7, implicit $m0, implicit [[COPY]]
; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
%0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
%1:sgpr(s32) = COPY $sgpr8
@@ -418,18 +410,14 @@ body: |
; MOVREL-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_1
; MOVREL: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
- ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
- ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
- ; MOVREL: $m0 = COPY [[S_ADD_U32_]]
- ; MOVREL: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]]
+ ; MOVREL: $m0 = COPY [[COPY1]]
+ ; MOVREL: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub2_sub3, implicit $m0, implicit [[COPY]]
; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]]
; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_1
; GPRIDX: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
- ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
- ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
- ; GPRIDX: $m0 = COPY [[S_ADD_U32_]]
- ; GPRIDX: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]]
+ ; GPRIDX: $m0 = COPY [[COPY1]]
+ ; GPRIDX: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub2_sub3, implicit $m0, implicit [[COPY]]
; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]]
%0:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
%1:sgpr(s32) = COPY $sgpr8
@@ -451,18 +439,14 @@ body: |
; MOVREL-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_2
; MOVREL: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
- ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2
- ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
- ; MOVREL: $m0 = COPY [[S_ADD_U32_]]
- ; MOVREL: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]]
+ ; MOVREL: $m0 = COPY [[COPY1]]
+ ; MOVREL: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub4_sub5, implicit $m0, implicit [[COPY]]
; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]]
; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_2
; GPRIDX: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
- ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2
- ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
- ; GPRIDX: $m0 = COPY [[S_ADD_U32_]]
- ; GPRIDX: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]]
+ ; GPRIDX: $m0 = COPY [[COPY1]]
+ ; GPRIDX: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub4_sub5, implicit $m0, implicit [[COPY]]
; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]]
%0:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
%1:sgpr(s32) = COPY $sgpr8
@@ -685,18 +669,14 @@ body: |
; MOVREL-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_1
; MOVREL: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
- ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
- ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
- ; MOVREL: $m0 = COPY [[S_ADD_U32_]]
- ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
+ ; MOVREL: $m0 = COPY [[COPY1]]
+ ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub1, implicit $m0, implicit $exec, implicit [[COPY]]
; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_1
; GPRIDX: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
- ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
- ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
- ; GPRIDX: S_SET_GPR_IDX_ON [[S_ADD_U32_]], 1, implicit-def $m0, implicit $m0
- ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
+ ; GPRIDX: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit $m0
+ ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub1, implicit $exec, implicit [[COPY]], implicit $m0
; GPRIDX: S_SET_GPR_IDX_OFF
; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
%0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
@@ -753,18 +733,14 @@ body: |
; MOVREL-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_7
; MOVREL: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
- ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 7
- ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
- ; MOVREL: $m0 = COPY [[S_ADD_U32_]]
- ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
+ ; MOVREL: $m0 = COPY [[COPY1]]
+ ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub7, implicit $m0, implicit $exec, implicit [[COPY]]
; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_7
; GPRIDX: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
- ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 7
- ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
- ; GPRIDX: S_SET_GPR_IDX_ON [[S_ADD_U32_]], 1, implicit-def $m0, implicit $m0
- ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
+ ; GPRIDX: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit $m0
+ ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub7, implicit $exec, implicit [[COPY]], implicit $m0
; GPRIDX: S_SET_GPR_IDX_OFF
; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
%0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir
index afea578dcb60..9ce09e46aae2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir
@@ -436,10 +436,10 @@ body: |
; WAVE64: successors: %bb.1(0x80000000)
; WAVE64: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16
; WAVE64: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
- ; WAVE64: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16
+ ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr16
; WAVE64: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
; WAVE64: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
- ; WAVE64: [[ADD:%[0-9]+]]:vgpr_32(s32) = G_ADD [[COPY1]], [[COPY2]]
+ ; WAVE64: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]]
; WAVE64: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
; WAVE64: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; WAVE64: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
@@ -447,9 +447,11 @@ body: |
; WAVE64: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; WAVE64: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.0, %11, %bb.1
; WAVE64: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %4(s32), %bb.1
- ; WAVE64: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[ADD]](s32), implicit $exec
- ; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[ADD]](s32), implicit $exec
- ; WAVE64: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[V_READFIRSTLANE_B32_]](s32)
+ ; WAVE64: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
+ ; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
+ ; WAVE64: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
+ ; WAVE64: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[V_READFIRSTLANE_B32_]], [[C1]]
+ ; WAVE64: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[ADD1]](s32)
; WAVE64: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; WAVE64: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; WAVE64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
@@ -462,10 +464,10 @@ body: |
; WAVE32: successors: %bb.1(0x80000000)
; WAVE32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16
; WAVE32: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
- ; WAVE32: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16
+ ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr16
; WAVE32: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
; WAVE32: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
- ; WAVE32: [[ADD:%[0-9]+]]:vgpr_32(s32) = G_ADD [[COPY1]], [[COPY2]]
+ ; WAVE32: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]]
; WAVE32: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
; WAVE32: [[DEF1:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF
; WAVE32: [[S_MOV_B32_term:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32_term $exec_lo
@@ -473,9 +475,11 @@ body: |
; WAVE32: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; WAVE32: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[DEF1]], %bb.0, %11, %bb.1
; WAVE32: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %4(s32), %bb.1
- ; WAVE32: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[ADD]](s32), implicit $exec
- ; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[ADD]](s32), implicit $exec
- ; WAVE32: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[V_READFIRSTLANE_B32_]](s32)
+ ; WAVE32: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
+ ; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
+ ; WAVE32: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
+ ; WAVE32: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[V_READFIRSTLANE_B32_]], [[C1]]
+ ; WAVE32: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[ADD1]](s32)
; WAVE32: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; WAVE32: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
; WAVE32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
@@ -643,10 +647,10 @@ body: |
; WAVE64: successors: %bb.1(0x80000000)
; WAVE64: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16
; WAVE64: [[COPY:%[0-9]+]]:vgpr(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
- ; WAVE64: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16
+ ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr16
; WAVE64: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
; WAVE64: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
- ; WAVE64: [[ADD:%[0-9]+]]:vgpr_32(s32) = G_ADD [[COPY1]], [[COPY2]]
+ ; WAVE64: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]]
; WAVE64: [[BITCAST:%[0-9]+]]:vgpr(<16 x s32>) = G_BITCAST [[COPY]](<8 x s64>)
; WAVE64: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
; WAVE64: [[DEF:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF
@@ -662,12 +666,14 @@ body: |
; WAVE64: [[PHI2:%[0-9]+]]:sgpr(s32) = G_PHI [[DEF1]](s32), %bb.0, %11(s32), %bb.1
; WAVE64: [[PHI3:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF2]](s32), %bb.0, %6(s32), %bb.1
; WAVE64: [[PHI4:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF3]](s32), %bb.0, %7(s32), %bb.1
- ; WAVE64: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[ADD]](s32), implicit $exec
- ; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[ADD]](s32), implicit $exec
- ; WAVE64: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[V_READFIRSTLANE_B32_]], [[C1]](s32)
- ; WAVE64: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C1]]
+ ; WAVE64: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
+ ; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
+ ; WAVE64: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
+ ; WAVE64: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[V_READFIRSTLANE_B32_]], [[C2]]
+ ; WAVE64: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[ADD1]], [[C1]](s32)
+ ; WAVE64: [[ADD2:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C1]]
; WAVE64: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[SHL]](s32)
- ; WAVE64: [[EVEC1:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD1]](s32)
+ ; WAVE64: [[EVEC1:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD2]](s32)
; WAVE64: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; WAVE64: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; WAVE64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
@@ -681,10 +687,10 @@ body: |
; WAVE32: successors: %bb.1(0x80000000)
; WAVE32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16
; WAVE32: [[COPY:%[0-9]+]]:vgpr(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
- ; WAVE32: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16
+ ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr16
; WAVE32: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
; WAVE32: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
- ; WAVE32: [[ADD:%[0-9]+]]:vgpr_32(s32) = G_ADD [[COPY1]], [[COPY2]]
+ ; WAVE32: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]]
; WAVE32: [[BITCAST:%[0-9]+]]:vgpr(<16 x s32>) = G_BITCAST [[COPY]](<8 x s64>)
; WAVE32: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
; WAVE32: [[DEF:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF
@@ -700,12 +706,14 @@ body: |
; WAVE32: [[PHI2:%[0-9]+]]:sgpr(s32) = G_PHI [[DEF1]](s32), %bb.0, %11(s32), %bb.1
; WAVE32: [[PHI3:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF2]](s32), %bb.0, %6(s32), %bb.1
; WAVE32: [[PHI4:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF3]](s32), %bb.0, %7(s32), %bb.1
- ; WAVE32: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[ADD]](s32), implicit $exec
- ; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[ADD]](s32), implicit $exec
- ; WAVE32: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[V_READFIRSTLANE_B32_]], [[C1]](s32)
- ; WAVE32: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C1]]
+ ; WAVE32: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
+ ; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
+ ; WAVE32: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
+ ; WAVE32: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[V_READFIRSTLANE_B32_]], [[C2]]
+ ; WAVE32: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[ADD1]], [[C1]](s32)
+ ; WAVE32: [[ADD2:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C1]]
; WAVE32: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[SHL]](s32)
- ; WAVE32: [[EVEC1:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD1]](s32)
+ ; WAVE32: [[EVEC1:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD2]](s32)
; WAVE32: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; WAVE32: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
; WAVE32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
@@ -736,10 +744,10 @@ body: |
; WAVE64: successors: %bb.1(0x80000000)
; WAVE64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0
; WAVE64: [[COPY:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
- ; WAVE64: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
; WAVE64: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
; WAVE64: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
- ; WAVE64: [[ADD:%[0-9]+]]:vgpr_32(s32) = G_ADD [[COPY1]], [[COPY2]]
+ ; WAVE64: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]]
; WAVE64: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
; WAVE64: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; WAVE64: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
@@ -747,9 +755,11 @@ body: |
; WAVE64: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; WAVE64: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.0, %11, %bb.1
; WAVE64: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %4(s32), %bb.1
- ; WAVE64: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[ADD]](s32), implicit $exec
- ; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[ADD]](s32), implicit $exec
- ; WAVE64: [[EVEC:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[V_READFIRSTLANE_B32_]](s32)
+ ; WAVE64: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
+ ; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
+ ; WAVE64: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
+ ; WAVE64: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[V_READFIRSTLANE_B32_]], [[C1]]
+ ; WAVE64: [[EVEC:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[ADD1]](s32)
; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32(s32) = V_MOV_B32_e32 [[EVEC]](s32), implicit $exec
; WAVE64: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; WAVE64: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
@@ -763,10 +773,10 @@ body: |
; WAVE32: successors: %bb.1(0x80000000)
; WAVE32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0
; WAVE32: [[COPY:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
- ; WAVE32: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
; WAVE32: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
; WAVE32: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
- ; WAVE32: [[ADD:%[0-9]+]]:vgpr_32(s32) = G_ADD [[COPY1]], [[COPY2]]
+ ; WAVE32: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]]
; WAVE32: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
; WAVE32: [[DEF1:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF
; WAVE32: [[S_MOV_B32_term:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32_term $exec_lo
@@ -774,9 +784,11 @@ body: |
; WAVE32: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; WAVE32: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[DEF1]], %bb.0, %11, %bb.1
; WAVE32: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %4(s32), %bb.1
- ; WAVE32: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[ADD]](s32), implicit $exec
- ; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[ADD]](s32), implicit $exec
- ; WAVE32: [[EVEC:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[V_READFIRSTLANE_B32_]](s32)
+ ; WAVE32: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
+ ; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
+ ; WAVE32: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
+ ; WAVE32: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[V_READFIRSTLANE_B32_]], [[C1]]
+ ; WAVE32: [[EVEC:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[ADD1]](s32)
; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32(s32) = V_MOV_B32_e32 [[EVEC]](s32), implicit $exec
; WAVE32: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; WAVE32: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
@@ -807,10 +819,10 @@ body: |
; WAVE64: successors: %bb.1(0x80000000)
; WAVE64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0
; WAVE64: [[COPY:%[0-9]+]]:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
- ; WAVE64: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
; WAVE64: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
; WAVE64: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
- ; WAVE64: [[ADD:%[0-9]+]]:vgpr_32(s32) = G_ADD [[COPY1]], [[COPY2]]
+ ; WAVE64: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]]
; WAVE64: [[BITCAST:%[0-9]+]]:sgpr(<16 x s32>) = G_BITCAST [[COPY]](<8 x s64>)
; WAVE64: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
; WAVE64: [[DEF:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF
@@ -826,12 +838,14 @@ body: |
; WAVE64: [[PHI2:%[0-9]+]]:sgpr(s32) = G_PHI [[DEF1]](s32), %bb.0, %11(s32), %bb.1
; WAVE64: [[PHI3:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF2]](s32), %bb.0, %6(s32), %bb.1
; WAVE64: [[PHI4:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF3]](s32), %bb.0, %7(s32), %bb.1
- ; WAVE64: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[ADD]](s32), implicit $exec
- ; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[ADD]](s32), implicit $exec
- ; WAVE64: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[V_READFIRSTLANE_B32_]], [[C1]](s32)
- ; WAVE64: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C1]]
+ ; WAVE64: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
+ ; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
+ ; WAVE64: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
+ ; WAVE64: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[V_READFIRSTLANE_B32_]], [[C2]]
+ ; WAVE64: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[ADD1]], [[C1]](s32)
+ ; WAVE64: [[ADD2:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C1]]
; WAVE64: [[EVEC:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[SHL]](s32)
- ; WAVE64: [[EVEC1:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD1]](s32)
+ ; WAVE64: [[EVEC1:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD2]](s32)
; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32(s32) = V_MOV_B32_e32 [[EVEC]](s32), implicit $exec
; WAVE64: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32(s32) = V_MOV_B32_e32 [[EVEC1]](s32), implicit $exec
; WAVE64: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
@@ -847,10 +861,10 @@ body: |
; WAVE32: successors: %bb.1(0x80000000)
; WAVE32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0
; WAVE32: [[COPY:%[0-9]+]]:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
- ; WAVE32: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
; WAVE32: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
; WAVE32: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
- ; WAVE32: [[ADD:%[0-9]+]]:vgpr_32(s32) = G_ADD [[COPY1]], [[COPY2]]
+ ; WAVE32: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]]
; WAVE32: [[BITCAST:%[0-9]+]]:sgpr(<16 x s32>) = G_BITCAST [[COPY]](<8 x s64>)
; WAVE32: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
; WAVE32: [[DEF:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF
@@ -866,12 +880,14 @@ body: |
; WAVE32: [[PHI2:%[0-9]+]]:sgpr(s32) = G_PHI [[DEF1]](s32), %bb.0, %11(s32), %bb.1
; WAVE32: [[PHI3:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF2]](s32), %bb.0, %6(s32), %bb.1
; WAVE32: [[PHI4:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF3]](s32), %bb.0, %7(s32), %bb.1
- ; WAVE32: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[ADD]](s32), implicit $exec
- ; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[ADD]](s32), implicit $exec
- ; WAVE32: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[V_READFIRSTLANE_B32_]], [[C1]](s32)
- ; WAVE32: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C1]]
+ ; WAVE32: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
+ ; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
+ ; WAVE32: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
+ ; WAVE32: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[V_READFIRSTLANE_B32_]], [[C2]]
+ ; WAVE32: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[ADD1]], [[C1]](s32)
+ ; WAVE32: [[ADD2:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C1]]
; WAVE32: [[EVEC:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[SHL]](s32)
- ; WAVE32: [[EVEC1:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD1]](s32)
+ ; WAVE32: [[EVEC1:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD2]](s32)
; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32(s32) = V_MOV_B32_e32 [[EVEC]](s32), implicit $exec
; WAVE32: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32(s32) = V_MOV_B32_e32 [[EVEC1]](s32), implicit $exec
; WAVE32: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec