[llvm] r357235 - AMDGPU/GlobalISel: Insert waterfall loop for vector indexing
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 28 20:54:56 PDT 2019
Author: arsenm
Date: Thu Mar 28 20:54:56 2019
New Revision: 357235
URL: http://llvm.org/viewvc/llvm-project?rev=357235&view=rev
Log:
AMDGPU/GlobalISel: Insert waterfall loop for vector indexing
The register index can only really be an SGPR. Lie that a VGPR index
is legal, and then rewrite the instruction in a waterfall loop to
handle the index.
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp?rev=357235&r1=357234&r2=357235&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp Thu Mar 28 20:54:56 2019
@@ -13,9 +13,11 @@
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPUInstrInfo.h"
+#include "AMDGPUSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
@@ -328,6 +330,170 @@ static LLT getHalfSizedType(LLT Ty) {
return LLT::scalar(Ty.getSizeInBits() / 2);
}
+/// Legalize instruction \p MI where operands in \p OpIndices must be SGPRs. If
+/// any of the required SGPR operands are VGPRs, perform a waterfall loop to
+/// execute the instruction for each unique combination of values in all lanes
+/// in the wave. The block will be split such that new blocks
+void AMDGPURegisterBankInfo::executeInWaterfallLoop(
+ MachineInstr &MI, MachineRegisterInfo &MRI,
+ ArrayRef<unsigned> OpIndices) const {
+ MachineFunction *MF = MI.getParent()->getParent();
+ const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ MachineBasicBlock::iterator I(MI);
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ const DebugLoc &DL = MI.getDebugLoc();
+
+ assert(OpIndices.size() == 1 &&
+ "need to implement support for multiple operands");
+
+ // Use a set to avoid extra readfirstlanes in the case where multiple operands
+ // are the same register.
+ SmallSet<unsigned, 4> SGPROperandRegs;
+ for (unsigned Op : OpIndices) {
+ assert(MI.getOperand(Op).isUse());
+ unsigned Reg = MI.getOperand(Op).getReg();
+ const RegisterBank *OpBank = getRegBank(Reg, MRI, *TRI);
+ if (OpBank->getID() == AMDGPU::VGPRRegBankID)
+ SGPROperandRegs.insert(Reg);
+ }
+
+ // No operands need to be replaced, so no need to loop.
+ if (SGPROperandRegs.empty())
+ return;
+
+ MachineIRBuilder B(MI);
+ SmallVector<unsigned, 4> ResultRegs;
+ SmallVector<unsigned, 4> InitResultRegs;
+ SmallVector<unsigned, 4> PhiRegs;
+ for (MachineOperand &Def : MI.defs()) {
+ LLT ResTy = MRI.getType(Def.getReg());
+ const RegisterBank *DefBank = getRegBank(Def.getReg(), MRI, *TRI);
+ ResultRegs.push_back(Def.getReg());
+ unsigned InitReg = B.buildUndef(ResTy).getReg(0);
+ unsigned PhiReg = MRI.createGenericVirtualRegister(ResTy);
+ InitResultRegs.push_back(InitReg);
+ PhiRegs.push_back(PhiReg);
+ MRI.setRegBank(PhiReg, *DefBank);
+ MRI.setRegBank(InitReg, *DefBank);
+ }
+
+ unsigned SaveExecReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+ unsigned InitSaveExecReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+
+ // Don't bother using generic instructions/registers for the exec mask.
+ B.buildInstr(TargetOpcode::IMPLICIT_DEF)
+ .addDef(InitSaveExecReg);
+
+ // Save the EXEC mask
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B64_term), SaveExecReg)
+ .addReg(AMDGPU::EXEC);
+
+ unsigned PhiExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned NewExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned CondReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+
+ // To insert the loop we need to split the block. Move everything before this
+ // point to a new block, and insert a new empty block before this instruction.
+ MachineBasicBlock *LoopBB = MF->CreateMachineBasicBlock();
+ MachineBasicBlock *RemainderBB = MF->CreateMachineBasicBlock();
+ MachineBasicBlock *RestoreExecBB = MF->CreateMachineBasicBlock();
+ MachineFunction::iterator MBBI(MBB);
+ ++MBBI;
+ MF->insert(MBBI, LoopBB);
+ MF->insert(MBBI, RestoreExecBB);
+ MF->insert(MBBI, RemainderBB);
+
+ LoopBB->addSuccessor(RestoreExecBB);
+ LoopBB->addSuccessor(LoopBB);
+
+ // Move the rest of the block into a new block.
+ RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB);
+ RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end());
+
+ MBB.addSuccessor(LoopBB);
+ RestoreExecBB->addSuccessor(RemainderBB);
+
+ B.setInsertPt(*LoopBB, LoopBB->end());
+
+ B.buildInstr(TargetOpcode::PHI)
+ .addDef(PhiExec)
+ .addReg(InitSaveExecReg)
+ .addMBB(&MBB)
+ .addReg(NewExec)
+ .addMBB(LoopBB);
+
+ for (auto Result : zip(InitResultRegs, ResultRegs, PhiRegs)) {
+ B.buildInstr(TargetOpcode::G_PHI)
+ .addDef(std::get<2>(Result))
+ .addReg(std::get<0>(Result)) // Initial value / implicit_def
+ .addMBB(&MBB)
+ .addReg(std::get<1>(Result)) // Mid-loop value.
+ .addMBB(LoopBB);
+ }
+
+ // Move the instruction into the loop.
+ LoopBB->splice(LoopBB->end(), &MBB, I);
+ I = std::prev(LoopBB->end());
+
+ for (MachineOperand &Op : MI.uses()) {
+ if (!Op.isReg())
+ continue;
+
+ assert(!Op.isDef());
+ if (SGPROperandRegs.count(Op.getReg())) {
+ unsigned CurrentLaneOpReg
+ = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ MRI.setType(CurrentLaneOpReg, LLT::scalar(32)); // FIXME
+
+ assert(MRI.getType(Op.getReg())== LLT::scalar(32) &&
+ "need to implement support for other types");
+
+ constrainGenericRegister(Op.getReg(), AMDGPU::VGPR_32RegClass, MRI);
+
+ // Read the next variant <- also loop target.
+ BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
+ CurrentLaneOpReg)
+ .addReg(Op.getReg());
+
+ // FIXME: Need to and each conditon
+
+ // Compare the just read SGPR value to all possible operand values.
+ B.buildInstr(AMDGPU::V_CMP_EQ_U32_e64)
+ .addDef(CondReg)
+ .addReg(CurrentLaneOpReg)
+ .addReg(Op.getReg());
+ Op.setReg(CurrentLaneOpReg);
+ }
+ }
+
+ // Update EXEC, save the original EXEC value to VCC.
+ B.buildInstr(AMDGPU::S_AND_SAVEEXEC_B64)
+ .addDef(NewExec)
+ .addReg(CondReg, RegState::Kill);
+
+ MRI.setSimpleHint(NewExec, CondReg);
+
+ // Update EXEC, switch all done bits to 0 and all todo bits to 1.
+ B.buildInstr(AMDGPU::S_XOR_B64_term)
+ .addDef(AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
+ .addReg(NewExec);
+
+ // XXX - s_xor_b64 sets scc to 1 if the result is nonzero, so can we use
+ // s_cbranch_scc0?
+
+ // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover.
+ B.buildInstr(AMDGPU::S_CBRANCH_EXECNZ)
+ .addMBB(LoopBB);
+
+ // Restore the EXEC mask
+ B.buildInstr(AMDGPU::S_MOV_B64_term)
+ .addDef(AMDGPU::EXEC)
+ .addReg(SaveExecReg);
+}
+
void AMDGPURegisterBankInfo::applyMappingImpl(
const OperandsMapper &OpdMapper) const {
MachineInstr &MI = OpdMapper.getMI();
@@ -436,6 +602,10 @@ void AMDGPURegisterBankInfo::applyMappin
MI.eraseFromParent();
return;
}
+ case AMDGPU::G_EXTRACT_VECTOR_ELT:
+ applyDefaultMapping(OpdMapper);
+ executeInWaterfallLoop(MI, MRI, { 2 });
+ return;
default:
break;
}
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h?rev=357235&r1=357234&r2=357235&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h Thu Mar 28 20:54:56 2019
@@ -37,6 +37,10 @@ protected:
class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
const SIRegisterInfo *TRI;
+ void executeInWaterfallLoop(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ ArrayRef<unsigned> OpIndices) const;
+
/// See RegisterBankInfo::applyMapping.
void applyMappingImpl(const OperandsMapper &OpdMapper) const override;
Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir?rev=357235&r1=357234&r2=357235&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir Thu Mar 28 20:54:56 2019
@@ -3,13 +3,15 @@
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
---
-name: extract_vector_elt_v16i32_ss
+name: extract_vector_elt_v16s32_ss
legalized: true
+tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16
- ; CHECK-LABEL: name: extract_vector_elt_v16i32_ss
+ ; CHECK-LABEL: name: extract_vector_elt_v16s32_ss
+ ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16
; CHECK: [[COPY:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr16
; CHECK: [[EVEC:%[0-9]+]]:sgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[COPY1]](s32)
@@ -21,17 +23,36 @@ body: |
...
---
-name: extract_vector_elt_v16i32_sv
+name: extract_vector_elt_v16s32_sv
legalized: true
+tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0
- ; CHECK-LABEL: name: extract_vector_elt_v16i32_sv
+ ; CHECK-LABEL: name: extract_vector_elt_v16s32_sv
+ ; CHECK: successors: %bb.1(0x80000000)
+ ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0
; CHECK: [[COPY:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
- ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
; CHECK: [[COPY2:%[0-9]+]]:vgpr(<16 x s32>) = COPY [[COPY]](<16 x s32>)
- ; CHECK: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY2]](<16 x s32>), [[COPY1]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
+ ; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; CHECK: .1:
+ ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.0, %9, %bb.1
+ ; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %2(s32), %bb.1
+ ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
+ ; CHECK: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY2]](<16 x s32>), [[V_READFIRSTLANE_B32_]](s32)
+ ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+ ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; CHECK: .2:
+ ; CHECK: successors: %bb.3(0x80000000)
+ ; CHECK: .3:
; CHECK: $vgpr0 = COPY [[EVEC]](s32)
%0:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
%1:_(s32) = COPY $vgpr0
@@ -40,13 +61,15 @@ body: |
...
---
-name: extract_vector_elt_v16i32_vs
+name: extract_vector_elt_v16s32_vs
legalized: true
+tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $sgpr0
- ; CHECK-LABEL: name: extract_vector_elt_v16i32_vs
+ ; CHECK-LABEL: name: extract_vector_elt_v16s32_vs
+ ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $sgpr0
; CHECK: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[COPY1]](s32)
@@ -58,19 +81,75 @@ body: |
...
---
-name: extract_vector_elt_v16i32_vv
+name: extract_vector_elt_v16s32_vv
legalized: true
+tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16
- ; CHECK-LABEL: name: extract_vector_elt_v16i32_vv
+ ; CHECK-LABEL: name: extract_vector_elt_v16s32_vv
+ ; CHECK: successors: %bb.1(0x80000000)
+ ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16
; CHECK: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
- ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16
- ; CHECK: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[COPY1]](s32)
+ ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr16
+ ; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
+ ; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; CHECK: .1:
+ ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.0, %8, %bb.1
+ ; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %2(s32), %bb.1
+ ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
+ ; CHECK: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[V_READFIRSTLANE_B32_]](s32)
+ ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+ ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; CHECK: .2:
+ ; CHECK: successors: %bb.3(0x80000000)
+ ; CHECK: .3:
; CHECK: $vgpr0 = COPY [[EVEC]](s32)
%0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
%1:_(s32) = COPY $vgpr16
%2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
$vgpr0 = COPY %2
...
+
+---
+name: extract_vector_elt_v8s64_vv
+legalized: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16
+ ; CHECK-LABEL: name: extract_vector_elt_v8s64_vv
+ ; CHECK: successors: %bb.1(0x80000000)
+ ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr16
+ ; CHECK: [[DEF:%[0-9]+]]:vgpr(s64) = G_IMPLICIT_DEF
+ ; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; CHECK: .1:
+ ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.0, %8, %bb.1
+ ; CHECK: [[PHI1:%[0-9]+]]:vgpr(s64) = G_PHI [[DEF]](s64), %bb.0, %2(s64), %bb.1
+ ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
+ ; CHECK: [[EVEC:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](<8 x s64>), [[V_READFIRSTLANE_B32_]](s32)
+ ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+ ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; CHECK: .2:
+ ; CHECK: successors: %bb.3(0x80000000)
+ ; CHECK: .3:
+ ; CHECK: $vgpr0_vgpr1 = COPY [[EVEC]](s64)
+ %0:_(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ %1:_(s32) = COPY $vgpr16
+ %2:_(s64) = G_EXTRACT_VECTOR_ELT %0, %1
+ $vgpr0_vgpr1 = COPY %2
+...
More information about the llvm-commits
mailing list