[llvm] r373714 - AMDGPU/GlobalISel: Support wave32 waterfall loops
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 4 01:35:36 PDT 2019
Author: arsenm
Date: Fri Oct 4 01:35:35 2019
New Revision: 373714
URL: http://llvm.org/viewvc/llvm-project?rev=373714&view=rev
Log:
AMDGPU/GlobalISel: Support wave32 waterfall loops
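
The waterfall loop built by executeInWaterfallLoop() executes an instruction
once per unique value of a divergent operand: each trip reads the first
active lane's value with V_READFIRSTLANE, builds the mask of lanes that
share it with V_CMP_EQ, runs the operation for those lanes, and retires
them from the pending mask until none remain. All of that exec-mask
bookkeeping previously hardcoded the 64-bit (wave64) opcodes and SReg_64
register classes; it now takes the mask register class from
getWaveMaskRegClass() and picks the 32- or 64-bit opcode forms from the
subtarget's wave size. A minimal standalone sketch of the idea, using
stand-in names rather than the real LLVM APIs:

  #include <cstdint>
  #include <cstdio>
  #include <vector>

  // Stand-ins for the wave-size-dependent pieces now chosen up front
  // (the real code uses the AMDGPU:: opcode namespace,
  // GCNSubtarget::isWave32() and TRI->getWaveMaskRegClass()).
  struct WaveOps {
    const char *And, *MovTerm, *XorTerm, *AndSaveExec, *ExecReg;
  };

  WaveOps pickWaveOps(bool IsWave32) {
    if (IsWave32)
      return {"S_AND_B32", "S_MOV_B32_term", "S_XOR_B32_term",
              "S_AND_SAVEEXEC_B32", "$exec_lo"};
    return {"S_AND_B64", "S_MOV_B64_term", "S_XOR_B64_term",
            "S_AND_SAVEEXEC_B64", "$exec"};
  }

  // Scalar model of what one waterfall loop computes for a wave32 wave.
  // Each iteration: read the first pending lane's value (V_READFIRSTLANE),
  // mask the lanes that share it (V_CMP_EQ), run the op once for exactly
  // those lanes, then drop them from the pending mask (the S_XOR_*_term
  // above) and branch back while any remain (S_CBRANCH_EXECNZ).
  void waterfall(const std::vector<uint32_t> &LaneVals, uint32_t Pending) {
    while (Pending) {
      unsigned FirstLane = __builtin_ctz(Pending); // GCC/Clang builtin
      uint32_t Val = LaneVals[FirstLane];
      uint32_t Match = 0;
      for (unsigned L = 0; L != LaneVals.size(); ++L)
        if (((Pending >> L) & 1) && LaneVals[L] == Val)
          Match |= 1u << L;
      std::printf("run op with uniform value %u for lanes 0x%08x\n",
                  Val, Match);
      Pending ^= Match; // retire the lanes just handled
    }
  }

  int main() {
    WaveOps Ops = pickWaveOps(/*IsWave32=*/true);
    std::printf("wave32 exec register: %s\n", Ops.ExecReg);
    waterfall({7, 7, 3, 7}, 0xfu); // two trips: value 7, then value 3
  }

On wave32 targets the mask is a single 32-bit SGPR, so every mask operation
needs its _B32 form and the loop must reference $exec_lo instead of $exec;
the diff below threads exactly that choice through the loop emission.
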
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll
llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll
llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll
llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-buffer-load.mir
llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir
llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll
llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll
llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll
llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll
llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll
llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir
llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert-vector-elt.mir
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp?rev=373714&r1=373713&r2=373714&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp Fri Oct 4 01:35:35 2019
@@ -668,6 +668,18 @@ bool AMDGPURegisterBankInfo::executeInWa
MachineBasicBlock &MBB = B.getMBB();
MachineFunction *MF = &B.getMF();
+ const TargetRegisterClass *WaveRC = TRI->getWaveMaskRegClass();
+ const unsigned WaveAndOpc = Subtarget.isWave32() ?
+ AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
+ const unsigned MovTermOpc = Subtarget.isWave32() ?
+ AMDGPU::S_MOV_B32_term : AMDGPU::S_MOV_B64_term;
+ const unsigned XorTermOpc = Subtarget.isWave32() ?
+ AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term;
+ const unsigned AndSaveExecOpc = Subtarget.isWave32() ?
+ AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64;
+ const unsigned ExecReg = Subtarget.isWave32() ?
+ AMDGPU::EXEC_LO : AMDGPU::EXEC;
+
for (MachineInstr &MI : Range) {
for (MachineOperand &Def : MI.defs()) {
LLT ResTy = MRI.getType(Def.getReg());
@@ -682,15 +694,15 @@ bool AMDGPURegisterBankInfo::executeInWa
}
}
- Register SaveExecReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
- Register InitSaveExecReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+ Register SaveExecReg = MRI.createVirtualRegister(WaveRC);
+ Register InitSaveExecReg = MRI.createVirtualRegister(WaveRC);
// Don't bother using generic instructions/registers for the exec mask.
B.buildInstr(TargetOpcode::IMPLICIT_DEF)
.addDef(InitSaveExecReg);
- Register PhiExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
- Register NewExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ Register PhiExec = MRI.createVirtualRegister(WaveRC);
+ Register NewExec = MRI.createVirtualRegister(WaveRC);
// To insert the loop we need to split the block. Move everything before this
// point to a new block, and insert a new empty block before this instruction.
@@ -769,8 +781,7 @@ bool AMDGPURegisterBankInfo::executeInWa
CurrentLaneOpReg)
.addReg(Op.getReg());
- Register NewCondReg
- = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ Register NewCondReg = MRI.createVirtualRegister(WaveRC);
bool First = CondReg == AMDGPU::NoRegister;
if (First)
CondReg = NewCondReg;
@@ -783,11 +794,10 @@ bool AMDGPURegisterBankInfo::executeInWa
Op.setReg(CurrentLaneOpReg);
if (!First) {
- Register AndReg
- = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+ Register AndReg = MRI.createVirtualRegister(WaveRC);
// If there are multiple operands to consider, AND the conditions together.
- B.buildInstr(AMDGPU::S_AND_B64)
+ B.buildInstr(WaveAndOpc)
.addDef(AndReg)
.addReg(NewCondReg)
.addReg(CondReg);
@@ -866,8 +876,7 @@ bool AMDGPURegisterBankInfo::executeInWa
ReadlanePieces.push_back(CurrentLaneOpReg);
}
- Register NewCondReg
- = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+ Register NewCondReg = MRI.createVirtualRegister(WaveRC);
bool First = CondReg == AMDGPU::NoRegister;
if (First)
CondReg = NewCondReg;
@@ -878,11 +887,10 @@ bool AMDGPURegisterBankInfo::executeInWa
.addReg(UnmergePiece);
if (!First) {
- Register AndReg
- = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+ Register AndReg = MRI.createVirtualRegister(WaveRC);
// If there are multiple operands to consider, AND the conditions together.
- B.buildInstr(AMDGPU::S_AND_B64)
+ B.buildInstr(WaveAndOpc)
.addDef(AndReg)
.addReg(NewCondReg)
.addReg(CondReg);
@@ -909,16 +917,16 @@ bool AMDGPURegisterBankInfo::executeInWa
B.setInsertPt(*LoopBB, LoopBB->end());
// Update EXEC, saving the original EXEC value in NewExec.
- B.buildInstr(AMDGPU::S_AND_SAVEEXEC_B64)
+ B.buildInstr(AndSaveExecOpc)
.addDef(NewExec)
.addReg(CondReg, RegState::Kill);
MRI.setSimpleHint(NewExec, CondReg);
// Update EXEC, switch all done bits to 0 and all todo bits to 1.
- B.buildInstr(AMDGPU::S_XOR_B64_term)
- .addDef(AMDGPU::EXEC)
- .addReg(AMDGPU::EXEC)
+ B.buildInstr(XorTermOpc)
+ .addDef(ExecReg)
+ .addReg(ExecReg)
.addReg(NewExec);
// XXX - s_xor_b64 sets scc to 1 if the result is nonzero, so can we use
@@ -929,13 +937,13 @@ bool AMDGPURegisterBankInfo::executeInWa
.addMBB(LoopBB);
// Save the EXEC mask before the loop.
- BuildMI(MBB, MBB.end(), DL, TII->get(AMDGPU::S_MOV_B64_term), SaveExecReg)
- .addReg(AMDGPU::EXEC);
+ BuildMI(MBB, MBB.end(), DL, TII->get(MovTermOpc), SaveExecReg)
+ .addReg(ExecReg);
// Restore the EXEC mask after the loop.
B.setMBB(*RestoreExecBB);
- B.buildInstr(AMDGPU::S_MOV_B64_term)
- .addDef(AMDGPU::EXEC)
+ B.buildInstr(MovTermOpc)
+ .addDef(ExecReg)
.addReg(SaveExecReg);
// Restore the insert point before the original instruction.
Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll?rev=373714&r1=373713&r2=373714&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll Fri Oct 4 01:35:35 2019
@@ -174,7 +174,7 @@ define amdgpu_ps void @raw_buffer_store_
; UNPACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; UNPACKED: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
- ; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; UNPACKED: bb.3:
@@ -212,7 +212,7 @@ define amdgpu_ps void @raw_buffer_store_
; PACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; PACKED: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
- ; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; PACKED: bb.3:
@@ -460,7 +460,7 @@ define amdgpu_ps void @raw_buffer_store_
; UNPACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; UNPACKED: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
- ; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; UNPACKED: bb.3:
@@ -501,7 +501,7 @@ define amdgpu_ps void @raw_buffer_store_
; PACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; PACKED: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
- ; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; PACKED: bb.3:
Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll?rev=373714&r1=373713&r2=373714&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll Fri Oct 4 01:35:35 2019
@@ -133,7 +133,7 @@ define amdgpu_ps void @raw_buffer_store_
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
@@ -295,7 +295,7 @@ define amdgpu_ps void @raw_buffer_store_
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %15, [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll?rev=373714&r1=373713&r2=373714&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll Fri Oct 4 01:35:35 2019
@@ -72,7 +72,7 @@ define amdgpu_ps void @raw_buffer_store_
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
@@ -102,9 +102,9 @@ define amdgpu_ps void @raw_buffer_store_
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
- ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY6]], implicit $exec
+ ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY6]], implicit $exec
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
@@ -146,10 +146,10 @@ define amdgpu_ps void @raw_buffer_store_
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
- ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
+ ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
@@ -475,7 +475,7 @@ define amdgpu_ps void @raw_buffer_store_
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
@@ -723,7 +723,7 @@ define amdgpu_ps void @raw_buffer_store_
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE3]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
@@ -766,7 +766,7 @@ define amdgpu_ps void @raw_buffer_store_
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE3]], [[COPY5]], 904, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 5000, align 1, addrspace 4)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-buffer-load.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-buffer-load.mir?rev=373714&r1=373713&r2=373714&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-buffer-load.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-buffer-load.mir Fri Oct 4 01:35:35 2019
@@ -1,6 +1,8 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs %s -o - | FileCheck %s
-# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE64 %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE64 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE32 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE32 %s
---
name: buffer_load_sss
@@ -10,13 +12,20 @@ body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $sgpr5
- ; CHECK-LABEL: name: buffer_load_sss
- ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $sgpr5
- ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
- ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.buffer.load), [[COPY]](<4 x s32>), [[COPY1]](s32), [[COPY2]](s32), 0, 0
+ ; WAVE64-LABEL: name: buffer_load_sss
+ ; WAVE64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $sgpr5
+ ; WAVE64: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; WAVE64: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
+ ; WAVE64: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; WAVE64: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; WAVE64: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.buffer.load), [[COPY]](<4 x s32>), [[COPY1]](s32), [[COPY2]](s32), 0, 0
+ ; WAVE32-LABEL: name: buffer_load_sss
+ ; WAVE32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $sgpr5
+ ; WAVE32: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; WAVE32: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
+ ; WAVE32: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; WAVE32: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; WAVE32: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.buffer.load), [[COPY]](<4 x s32>), [[COPY1]](s32), [[COPY2]](s32), 0, 0
%0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:_(s32) = COPY $sgpr4
%2:_(s32) = COPY $sgpr5
@@ -32,13 +41,20 @@ body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $vgpr5
- ; CHECK-LABEL: name: buffer_load_ssv
- ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $vgpr5
- ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
- ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
- ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.buffer.load), [[COPY]](<4 x s32>), [[COPY1]](s32), [[COPY2]](s32), 0, 0
+ ; WAVE64-LABEL: name: buffer_load_ssv
+ ; WAVE64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $vgpr5
+ ; WAVE64: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; WAVE64: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
+ ; WAVE64: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
+ ; WAVE64: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; WAVE64: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.buffer.load), [[COPY]](<4 x s32>), [[COPY1]](s32), [[COPY2]](s32), 0, 0
+ ; WAVE32-LABEL: name: buffer_load_ssv
+ ; WAVE32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $vgpr5
+ ; WAVE32: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; WAVE32: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
+ ; WAVE32: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
+ ; WAVE32: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; WAVE32: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.buffer.load), [[COPY]](<4 x s32>), [[COPY1]](s32), [[COPY2]](s32), 0, 0
%0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:_(s32) = COPY $sgpr4
%2:_(s32) = COPY $vgpr5
@@ -54,13 +70,20 @@ body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $sgpr5
- ; CHECK-LABEL: name: buffer_load_svs
- ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $sgpr5
- ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
- ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.buffer.load), [[COPY]](<4 x s32>), [[COPY1]](s32), [[COPY2]](s32), 0, 0
+ ; WAVE64-LABEL: name: buffer_load_svs
+ ; WAVE64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $sgpr5
+ ; WAVE64: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; WAVE64: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
+ ; WAVE64: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; WAVE64: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; WAVE64: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.buffer.load), [[COPY]](<4 x s32>), [[COPY1]](s32), [[COPY2]](s32), 0, 0
+ ; WAVE32-LABEL: name: buffer_load_svs
+ ; WAVE32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $sgpr5
+ ; WAVE32: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; WAVE32: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
+ ; WAVE32: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; WAVE32: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; WAVE32: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.buffer.load), [[COPY]](<4 x s32>), [[COPY1]](s32), [[COPY2]](s32), 0, 0
%0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:_(s32) = COPY $sgpr4
%2:_(s32) = COPY $sgpr5
@@ -76,39 +99,72 @@ body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr4, $sgpr5
- ; CHECK-LABEL: name: buffer_load_vss
- ; CHECK: successors: %bb.1(0x80000000)
- ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr4, $sgpr5
- ; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
- ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; CHECK: [[DEF:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
- ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
- ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; CHECK: .1:
- ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.0, %10, %bb.1
- ; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.0, %3(<4 x s32>), %bb.1
- ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
- ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
- ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
- ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
- ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
- ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
- ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
- ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
- ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
- ; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY1]](s32), [[COPY2]](s32), 0, 0
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
- ; CHECK: .2:
- ; CHECK: successors: %bb.3(0x80000000)
- ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; CHECK: .3:
+ ; WAVE64-LABEL: name: buffer_load_vss
+ ; WAVE64: successors: %bb.1(0x80000000)
+ ; WAVE64: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr4, $sgpr5
+ ; WAVE64: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ ; WAVE64: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
+ ; WAVE64: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; WAVE64: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; WAVE64: [[DEF:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
+ ; WAVE64: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; WAVE64: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; WAVE64: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; WAVE64: .1:
+ ; WAVE64: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; WAVE64: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.0, %10, %bb.1
+ ; WAVE64: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.0, %3(<4 x s32>), %bb.1
+ ; WAVE64: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
+ ; WAVE64: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
+ ; WAVE64: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
+ ; WAVE64: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
+ ; WAVE64: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
+ ; WAVE64: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
+ ; WAVE64: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; WAVE64: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
+ ; WAVE64: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; WAVE64: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; WAVE64: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY1]](s32), [[COPY2]](s32), 0, 0
+ ; WAVE64: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; WAVE64: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; WAVE64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+ ; WAVE64: .2:
+ ; WAVE64: successors: %bb.3(0x80000000)
+ ; WAVE64: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; WAVE64: .3:
+ ; WAVE32-LABEL: name: buffer_load_vss
+ ; WAVE32: successors: %bb.1(0x80000000)
+ ; WAVE32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr4, $sgpr5
+ ; WAVE32: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ ; WAVE32: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
+ ; WAVE32: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; WAVE32: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; WAVE32: [[DEF:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
+ ; WAVE32: [[DEF1:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF
+ ; WAVE32: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; WAVE32: [[S_MOV_B32_term:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32_term $exec_lo
+ ; WAVE32: .1:
+ ; WAVE32: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; WAVE32: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[DEF1]], %bb.0, %10, %bb.1
+ ; WAVE32: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.0, %3(<4 x s32>), %bb.1
+ ; WAVE32: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
+ ; WAVE32: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
+ ; WAVE32: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
+ ; WAVE32: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
+ ; WAVE32: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
+ ; WAVE32: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
+ ; WAVE32: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; WAVE32: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
+ ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; WAVE32: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; WAVE32: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY1]](s32), [[COPY2]](s32), 0, 0
+ ; WAVE32: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; WAVE32: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
+ ; WAVE32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+ ; WAVE32: .2:
+ ; WAVE32: successors: %bb.3(0x80000000)
+ ; WAVE32: $exec_lo = S_MOV_B32_term [[S_MOV_B32_term]]
+ ; WAVE32: .3:
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(s32) = COPY $sgpr4
%2:_(s32) = COPY $sgpr5
@@ -124,38 +180,70 @@ body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4, $sgpr5
- ; CHECK-LABEL: name: buffer_load_vvs
- ; CHECK: successors: %bb.1(0x80000000)
- ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4, $sgpr5
- ; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
- ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
- ; CHECK: [[DEF:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
- ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
- ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; CHECK: .1:
- ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.0, %9, %bb.1
- ; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.0, %3(<4 x s32>), %bb.1
- ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
- ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
- ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
- ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
- ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
- ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
- ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
- ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
- ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
- ; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY1]](s32), [[COPY2]](s32), 0, 0
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
- ; CHECK: .2:
- ; CHECK: successors: %bb.3(0x80000000)
- ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; CHECK: .3:
+ ; WAVE64-LABEL: name: buffer_load_vvs
+ ; WAVE64: successors: %bb.1(0x80000000)
+ ; WAVE64: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4, $sgpr5
+ ; WAVE64: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ ; WAVE64: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
+ ; WAVE64: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; WAVE64: [[DEF:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
+ ; WAVE64: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; WAVE64: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; WAVE64: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; WAVE64: .1:
+ ; WAVE64: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; WAVE64: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.0, %9, %bb.1
+ ; WAVE64: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.0, %3(<4 x s32>), %bb.1
+ ; WAVE64: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
+ ; WAVE64: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
+ ; WAVE64: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
+ ; WAVE64: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
+ ; WAVE64: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
+ ; WAVE64: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
+ ; WAVE64: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; WAVE64: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
+ ; WAVE64: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; WAVE64: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; WAVE64: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY1]](s32), [[COPY2]](s32), 0, 0
+ ; WAVE64: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; WAVE64: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; WAVE64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+ ; WAVE64: .2:
+ ; WAVE64: successors: %bb.3(0x80000000)
+ ; WAVE64: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; WAVE64: .3:
+ ; WAVE32-LABEL: name: buffer_load_vvs
+ ; WAVE32: successors: %bb.1(0x80000000)
+ ; WAVE32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4, $sgpr5
+ ; WAVE32: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ ; WAVE32: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
+ ; WAVE32: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; WAVE32: [[DEF:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
+ ; WAVE32: [[DEF1:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF
+ ; WAVE32: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; WAVE32: [[S_MOV_B32_term:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32_term $exec_lo
+ ; WAVE32: .1:
+ ; WAVE32: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; WAVE32: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[DEF1]], %bb.0, %9, %bb.1
+ ; WAVE32: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.0, %3(<4 x s32>), %bb.1
+ ; WAVE32: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
+ ; WAVE32: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
+ ; WAVE32: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
+ ; WAVE32: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
+ ; WAVE32: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
+ ; WAVE32: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
+ ; WAVE32: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; WAVE32: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
+ ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; WAVE32: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; WAVE32: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY1]](s32), [[COPY2]](s32), 0, 0
+ ; WAVE32: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; WAVE32: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
+ ; WAVE32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+ ; WAVE32: .2:
+ ; WAVE32: successors: %bb.3(0x80000000)
+ ; WAVE32: $exec_lo = S_MOV_B32_term [[S_MOV_B32_term]]
+ ; WAVE32: .3:
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(s32) = COPY $vgpr4
%2:_(s32) = COPY $sgpr5
@@ -171,12 +259,18 @@ body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr4, $vgpr5
- ; CHECK-LABEL: name: buffer_load_svv
- ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr4, $vgpr5
- ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
- ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
- ; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.buffer.load), [[COPY]](<4 x s32>), [[COPY1]](s32), [[COPY2]](s32), 0, 0
+ ; WAVE64-LABEL: name: buffer_load_svv
+ ; WAVE64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr4, $vgpr5
+ ; WAVE64: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; WAVE64: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
+ ; WAVE64: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
+ ; WAVE64: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.buffer.load), [[COPY]](<4 x s32>), [[COPY1]](s32), [[COPY2]](s32), 0, 0
+ ; WAVE32-LABEL: name: buffer_load_svv
+ ; WAVE32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr4, $vgpr5
+ ; WAVE32: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; WAVE32: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
+ ; WAVE32: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
+ ; WAVE32: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.buffer.load), [[COPY]](<4 x s32>), [[COPY1]](s32), [[COPY2]](s32), 0, 0
%0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:_(s32) = COPY $vgpr4
%2:_(s32) = COPY $vgpr5
@@ -192,39 +286,72 @@ body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr4, $vgpr5
- ; CHECK-LABEL: name: buffer_load_vsv
- ; CHECK: successors: %bb.1(0x80000000)
- ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr4, $vgpr5
- ; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
- ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
- ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; CHECK: [[DEF:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
- ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
- ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; CHECK: .1:
- ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.0, %10, %bb.1
- ; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.0, %3(<4 x s32>), %bb.1
- ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
- ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
- ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
- ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
- ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
- ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
- ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
- ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
- ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
- ; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY1]](s32), [[COPY2]](s32), 0, 0
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
- ; CHECK: .2:
- ; CHECK: successors: %bb.3(0x80000000)
- ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; CHECK: .3:
+ ; WAVE64-LABEL: name: buffer_load_vsv
+ ; WAVE64: successors: %bb.1(0x80000000)
+ ; WAVE64: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr4, $vgpr5
+ ; WAVE64: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ ; WAVE64: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
+ ; WAVE64: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
+ ; WAVE64: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; WAVE64: [[DEF:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
+ ; WAVE64: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; WAVE64: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; WAVE64: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; WAVE64: .1:
+ ; WAVE64: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; WAVE64: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.0, %10, %bb.1
+ ; WAVE64: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.0, %3(<4 x s32>), %bb.1
+ ; WAVE64: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
+ ; WAVE64: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
+ ; WAVE64: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
+ ; WAVE64: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
+ ; WAVE64: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
+ ; WAVE64: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
+ ; WAVE64: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; WAVE64: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
+ ; WAVE64: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; WAVE64: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; WAVE64: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY1]](s32), [[COPY2]](s32), 0, 0
+ ; WAVE64: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; WAVE64: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; WAVE64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+ ; WAVE64: .2:
+ ; WAVE64: successors: %bb.3(0x80000000)
+ ; WAVE64: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; WAVE64: .3:
+ ; WAVE32-LABEL: name: buffer_load_vsv
+ ; WAVE32: successors: %bb.1(0x80000000)
+ ; WAVE32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr4, $vgpr5
+ ; WAVE32: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ ; WAVE32: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
+ ; WAVE32: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
+ ; WAVE32: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; WAVE32: [[DEF:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
+ ; WAVE32: [[DEF1:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF
+ ; WAVE32: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; WAVE32: [[S_MOV_B32_term:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32_term $exec_lo
+ ; WAVE32: .1:
+ ; WAVE32: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; WAVE32: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[DEF1]], %bb.0, %10, %bb.1
+ ; WAVE32: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.0, %3(<4 x s32>), %bb.1
+ ; WAVE32: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
+ ; WAVE32: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
+ ; WAVE32: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
+ ; WAVE32: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
+ ; WAVE32: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
+ ; WAVE32: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
+ ; WAVE32: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; WAVE32: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
+ ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; WAVE32: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; WAVE32: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY1]](s32), [[COPY2]](s32), 0, 0
+ ; WAVE32: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; WAVE32: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
+ ; WAVE32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+ ; WAVE32: .2:
+ ; WAVE32: successors: %bb.3(0x80000000)
+ ; WAVE32: $exec_lo = S_MOV_B32_term [[S_MOV_B32_term]]
+ ; WAVE32: .3:
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(s32) = COPY $sgpr4
%2:_(s32) = COPY $vgpr5
@@ -240,38 +367,70 @@ body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4, $vgpr5
- ; CHECK-LABEL: name: buffer_load_vvv
- ; CHECK: successors: %bb.1(0x80000000)
- ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4, $vgpr5
- ; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
- ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
- ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
- ; CHECK: [[DEF:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
- ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
- ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; CHECK: .1:
- ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.0, %9, %bb.1
- ; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.0, %3(<4 x s32>), %bb.1
- ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
- ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
- ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
- ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
- ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
- ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
- ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
- ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
- ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
- ; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY1]](s32), [[COPY2]](s32), 0, 0
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
- ; CHECK: .2:
- ; CHECK: successors: %bb.3(0x80000000)
- ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; CHECK: .3:
+ ; WAVE64-LABEL: name: buffer_load_vvv
+ ; WAVE64: successors: %bb.1(0x80000000)
+ ; WAVE64: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4, $vgpr5
+ ; WAVE64: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ ; WAVE64: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
+ ; WAVE64: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
+ ; WAVE64: [[DEF:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
+ ; WAVE64: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; WAVE64: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; WAVE64: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; WAVE64: .1:
+ ; WAVE64: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; WAVE64: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.0, %9, %bb.1
+ ; WAVE64: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.0, %3(<4 x s32>), %bb.1
+ ; WAVE64: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
+ ; WAVE64: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
+ ; WAVE64: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
+ ; WAVE64: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
+ ; WAVE64: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
+ ; WAVE64: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
+ ; WAVE64: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; WAVE64: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
+ ; WAVE64: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; WAVE64: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; WAVE64: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY1]](s32), [[COPY2]](s32), 0, 0
+ ; WAVE64: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; WAVE64: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; WAVE64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+ ; WAVE64: .2:
+ ; WAVE64: successors: %bb.3(0x80000000)
+ ; WAVE64: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; WAVE64: .3:
+ ; WAVE32-LABEL: name: buffer_load_vvv
+ ; WAVE32: successors: %bb.1(0x80000000)
+ ; WAVE32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4, $vgpr5
+ ; WAVE32: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ ; WAVE32: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
+ ; WAVE32: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
+ ; WAVE32: [[DEF:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
+ ; WAVE32: [[DEF1:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF
+ ; WAVE32: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; WAVE32: [[S_MOV_B32_term:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32_term $exec_lo
+ ; WAVE32: .1:
+ ; WAVE32: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; WAVE32: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[DEF1]], %bb.0, %9, %bb.1
+ ; WAVE32: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.0, %3(<4 x s32>), %bb.1
+ ; WAVE32: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
+ ; WAVE32: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
+ ; WAVE32: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
+ ; WAVE32: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
+ ; WAVE32: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
+ ; WAVE32: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
+ ; WAVE32: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; WAVE32: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
+ ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; WAVE32: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; WAVE32: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY1]](s32), [[COPY2]](s32), 0, 0
+ ; WAVE32: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; WAVE32: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
+ ; WAVE32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+ ; WAVE32: .2:
+ ; WAVE32: successors: %bb.3(0x80000000)
+ ; WAVE32: $exec_lo = S_MOV_B32_term [[S_MOV_B32_term]]
+ ; WAVE32: .3:
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(s32) = COPY $vgpr4
%2:_(s32) = COPY $vgpr5
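
The check lines above all exhibit the same waterfall-loop shape: read the first active lane's value with V_READFIRSTLANE_B32, compute the set of lanes that hold the same value with V_CMP_EQ_*, run the operation with exec narrowed to that set (S_AND_SAVEEXEC_B*), then drop those lanes from exec (S_XOR_B*_term) and branch back while any remain (S_CBRANCH_EXECNZ). Below is a minimal host-side C++ sketch of that semantics, for illustration only; it is not the compiler's implementation, and the waterfall/UniformOp names are invented for the sketch. The MaskT width stands in for the exec mask: uint32_t models the wave32 ($exec_lo, *_B32) forms and uint64_t the wave64 ($exec, *_B64) forms.

// Host-side simulation of the emitted waterfall loop (illustrative only).
// Assumes LaneVals.size() does not exceed the mask width; __builtin_ctzll
// is a GCC/Clang builtin.
#include <cstdint>
#include <functional>
#include <vector>

template <typename MaskT>
void waterfall(MaskT Exec, const std::vector<uint32_t> &LaneVals,
               const std::function<void(uint32_t, MaskT)> &UniformOp) {
  const MaskT SavedExec = Exec;                  // S_MOV_B{32,64}_term $exec
  while (Exec != 0) {                            // S_CBRANCH_EXECNZ %bb.1
    unsigned FirstLane = (unsigned)__builtin_ctzll(Exec);
    uint32_t Uniform = LaneVals[FirstLane];      // V_READFIRSTLANE_B32
    MaskT EqMask = 0;                            // V_CMP_EQ_U32_e64
    for (unsigned L = 0; L < LaneVals.size() && L < sizeof(MaskT) * 8; ++L)
      if (((Exec >> L) & 1) && LaneVals[L] == Uniform)
        EqMask |= MaskT(1) << L;
    UniformOp(Uniform, EqMask);                  // op runs with exec == EqMask
    Exec &= ~EqMask;                             // S_AND_SAVEEXEC + S_XOR_term
  }
  (void)SavedExec;  // $exec is then restored from the saved copy past the loop
}

For instance, waterfall<uint32_t>(0xF, vals, op) models a wave32 loop over the low four lanes, while the uint64_t instantiation models the wave64 variant checked under the WAVE64 prefix.
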
Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir?rev=373714&r1=373713&r2=373714&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir Fri Oct 4 01:35:35 2019
@@ -39,12 +39,12 @@ body: |
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: .1:
; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.0, %8, %bb.1
+ ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.0, %8, %bb.1
; CHECK: [[PHI1:%[0-9]+]]:sgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.0, %2(<4 x s32>), %bb.1
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
- ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
+ ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
; CHECK: [[INT:%[0-9]+]]:sgpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), [[COPY]](<4 x s32>), [[V_READFIRSTLANE_B32_]](s32), 0
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
; CHECK: .2:
@@ -76,7 +76,7 @@ body: |
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: .1:
; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.0, %8, %bb.1
+ ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.0, %8, %bb.1
; CHECK: [[PHI1:%[0-9]+]]:sgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.0, %2(<4 x s32>), %bb.1
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
@@ -89,7 +89,7 @@ body: |
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: [[INT:%[0-9]+]]:sgpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY1]](s32), 0
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
; CHECK: .2:
@@ -121,7 +121,7 @@ body: |
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: .1:
; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.0, %8, %bb.1
+ ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.0, %8, %bb.1
; CHECK: [[PHI1:%[0-9]+]]:sgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.0, %2(<4 x s32>), %bb.1
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
@@ -134,10 +134,10 @@ body: |
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
- ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]](s32), [[COPY1]](s32), implicit $exec
+ ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]](s32), [[COPY1]](s32), implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[INT:%[0-9]+]]:sgpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[V_READFIRSTLANE_B32_4]](s32), 0
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
; CHECK: .2:
Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll?rev=373714&r1=373713&r2=373714&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll Fri Oct 4 01:35:35 2019
@@ -76,7 +76,7 @@ define amdgpu_ps void @load_1d_vgpr_vadd
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF2]], %bb.1, %19, %bb.2
+ ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %19, %bb.2
; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %12(<4 x s32>), %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
@@ -99,7 +99,7 @@ define amdgpu_ps void @load_1d_vgpr_vadd
; CHECK: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
@@ -138,7 +138,7 @@ define amdgpu_ps void @load_1d_sgpr_vadd
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF2]], %bb.1, %20, %bb.2
+ ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %20, %bb.2
; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %12(<4 x s32>), %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
@@ -161,7 +161,7 @@ define amdgpu_ps void @load_1d_sgpr_vadd
; CHECK: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll?rev=373714&r1=373713&r2=373714&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll Fri Oct 4 01:35:35 2019
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-fast -o - %s | FileCheck %s
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-greedy -o - %s | FileCheck %s
+; XUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-greedy -o - %s | FileCheck %s
; Natural mapping
define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__sgpr_samp(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
@@ -91,7 +91,7 @@ define amdgpu_ps void @sample_1d_vgpr_va
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF2]], %bb.1, %24, %bb.2
+ ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %24, %bb.2
; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %17(<4 x s32>), %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
@@ -114,7 +114,7 @@ define amdgpu_ps void @sample_1d_vgpr_va
; CHECK: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc
; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
@@ -157,7 +157,7 @@ define amdgpu_ps void @sample_1d_vgpr_va
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF2]], %bb.1, %24, %bb.2
+ ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %24, %bb.2
; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %17(<4 x s32>), %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
@@ -170,7 +170,7 @@ define amdgpu_ps void @sample_1d_vgpr_va
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR2]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
@@ -214,7 +214,7 @@ define amdgpu_ps void @sample_1d_vgpr_va
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF2]], %bb.1, %24, %bb.2
+ ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %24, %bb.2
; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %17(<4 x s32>), %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
@@ -248,7 +248,7 @@ define amdgpu_ps void @sample_1d_vgpr_va
; CHECK: [[S_AND_B64_4:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_5]], [[S_AND_B64_3]], implicit-def $scc
; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_8]](s32), [[V_READFIRSTLANE_B32_9]](s32), [[V_READFIRSTLANE_B32_10]](s32), [[V_READFIRSTLANE_B32_11]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR3]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_4]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_4]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll?rev=373714&r1=373713&r2=373714&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll Fri Oct 4 01:35:35 2019
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-fast -o - %s | FileCheck %s
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-greedy -o - %s | FileCheck %s
+; XUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-greedy -o - %s | FileCheck %s
; Natural mapping
define amdgpu_ps float @raw_buffer_load__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
@@ -60,7 +60,7 @@ define amdgpu_ps float @raw_buffer_load_
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.1, %15, %bb.2
+ ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %15, %bb.2
; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %9(s32), %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
@@ -73,7 +73,7 @@ define amdgpu_ps float @raw_buffer_load_
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.buffer.load), [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
@@ -104,12 +104,12 @@ define amdgpu_ps float @raw_buffer_load_
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.1, %15, %bb.2
+ ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %15, %bb.2
; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %9(s32), %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec
- ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY5]](s32), implicit $exec
+ ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY5]](s32), implicit $exec
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[V_READFIRSTLANE_B32_]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
@@ -141,7 +141,7 @@ define amdgpu_ps float @raw_buffer_load_
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.1, %15, %bb.2
+ ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %15, %bb.2
; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %9(s32), %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
@@ -154,10 +154,10 @@ define amdgpu_ps float @raw_buffer_load_
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec
- ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]](s32), [[COPY5]](s32), implicit $exec
+ ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]](s32), [[COPY5]](s32), implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.buffer.load), [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[V_READFIRSTLANE_B32_4]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll?rev=373714&r1=373713&r2=373714&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll Fri Oct 4 01:35:35 2019
@@ -64,7 +64,7 @@ define amdgpu_ps float @struct_buffer_lo
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.1, %16, %bb.2
+ ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %16, %bb.2
; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %10(s32), %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
@@ -77,7 +77,7 @@ define amdgpu_ps float @struct_buffer_lo
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.load), [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
@@ -109,12 +109,12 @@ define amdgpu_ps float @struct_buffer_lo
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.1, %16, %bb.2
+ ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %16, %bb.2
; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %10(s32), %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec
- ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY6]](s32), implicit $exec
+ ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY6]](s32), implicit $exec
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), [[V_READFIRSTLANE_B32_]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
@@ -147,7 +147,7 @@ define amdgpu_ps float @struct_buffer_lo
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.1, %16, %bb.2
+ ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %16, %bb.2
; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %10(s32), %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
@@ -160,10 +160,10 @@ define amdgpu_ps float @struct_buffer_lo
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec
- ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]](s32), [[COPY6]](s32), implicit $exec
+ ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]](s32), [[COPY6]](s32), implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.load), [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), [[V_READFIRSTLANE_B32_4]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll?rev=373714&r1=373713&r2=373714&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll Fri Oct 4 01:35:35 2019
@@ -65,7 +65,7 @@ define amdgpu_ps void @struct_buffer_sto
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF]], %bb.1, %14, %bb.2
+ ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
@@ -77,7 +77,7 @@ define amdgpu_ps void @struct_buffer_sto
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.store), [[COPY4]](s32), [[BUILD_VECTOR1]](<4 x s32>), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), 0 :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
@@ -108,11 +108,11 @@ define amdgpu_ps void @struct_buffer_sto
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF]], %bb.1, %14, %bb.2
+ ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec
- ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY7]](s32), implicit $exec
+ ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY7]](s32), implicit $exec
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.store), [[COPY4]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY5]](s32), [[COPY6]](s32), [[V_READFIRSTLANE_B32_]](s32), 0 :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
@@ -144,7 +144,7 @@ define amdgpu_ps void @struct_buffer_sto
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF]], %bb.1, %14, %bb.2
+ ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
@@ -156,10 +156,10 @@ define amdgpu_ps void @struct_buffer_sto
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec
- ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]](s32), [[COPY7]](s32), implicit $exec
+ ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]](s32), [[COPY7]](s32), implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.store), [[COPY4]](s32), [[BUILD_VECTOR1]](<4 x s32>), [[COPY5]](s32), [[COPY6]](s32), [[V_READFIRSTLANE_B32_4]](s32), 0 :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir?rev=373714&r1=373713&r2=373714&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir Fri Oct 4 01:35:35 2019
@@ -1,6 +1,9 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
-# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck -check-prefix=WAVE64 %s
+# XUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck -check-prefix=WAVE64 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck -check-prefix=WAVE32 %s
+# XUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck -check-prefix=WAVE32 %s
+
---
name: extract_vector_elt_v16s32_ss
@@ -10,12 +13,19 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16
- ; CHECK-LABEL: name: extract_vector_elt_v16s32_ss
- ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16
- ; CHECK: [[COPY:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr16
- ; CHECK: [[EVEC:%[0-9]+]]:sgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[COPY1]](s32)
- ; CHECK: $vgpr0 = COPY [[EVEC]](s32)
+
+ ; WAVE64-LABEL: name: extract_vector_elt_v16s32_ss
+ ; WAVE64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16
+ ; WAVE64: [[COPY:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ ; WAVE64: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr16
+ ; WAVE64: [[EVEC:%[0-9]+]]:sgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[COPY1]](s32)
+ ; WAVE64: $vgpr0 = COPY [[EVEC]](s32)
+ ; WAVE32-LABEL: name: extract_vector_elt_v16s32_ss
+ ; WAVE32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16
+ ; WAVE32: [[COPY:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ ; WAVE32: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr16
+ ; WAVE32: [[EVEC:%[0-9]+]]:sgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[COPY1]](s32)
+ ; WAVE32: $vgpr0 = COPY [[EVEC]](s32)
%0:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
%1:_(s32) = COPY $sgpr16
%2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
@@ -30,29 +40,53 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0
- ; CHECK-LABEL: name: extract_vector_elt_v16s32_sv
- ; CHECK: successors: %bb.1(0x80000000)
- ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0
- ; CHECK: [[COPY:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
- ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
- ; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
- ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; CHECK: .1:
- ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.0, %8, %bb.1
- ; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %2(s32), %bb.1
- ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
- ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
- ; CHECK: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[V_READFIRSTLANE_B32_]](s32)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
- ; CHECK: .2:
- ; CHECK: successors: %bb.3(0x80000000)
- ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; CHECK: .3:
- ; CHECK: $vgpr0 = COPY [[EVEC]](s32)
+
+ ; WAVE64-LABEL: name: extract_vector_elt_v16s32_sv
+ ; WAVE64: successors: %bb.1(0x80000000)
+ ; WAVE64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0
+ ; WAVE64: [[COPY:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+ ; WAVE64: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
+ ; WAVE64: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; WAVE64: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; WAVE64: .1:
+ ; WAVE64: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; WAVE64: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.0, %8, %bb.1
+ ; WAVE64: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %2(s32), %bb.1
+ ; WAVE64: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
+ ; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
+ ; WAVE64: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[V_READFIRSTLANE_B32_]](s32)
+ ; WAVE64: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; WAVE64: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; WAVE64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+ ; WAVE64: .2:
+ ; WAVE64: successors: %bb.3(0x80000000)
+ ; WAVE64: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; WAVE64: .3:
+ ; WAVE64: $vgpr0 = COPY [[EVEC]](s32)
+ ; WAVE32-LABEL: name: extract_vector_elt_v16s32_sv
+ ; WAVE32: successors: %bb.1(0x80000000)
+ ; WAVE32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0
+ ; WAVE32: [[COPY:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+ ; WAVE32: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
+ ; WAVE32: [[DEF1:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF
+ ; WAVE32: [[S_MOV_B32_term:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32_term $exec_lo
+ ; WAVE32: .1:
+ ; WAVE32: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; WAVE32: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[DEF1]], %bb.0, %8, %bb.1
+ ; WAVE32: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %2(s32), %bb.1
+ ; WAVE32: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
+ ; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
+ ; WAVE32: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[V_READFIRSTLANE_B32_]](s32)
+ ; WAVE32: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; WAVE32: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
+ ; WAVE32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+ ; WAVE32: .2:
+ ; WAVE32: successors: %bb.3(0x80000000)
+ ; WAVE32: $exec_lo = S_MOV_B32_term [[S_MOV_B32_term]]
+ ; WAVE32: .3:
+ ; WAVE32: $vgpr0 = COPY [[EVEC]](s32)
%0:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
%1:_(s32) = COPY $vgpr0
%2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
@@ -67,12 +101,19 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $sgpr0
- ; CHECK-LABEL: name: extract_vector_elt_v16s32_vs
- ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $sgpr0
- ; CHECK: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
- ; CHECK: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[COPY1]](s32)
- ; CHECK: $vgpr0 = COPY [[EVEC]](s32)
+
+ ; WAVE64-LABEL: name: extract_vector_elt_v16s32_vs
+ ; WAVE64: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $sgpr0
+ ; WAVE64: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; WAVE64: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; WAVE64: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[COPY1]](s32)
+ ; WAVE64: $vgpr0 = COPY [[EVEC]](s32)
+ ; WAVE32-LABEL: name: extract_vector_elt_v16s32_vs
+ ; WAVE32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $sgpr0
+ ; WAVE32: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; WAVE32: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; WAVE32: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[COPY1]](s32)
+ ; WAVE32: $vgpr0 = COPY [[EVEC]](s32)
%0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
%1:_(s32) = COPY $sgpr0
%2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
@@ -87,29 +128,53 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16
- ; CHECK-LABEL: name: extract_vector_elt_v16s32_vv
- ; CHECK: successors: %bb.1(0x80000000)
- ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16
- ; CHECK: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
- ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr16
- ; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
- ; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
- ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; CHECK: .1:
- ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.0, %8, %bb.1
- ; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %2(s32), %bb.1
- ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
- ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
- ; CHECK: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[V_READFIRSTLANE_B32_]](s32)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
- ; CHECK: .2:
- ; CHECK: successors: %bb.3(0x80000000)
- ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; CHECK: .3:
- ; CHECK: $vgpr0 = COPY [[EVEC]](s32)
+
+ ; WAVE64-LABEL: name: extract_vector_elt_v16s32_vv
+ ; WAVE64: successors: %bb.1(0x80000000)
+ ; WAVE64: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16
+ ; WAVE64: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr16
+ ; WAVE64: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
+ ; WAVE64: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; WAVE64: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; WAVE64: .1:
+ ; WAVE64: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; WAVE64: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.0, %8, %bb.1
+ ; WAVE64: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %2(s32), %bb.1
+ ; WAVE64: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
+ ; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
+ ; WAVE64: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[V_READFIRSTLANE_B32_]](s32)
+ ; WAVE64: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; WAVE64: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; WAVE64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+ ; WAVE64: .2:
+ ; WAVE64: successors: %bb.3(0x80000000)
+ ; WAVE64: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; WAVE64: .3:
+ ; WAVE64: $vgpr0 = COPY [[EVEC]](s32)
+ ; WAVE32-LABEL: name: extract_vector_elt_v16s32_vv
+ ; WAVE32: successors: %bb.1(0x80000000)
+ ; WAVE32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16
+ ; WAVE32: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr16
+ ; WAVE32: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
+ ; WAVE32: [[DEF1:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF
+ ; WAVE32: [[S_MOV_B32_term:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32_term $exec_lo
+ ; WAVE32: .1:
+ ; WAVE32: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; WAVE32: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[DEF1]], %bb.0, %8, %bb.1
+ ; WAVE32: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %2(s32), %bb.1
+ ; WAVE32: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
+ ; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
+ ; WAVE32: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[V_READFIRSTLANE_B32_]](s32)
+ ; WAVE32: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; WAVE32: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
+ ; WAVE32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+ ; WAVE32: .2:
+ ; WAVE32: successors: %bb.3(0x80000000)
+ ; WAVE32: $exec_lo = S_MOV_B32_term [[S_MOV_B32_term]]
+ ; WAVE32: .3:
+ ; WAVE32: $vgpr0 = COPY [[EVEC]](s32)
%0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
%1:_(s32) = COPY $vgpr16
%2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
@@ -124,12 +189,19 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16
- ; CHECK-LABEL: name: extract_vector_elt_v8s64_ss
- ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16
- ; CHECK: [[COPY:%[0-9]+]]:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr16
- ; CHECK: [[EVEC:%[0-9]+]]:sgpr(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](<8 x s64>), [[COPY1]](s32)
- ; CHECK: $sgpr0_sgpr1 = COPY [[EVEC]](s64)
+
+ ; WAVE64-LABEL: name: extract_vector_elt_v8s64_ss
+ ; WAVE64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16
+ ; WAVE64: [[COPY:%[0-9]+]]:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ ; WAVE64: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr16
+ ; WAVE64: [[EVEC:%[0-9]+]]:sgpr(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](<8 x s64>), [[COPY1]](s32)
+ ; WAVE64: $sgpr0_sgpr1 = COPY [[EVEC]](s64)
+ ; WAVE32-LABEL: name: extract_vector_elt_v8s64_ss
+ ; WAVE32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16
+ ; WAVE32: [[COPY:%[0-9]+]]:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ ; WAVE32: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr16
+ ; WAVE32: [[EVEC:%[0-9]+]]:sgpr(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](<8 x s64>), [[COPY1]](s32)
+ ; WAVE32: $sgpr0_sgpr1 = COPY [[EVEC]](s64)
%0:_(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
%1:_(s32) = COPY $sgpr16
%2:_(s64) = G_EXTRACT_VECTOR_ELT %0, %1
@@ -145,18 +217,30 @@ body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $sgpr0
- ; CHECK-LABEL: name: extract_vector_elt_v8s64_vs
- ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $sgpr0
- ; CHECK: [[COPY:%[0-9]+]]:vgpr(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
- ; CHECK: [[BITCAST:%[0-9]+]]:vgpr(<16 x s32>) = G_BITCAST [[COPY]](<8 x s64>)
- ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
- ; CHECK: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY1]], [[C]](s32)
- ; CHECK: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C]]
- ; CHECK: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[SHL]](s32)
- ; CHECK: [[EVEC1:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD]](s32)
- ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[EVEC]](s32), [[EVEC1]](s32)
- ; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64)
+ ; WAVE64-LABEL: name: extract_vector_elt_v8s64_vs
+ ; WAVE64: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $sgpr0
+ ; WAVE64: [[COPY:%[0-9]+]]:vgpr(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; WAVE64: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; WAVE64: [[BITCAST:%[0-9]+]]:vgpr(<16 x s32>) = G_BITCAST [[COPY]](<8 x s64>)
+ ; WAVE64: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
+ ; WAVE64: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY1]], [[C]](s32)
+ ; WAVE64: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C]]
+ ; WAVE64: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[SHL]](s32)
+ ; WAVE64: [[EVEC1:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD]](s32)
+ ; WAVE64: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[EVEC]](s32), [[EVEC1]](s32)
+ ; WAVE64: $vgpr0_vgpr1 = COPY [[MV]](s64)
+ ; WAVE32-LABEL: name: extract_vector_elt_v8s64_vs
+ ; WAVE32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $sgpr0
+ ; WAVE32: [[COPY:%[0-9]+]]:vgpr(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; WAVE32: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; WAVE32: [[BITCAST:%[0-9]+]]:vgpr(<16 x s32>) = G_BITCAST [[COPY]](<8 x s64>)
+ ; WAVE32: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
+ ; WAVE32: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY1]], [[C]](s32)
+ ; WAVE32: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C]]
+ ; WAVE32: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[SHL]](s32)
+ ; WAVE32: [[EVEC1:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD]](s32)
+ ; WAVE32: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[EVEC]](s32), [[EVEC1]](s32)
+ ; WAVE32: $vgpr0_vgpr1 = COPY [[MV]](s64)
%0:_(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
%1:_(s32) = COPY $sgpr0
%2:_(s64) = G_EXTRACT_VECTOR_ELT %0, %1
@@ -171,41 +255,77 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0
- ; CHECK-LABEL: name: extract_vector_elt_v8s64_sv
- ; CHECK: successors: %bb.1(0x80000000)
- ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0
- ; CHECK: [[COPY:%[0-9]+]]:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
- ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK: [[BITCAST:%[0-9]+]]:sgpr(<16 x s32>) = G_BITCAST [[COPY]](<8 x s64>)
- ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
- ; CHECK: [[DEF:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF
- ; CHECK: [[DEF1:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF
- ; CHECK: [[DEF2:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
- ; CHECK: [[DEF3:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
- ; CHECK: [[DEF4:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
- ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; CHECK: .1:
- ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF4]], %bb.0, %20, %bb.1
- ; CHECK: [[PHI1:%[0-9]+]]:sgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %7(s32), %bb.1
- ; CHECK: [[PHI2:%[0-9]+]]:sgpr(s32) = G_PHI [[DEF1]](s32), %bb.0, %8(s32), %bb.1
- ; CHECK: [[PHI3:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF2]](s32), %bb.0, %3(s32), %bb.1
- ; CHECK: [[PHI4:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF3]](s32), %bb.0, %4(s32), %bb.1
- ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
- ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
- ; CHECK: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[V_READFIRSTLANE_B32_]], [[C]](s32)
- ; CHECK: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C]]
- ; CHECK: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[SHL]](s32)
- ; CHECK: [[EVEC1:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD]](s32)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
- ; CHECK: .2:
- ; CHECK: successors: %bb.3(0x80000000)
- ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; CHECK: .3:
- ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[EVEC]](s32), [[EVEC1]](s32)
- ; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64)
+
+ ; WAVE64-LABEL: name: extract_vector_elt_v8s64_sv
+ ; WAVE64: successors: %bb.1(0x80000000)
+ ; WAVE64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0
+ ; WAVE64: [[COPY:%[0-9]+]]:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+ ; WAVE64: [[BITCAST:%[0-9]+]]:sgpr(<16 x s32>) = G_BITCAST [[COPY]](<8 x s64>)
+ ; WAVE64: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
+ ; WAVE64: [[DEF:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF
+ ; WAVE64: [[DEF1:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF
+ ; WAVE64: [[DEF2:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
+ ; WAVE64: [[DEF3:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
+ ; WAVE64: [[DEF4:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; WAVE64: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; WAVE64: .1:
+ ; WAVE64: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; WAVE64: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF4]], %bb.0, %20, %bb.1
+ ; WAVE64: [[PHI1:%[0-9]+]]:sgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %7(s32), %bb.1
+ ; WAVE64: [[PHI2:%[0-9]+]]:sgpr(s32) = G_PHI [[DEF1]](s32), %bb.0, %8(s32), %bb.1
+ ; WAVE64: [[PHI3:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF2]](s32), %bb.0, %3(s32), %bb.1
+ ; WAVE64: [[PHI4:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF3]](s32), %bb.0, %4(s32), %bb.1
+ ; WAVE64: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
+ ; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
+ ; WAVE64: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[V_READFIRSTLANE_B32_]], [[C]](s32)
+ ; WAVE64: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C]]
+ ; WAVE64: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[SHL]](s32)
+ ; WAVE64: [[EVEC1:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD]](s32)
+ ; WAVE64: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; WAVE64: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; WAVE64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+ ; WAVE64: .2:
+ ; WAVE64: successors: %bb.3(0x80000000)
+ ; WAVE64: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; WAVE64: .3:
+ ; WAVE64: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[EVEC]](s32), [[EVEC1]](s32)
+ ; WAVE64: $vgpr0_vgpr1 = COPY [[MV]](s64)
+ ; WAVE32-LABEL: name: extract_vector_elt_v8s64_sv
+ ; WAVE32: successors: %bb.1(0x80000000)
+ ; WAVE32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0
+ ; WAVE32: [[COPY:%[0-9]+]]:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+ ; WAVE32: [[BITCAST:%[0-9]+]]:sgpr(<16 x s32>) = G_BITCAST [[COPY]](<8 x s64>)
+ ; WAVE32: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
+ ; WAVE32: [[DEF:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF
+ ; WAVE32: [[DEF1:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF
+ ; WAVE32: [[DEF2:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
+ ; WAVE32: [[DEF3:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
+ ; WAVE32: [[DEF4:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF
+ ; WAVE32: [[S_MOV_B32_term:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32_term $exec_lo
+ ; WAVE32: .1:
+ ; WAVE32: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; WAVE32: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[DEF4]], %bb.0, %20, %bb.1
+ ; WAVE32: [[PHI1:%[0-9]+]]:sgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %7(s32), %bb.1
+ ; WAVE32: [[PHI2:%[0-9]+]]:sgpr(s32) = G_PHI [[DEF1]](s32), %bb.0, %8(s32), %bb.1
+ ; WAVE32: [[PHI3:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF2]](s32), %bb.0, %3(s32), %bb.1
+ ; WAVE32: [[PHI4:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF3]](s32), %bb.0, %4(s32), %bb.1
+ ; WAVE32: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
+ ; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
+ ; WAVE32: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[V_READFIRSTLANE_B32_]], [[C]](s32)
+ ; WAVE32: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C]]
+ ; WAVE32: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[SHL]](s32)
+ ; WAVE32: [[EVEC1:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD]](s32)
+ ; WAVE32: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; WAVE32: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
+ ; WAVE32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+ ; WAVE32: .2:
+ ; WAVE32: successors: %bb.3(0x80000000)
+ ; WAVE32: $exec_lo = S_MOV_B32_term [[S_MOV_B32_term]]
+ ; WAVE32: .3:
+ ; WAVE32: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[EVEC]](s32), [[EVEC1]](s32)
+ ; WAVE32: $vgpr0_vgpr1 = COPY [[MV]](s64)
%0:_(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
%1:_(s32) = COPY $vgpr0
%2:_(s64) = G_EXTRACT_VECTOR_ELT %0, %1
@@ -220,41 +340,77 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16
- ; CHECK-LABEL: name: extract_vector_elt_v8s64_vv
- ; CHECK: successors: %bb.1(0x80000000)
- ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16
- ; CHECK: [[COPY:%[0-9]+]]:vgpr(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
- ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr16
- ; CHECK: [[BITCAST:%[0-9]+]]:vgpr(<16 x s32>) = G_BITCAST [[COPY]](<8 x s64>)
- ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
- ; CHECK: [[DEF:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF
- ; CHECK: [[DEF1:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF
- ; CHECK: [[DEF2:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
- ; CHECK: [[DEF3:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
- ; CHECK: [[DEF4:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
- ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; CHECK: .1:
- ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF4]], %bb.0, %20, %bb.1
- ; CHECK: [[PHI1:%[0-9]+]]:sgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %7(s32), %bb.1
- ; CHECK: [[PHI2:%[0-9]+]]:sgpr(s32) = G_PHI [[DEF1]](s32), %bb.0, %8(s32), %bb.1
- ; CHECK: [[PHI3:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF2]](s32), %bb.0, %3(s32), %bb.1
- ; CHECK: [[PHI4:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF3]](s32), %bb.0, %4(s32), %bb.1
- ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
- ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
- ; CHECK: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[V_READFIRSTLANE_B32_]], [[C]](s32)
- ; CHECK: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C]]
- ; CHECK: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[SHL]](s32)
- ; CHECK: [[EVEC1:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD]](s32)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
- ; CHECK: .2:
- ; CHECK: successors: %bb.3(0x80000000)
- ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; CHECK: .3:
- ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[EVEC]](s32), [[EVEC1]](s32)
- ; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64)
+
+ ; WAVE64-LABEL: name: extract_vector_elt_v8s64_vv
+ ; WAVE64: successors: %bb.1(0x80000000)
+ ; WAVE64: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16
+ ; WAVE64: [[COPY:%[0-9]+]]:vgpr(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr16
+ ; WAVE64: [[BITCAST:%[0-9]+]]:vgpr(<16 x s32>) = G_BITCAST [[COPY]](<8 x s64>)
+ ; WAVE64: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
+ ; WAVE64: [[DEF:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF
+ ; WAVE64: [[DEF1:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF
+ ; WAVE64: [[DEF2:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
+ ; WAVE64: [[DEF3:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
+ ; WAVE64: [[DEF4:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; WAVE64: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; WAVE64: .1:
+ ; WAVE64: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; WAVE64: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF4]], %bb.0, %20, %bb.1
+ ; WAVE64: [[PHI1:%[0-9]+]]:sgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %7(s32), %bb.1
+ ; WAVE64: [[PHI2:%[0-9]+]]:sgpr(s32) = G_PHI [[DEF1]](s32), %bb.0, %8(s32), %bb.1
+ ; WAVE64: [[PHI3:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF2]](s32), %bb.0, %3(s32), %bb.1
+ ; WAVE64: [[PHI4:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF3]](s32), %bb.0, %4(s32), %bb.1
+ ; WAVE64: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
+ ; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
+ ; WAVE64: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[V_READFIRSTLANE_B32_]], [[C]](s32)
+ ; WAVE64: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C]]
+ ; WAVE64: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[SHL]](s32)
+ ; WAVE64: [[EVEC1:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD]](s32)
+ ; WAVE64: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; WAVE64: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; WAVE64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+ ; WAVE64: .2:
+ ; WAVE64: successors: %bb.3(0x80000000)
+ ; WAVE64: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; WAVE64: .3:
+ ; WAVE64: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[EVEC]](s32), [[EVEC1]](s32)
+ ; WAVE64: $vgpr0_vgpr1 = COPY [[MV]](s64)
+ ; WAVE32-LABEL: name: extract_vector_elt_v8s64_vv
+ ; WAVE32: successors: %bb.1(0x80000000)
+ ; WAVE32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16
+ ; WAVE32: [[COPY:%[0-9]+]]:vgpr(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr16
+ ; WAVE32: [[BITCAST:%[0-9]+]]:vgpr(<16 x s32>) = G_BITCAST [[COPY]](<8 x s64>)
+ ; WAVE32: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
+ ; WAVE32: [[DEF:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF
+ ; WAVE32: [[DEF1:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF
+ ; WAVE32: [[DEF2:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
+ ; WAVE32: [[DEF3:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
+ ; WAVE32: [[DEF4:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF
+ ; WAVE32: [[S_MOV_B32_term:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32_term $exec_lo
+ ; WAVE32: .1:
+ ; WAVE32: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; WAVE32: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[DEF4]], %bb.0, %20, %bb.1
+ ; WAVE32: [[PHI1:%[0-9]+]]:sgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %7(s32), %bb.1
+ ; WAVE32: [[PHI2:%[0-9]+]]:sgpr(s32) = G_PHI [[DEF1]](s32), %bb.0, %8(s32), %bb.1
+ ; WAVE32: [[PHI3:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF2]](s32), %bb.0, %3(s32), %bb.1
+ ; WAVE32: [[PHI4:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF3]](s32), %bb.0, %4(s32), %bb.1
+ ; WAVE32: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
+ ; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
+ ; WAVE32: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[V_READFIRSTLANE_B32_]], [[C]](s32)
+ ; WAVE32: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C]]
+ ; WAVE32: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[SHL]](s32)
+ ; WAVE32: [[EVEC1:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<16 x s32>), [[ADD]](s32)
+ ; WAVE32: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; WAVE32: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
+ ; WAVE32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+ ; WAVE32: .2:
+ ; WAVE32: successors: %bb.3(0x80000000)
+ ; WAVE32: $exec_lo = S_MOV_B32_term [[S_MOV_B32_term]]
+ ; WAVE32: .3:
+ ; WAVE32: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[EVEC]](s32), [[EVEC1]](s32)
+ ; WAVE32: $vgpr0_vgpr1 = COPY [[MV]](s64)
%0:_(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
%1:_(s32) = COPY $vgpr16
%2:_(s64) = G_EXTRACT_VECTOR_ELT %0, %1
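
For readers unfamiliar with the pattern the WAVE64/WAVE32 checks above encode: a waterfall loop serializes an operation whose operand must be uniform (here the vector index) when that value is divergent across lanes. Each trip reads one lane's value (V_READFIRSTLANE_B32), compares it against every active lane to build a match mask (V_CMP_EQ_U32), runs the operation with exec restricted to the matching lanes, and xors those lanes out of the pending set until it is empty; the wave32 variant is the same loop over a 32-bit exec_lo with the B32 opcodes. Below is a minimal host-side sketch of that algorithm in plain C++ — a toy model with an assumed 16-element vector and a lane-divergent index, not LLVM API or the committed code:

#include <cstdint>
#include <cstdio>

int main() {
  constexpr int WaveSize = 32;                 // the wave32 flavor
  constexpr int NumElts = 16;
  int Vec[NumElts];
  for (int i = 0; i < NumElts; ++i)
    Vec[i] = i * 10;                           // arbitrary toy contents

  int Index[WaveSize];
  int Result[WaveSize] = {};
  for (int l = 0; l < WaveSize; ++l)
    Index[l] = l % NumElts;                    // divergent per-lane index

  uint32_t SavedExec = 0xffffffffu;            // models S_MOV_B32_term $exec_lo
  uint32_t Pending = SavedExec;                // lanes still to be serviced

  while (Pending) {                            // bb.1 in the checks
    int FirstLane = 0;
    while (!((Pending >> FirstLane) & 1))      // lowest still-active lane
      ++FirstLane;
    int Uniform = Index[FirstLane];            // V_READFIRSTLANE_B32

    uint32_t Match = 0;                        // V_CMP_EQ_U32 result
    for (int l = 0; l < WaveSize; ++l)
      if (((Pending >> l) & 1) && Index[l] == Uniform)
        Match |= 1u << l;

    for (int l = 0; l < WaveSize; ++l)         // op executes with exec = Match
      if ((Match >> l) & 1)
        Result[l] = Vec[Uniform];              // the G_EXTRACT_VECTOR_ELT

    Pending &= ~Match;                         // S_XOR_B32_term; EXECNZ loops
  }
  // exec is restored from the saved copy here (bb.2 in the checks).

  printf("lane 0 -> %d, lane 5 -> %d\n", Result[0], Result[5]);
  return 0;
}

The WAVE64 checks are the identical loop with 64-bit masks and the B64 terminator opcodes; only the mask width and register classes differ.
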
Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert-vector-elt.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert-vector-elt.mir?rev=373714&r1=373713&r2=373714&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert-vector-elt.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert-vector-elt.mir Fri Oct 4 01:35:35 2019
@@ -85,12 +85,12 @@ body: |
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: .1:
; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.0, %9, %bb.1
+ ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.0, %9, %bb.1
; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.0, %3(<4 x s32>), %bb.1
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY2]](s32), implicit $exec
- ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY2]](s32), implicit $exec
+ ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY2]](s32), implicit $exec
; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[V_READFIRSTLANE_B32_]](s32)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
; CHECK: .2:
@@ -125,12 +125,12 @@ body: |
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: .1:
; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.0, %9, %bb.1
+ ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.0, %9, %bb.1
; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.0, %3(<4 x s32>), %bb.1
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY2]](s32), implicit $exec
- ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY2]](s32), implicit $exec
+ ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY2]](s32), implicit $exec
; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[V_READFIRSTLANE_B32_]](s32)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
; CHECK: .2:
@@ -165,12 +165,12 @@ body: |
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: .1:
; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.0, %9, %bb.1
+ ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.0, %9, %bb.1
; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.0, %3(<4 x s32>), %bb.1
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY2]](s32), implicit $exec
- ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY2]](s32), implicit $exec
+ ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY2]](s32), implicit $exec
; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[V_READFIRSTLANE_B32_]](s32)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
; CHECK: .2:
@@ -228,12 +228,12 @@ body: |
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: .1:
; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.0, %9, %bb.1
+ ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.0, %9, %bb.1
; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.0, %3(<4 x s32>), %bb.1
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY2]](s32), implicit $exec
- ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY2]](s32), implicit $exec
+ ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY2]](s32), implicit $exec
; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[V_READFIRSTLANE_B32_]](s32)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
; CHECK: .2:
@@ -344,12 +344,12 @@ body: |
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: .1:
; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.0, %9, %bb.1
+ ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.0, %9, %bb.1
; CHECK: [[PHI1:%[0-9]+]]:vgpr(<8 x s64>) = G_PHI [[DEF]](<8 x s64>), %bb.0, %3(<8 x s64>), %bb.1
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY2]](s32), implicit $exec
- ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY2]](s32), implicit $exec
+ ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY2]](s32), implicit $exec
; CHECK: [[IVEC:%[0-9]+]]:vgpr(<8 x s64>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s64), [[V_READFIRSTLANE_B32_]](s32)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
; CHECK: .2:
@@ -391,20 +391,20 @@ body: |
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: .1:
; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF5]], %bb.0, %25, %bb.1
+ ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF5]], %bb.0, %25, %bb.1
; CHECK: [[PHI1:%[0-9]+]]:sgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %8(s32), %bb.1
; CHECK: [[PHI2:%[0-9]+]]:sgpr(s32) = G_PHI [[DEF1]](s32), %bb.0, %9(s32), %bb.1
; CHECK: [[PHI3:%[0-9]+]]:vgpr(<16 x s32>) = G_PHI [[DEF2]](<16 x s32>), %bb.0, %10(<16 x s32>), %bb.1
; CHECK: [[PHI4:%[0-9]+]]:vgpr(<16 x s32>) = G_PHI [[DEF3]](<16 x s32>), %bb.0, %11(<16 x s32>), %bb.1
; CHECK: [[PHI5:%[0-9]+]]:vgpr(<8 x s64>) = G_PHI [[DEF4]](<8 x s64>), %bb.0, %3(<8 x s64>), %bb.1
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY2]](s32), implicit $exec
- ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY2]](s32), implicit $exec
+ ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY2]](s32), implicit $exec
; CHECK: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[V_READFIRSTLANE_B32_]], [[C]](s32)
; CHECK: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C]]
; CHECK: [[IVEC:%[0-9]+]]:vgpr(<16 x s32>) = G_INSERT_VECTOR_ELT [[BITCAST]], [[UV]](s32), [[SHL]](s32)
; CHECK: [[IVEC1:%[0-9]+]]:vgpr(<16 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[UV1]](s32), [[ADD]](s32)
; CHECK: [[BITCAST1:%[0-9]+]]:vgpr(<8 x s64>) = G_BITCAST [[IVEC1]](<16 x s32>)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
; CHECK: .2:
@@ -469,12 +469,12 @@ body: |
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: .1:
; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.0, %9, %bb.1
+ ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.0, %9, %bb.1
; CHECK: [[PHI1:%[0-9]+]]:vgpr(<8 x s64>) = G_PHI [[DEF]](<8 x s64>), %bb.0, %3(<8 x s64>), %bb.1
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY2]](s32), implicit $exec
- ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY2]](s32), implicit $exec
+ ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY2]](s32), implicit $exec
; CHECK: [[IVEC:%[0-9]+]]:vgpr(<8 x s64>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s64), [[V_READFIRSTLANE_B32_]](s32)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
; CHECK: .2:
@@ -516,20 +516,20 @@ body: |
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: .1:
; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF5]], %bb.0, %25, %bb.1
+ ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF5]], %bb.0, %25, %bb.1
; CHECK: [[PHI1:%[0-9]+]]:sgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %8(s32), %bb.1
; CHECK: [[PHI2:%[0-9]+]]:sgpr(s32) = G_PHI [[DEF1]](s32), %bb.0, %9(s32), %bb.1
; CHECK: [[PHI3:%[0-9]+]]:vgpr(<16 x s32>) = G_PHI [[DEF2]](<16 x s32>), %bb.0, %10(<16 x s32>), %bb.1
; CHECK: [[PHI4:%[0-9]+]]:vgpr(<16 x s32>) = G_PHI [[DEF3]](<16 x s32>), %bb.0, %11(<16 x s32>), %bb.1
; CHECK: [[PHI5:%[0-9]+]]:vgpr(<8 x s64>) = G_PHI [[DEF4]](<8 x s64>), %bb.0, %3(<8 x s64>), %bb.1
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY2]](s32), implicit $exec
- ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY2]](s32), implicit $exec
+ ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY2]](s32), implicit $exec
; CHECK: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[V_READFIRSTLANE_B32_]], [[C]](s32)
; CHECK: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C]]
; CHECK: [[IVEC:%[0-9]+]]:vgpr(<16 x s32>) = G_INSERT_VECTOR_ELT [[BITCAST]], [[UV]](s32), [[SHL]](s32)
; CHECK: [[IVEC1:%[0-9]+]]:vgpr(<16 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[UV1]](s32), [[ADD]](s32)
; CHECK: [[BITCAST1:%[0-9]+]]:vgpr(<8 x s64>) = G_BITCAST [[IVEC1]](<16 x s32>)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
; CHECK: .2: