[llvm] r273916 - AMDGPU: Fix verifier errors with undef vector indices
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 27 12:57:44 PDT 2016
Author: arsenm
Date: Mon Jun 27 14:57:44 2016
New Revision: 273916
URL: http://llvm.org/viewvc/llvm-project?rev=273916&view=rev
Log:
AMDGPU: Fix verifier errors with undef vector indices
Also fix pointlessly adding exec to liveins.
Added:
llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-undef.mir
Modified:
llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp
llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-si.ll
Modified: llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp?rev=273916&r1=273915&r2=273916&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp Mon Jun 27 14:57:44 2016
@@ -94,10 +94,12 @@ private:
MachineBasicBlock &LoopBB,
MachineBasicBlock &RemainderBB,
unsigned SaveReg,
- unsigned IdxReg);
+ const MachineOperand &IdxReg);
void emitLoadM0FromVGPRLoop(MachineBasicBlock &LoopBB, DebugLoc DL,
- MachineInstr *MovRel, unsigned IdxReg, int Offset);
+ MachineInstr *MovRel,
+ const MachineOperand &IdxReg,
+ int Offset);
bool loadM0(MachineInstr &MI, MachineInstr *MovRel, int Offset = 0);
void computeIndirectRegAndOffset(unsigned VecReg, unsigned &Reg, int &Offset);
@@ -386,7 +388,7 @@ void SILowerControlFlow::splitBlockLiveI
MachineBasicBlock &LoopBB,
MachineBasicBlock &RemainderBB,
unsigned SaveReg,
- unsigned IdxReg) {
+ const MachineOperand &IdxReg) {
LivePhysRegs RemainderLiveRegs(TRI);
RemainderLiveRegs.addLiveOuts(MBB);
@@ -399,29 +401,38 @@ void SILowerControlFlow::splitBlockLiveI
RemainderLiveRegs.addReg(SaveReg);
if (const MachineOperand *Val = TII->getNamedOperand(MI, AMDGPU::OpName::val)) {
- RemainderLiveRegs.addReg(Val->getReg());
- LoopBB.addLiveIn(Val->getReg());
+ if (!Val->isUndef()) {
+ RemainderLiveRegs.addReg(Val->getReg());
+ LoopBB.addLiveIn(Val->getReg());
+ }
+ }
+
+ const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ for (unsigned Reg : RemainderLiveRegs) {
+ if (MRI.isAllocatable(Reg))
+ RemainderBB.addLiveIn(Reg);
}
- for (unsigned Reg : RemainderLiveRegs)
- RemainderBB.addLiveIn(Reg);
- unsigned SrcReg = TII->getNamedOperand(MI, AMDGPU::OpName::src)->getReg();
- LoopBB.addLiveIn(SrcReg);
- LoopBB.addLiveIn(IdxReg);
+ const MachineOperand *Src = TII->getNamedOperand(MI, AMDGPU::OpName::src);
+ if (!Src->isUndef())
+ LoopBB.addLiveIn(Src->getReg());
+
+ if (!IdxReg.isUndef())
+ LoopBB.addLiveIn(IdxReg.getReg());
LoopBB.sortUniqueLiveIns();
}
void SILowerControlFlow::emitLoadM0FromVGPRLoop(MachineBasicBlock &LoopBB,
DebugLoc DL,
MachineInstr *MovRel,
- unsigned IdxReg,
+ const MachineOperand &IdxReg,
int Offset) {
MachineBasicBlock::iterator I = LoopBB.begin();
// Read the next variant into VCC (lower 32 bits) <- also loop target
BuildMI(LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), AMDGPU::VCC_LO)
- .addReg(IdxReg);
+ .addReg(IdxReg.getReg(), getUndefRegState(IdxReg.isUndef()));
// Move index from VCC into M0
BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
@@ -430,7 +441,7 @@ void SILowerControlFlow::emitLoadM0FromV
// Compare the just read M0 value to all possible Idx values
BuildMI(LoopBB, I, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32))
.addReg(AMDGPU::M0)
- .addReg(IdxReg);
+ .addReg(IdxReg.getReg(), getUndefRegState(IdxReg.isUndef()));
// Update EXEC, save the original EXEC value to VCC
BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC)
@@ -461,16 +472,16 @@ bool SILowerControlFlow::loadM0(MachineI
DebugLoc DL = MI.getDebugLoc();
MachineBasicBlock::iterator I(&MI);
- unsigned Idx = TII->getNamedOperand(MI, AMDGPU::OpName::idx)->getReg();
+ const MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::idx);
- if (AMDGPU::SReg_32RegClass.contains(Idx)) {
+ if (AMDGPU::SReg_32RegClass.contains(Idx->getReg())) {
if (Offset) {
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
- .addReg(Idx)
+ .addReg(Idx->getReg(), getUndefRegState(Idx->isUndef()))
.addImm(Offset);
} else {
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
- .addReg(Idx);
+ .addReg(Idx->getReg(), getUndefRegState(Idx->isUndef()));
}
MBB.insert(I, MovRel);
@@ -485,7 +496,7 @@ bool SILowerControlFlow::loadM0(MachineI
// Reading from a VGPR requires looping over all workitems in the wavefront.
assert(AMDGPU::SReg_64RegClass.contains(Save) &&
- AMDGPU::VGPR_32RegClass.contains(Idx));
+ AMDGPU::VGPR_32RegClass.contains(Idx->getReg()));
// Save the EXEC mask
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B64), Save)
@@ -504,14 +515,13 @@ bool SILowerControlFlow::loadM0(MachineI
LoopBB->addSuccessor(LoopBB);
LoopBB->addSuccessor(RemainderBB);
- if (TRI->trackLivenessAfterRegAlloc(MF))
- splitBlockLiveIns(MBB, MI, *LoopBB, *RemainderBB, Save, Idx);
+ splitBlockLiveIns(MBB, MI, *LoopBB, *RemainderBB, Save, *Idx);
// Move the rest of the block into a new block.
RemainderBB->transferSuccessors(&MBB);
RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end());
- emitLoadM0FromVGPRLoop(*LoopBB, DL, MovRel, Idx, Offset);
+ emitLoadM0FromVGPRLoop(*LoopBB, DL, MovRel, *Idx, Offset);
MachineBasicBlock::iterator First = RemainderBB->begin();
BuildMI(*RemainderBB, First, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
@@ -554,16 +564,16 @@ bool SILowerControlFlow::indirectSrc(Mac
DebugLoc DL = MI.getDebugLoc();
unsigned Dst = MI.getOperand(0).getReg();
- unsigned Vec = TII->getNamedOperand(MI, AMDGPU::OpName::src)->getReg();
+ const MachineOperand *SrcVec = TII->getNamedOperand(MI, AMDGPU::OpName::src);
int Off = TII->getNamedOperand(MI, AMDGPU::OpName::offset)->getImm();
unsigned Reg;
- computeIndirectRegAndOffset(Vec, Reg, Off);
+ computeIndirectRegAndOffset(SrcVec->getReg(), Reg, Off);
MachineInstr *MovRel =
BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
- .addReg(Reg)
- .addReg(Vec, RegState::Implicit);
+ .addReg(Reg, getUndefRegState(SrcVec->isUndef()))
+ .addReg(SrcVec->getReg(), RegState::Implicit);
return loadM0(MI, MovRel, Off);
}
@@ -575,7 +585,7 @@ bool SILowerControlFlow::indirectDst(Mac
unsigned Dst = MI.getOperand(0).getReg();
int Off = TII->getNamedOperand(MI, AMDGPU::OpName::offset)->getImm();
- unsigned Val = TII->getNamedOperand(MI, AMDGPU::OpName::val)->getReg();
+ MachineOperand *Val = TII->getNamedOperand(MI, AMDGPU::OpName::val);
unsigned Reg;
computeIndirectRegAndOffset(Dst, Reg, Off);
@@ -583,7 +593,7 @@ bool SILowerControlFlow::indirectDst(Mac
MachineInstr *MovRel =
BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32))
.addReg(Reg, RegState::Define)
- .addReg(Val)
+ .addReg(Val->getReg(), getUndefRegState(Val->isUndef()))
.addReg(Dst, RegState::Implicit);
return loadM0(MI, MovRel, Off);
Modified: llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-si.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-si.ll?rev=273916&r1=273915&r2=273916&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-si.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-si.ll Mon Jun 27 14:57:44 2016
@@ -94,6 +94,27 @@ entry:
ret void
}
+; CHECK-LABEL: {{^}}extract_undef_offset_sgpr:
+define void @extract_undef_offset_sgpr(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+entry:
+ %ld = load volatile <4 x i32>, <4 x i32> addrspace(1)* %in
+ %value = extractelement <4 x i32> %ld, i32 undef
+ store i32 %value, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}insert_undef_offset_sgpr_vector_src:
+; CHECK: buffer_load_dwordx4
+; CHECK: s_mov_b32 m0,
+; CHECK-NEXT: v_movreld_b32
+define void @insert_undef_offset_sgpr_vector_src(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+entry:
+ %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
+ %value = insertelement <4 x i32> %ld, i32 5, i32 undef
+ store <4 x i32> %value, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
; CHECK-LABEL: {{^}}insert_w_offset:
; CHECK: s_mov_b32 m0
; CHECK-NEXT: v_movreld_b32_e32
Added: llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-undef.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-undef.mir?rev=273916&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-undef.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-undef.mir Mon Jun 27 14:57:44 2016
@@ -0,0 +1,327 @@
+# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-lower-control-flow -o /dev/null %s 2>&1 | FileCheck %s
+# Getting an undef that is specifically a VGPR is tricky from IR
+
+# CHECK-LABEL: name: extract_undef_offset_vgpr{{$}}
+# CHECK: bb.1:
+# CHECK: successors: %bb.1(0x40000000 / 0x80000000 = 50.00%), %bb.2(0x40000000 / 0x80000000 = 50.00%)
+# CHECK: liveins: %vgpr0_vgpr1_vgpr2_vgpr3{{$}}
+
+# CHECK: V_READFIRSTLANE_B32 undef %vgpr10, implicit %exec
+# CHECK: %vgpr0 = V_MOVRELS_B32_e32 %vgpr0, implicit %m0, implicit %exec, implicit %vgpr0_vgpr1_vgpr2_vgpr3
+# CHECK: S_CBRANCH_EXECNZ %bb.1, implicit %exec
+
+# CHECK: bb.2:
+# CHECK: liveins: %sgpr6_sgpr7, %sgpr4_sgpr5_sgpr6_sgpr7, %sgpr4, %sgpr5, %sgpr6, %sgpr7, %sgpr4_sgpr5, %vgpr0_vgpr1_vgpr2_vgpr3, %vgpr0, %vgpr1, %vgpr2, %vgpr3, %vgpr0_vgpr1, %vgpr2_vgpr3, %vgpr0_vgpr1_vgpr2, %vgpr1_vgpr2, %vgpr1_vgpr2_vgpr3, %sgpr0_sgpr1, %sgpr0, %sgpr1{{$}}
+
+
+--- |
+ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+
+ define void @extract_undef_offset_vgpr(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+ entry:
+ %ld = load volatile <4 x i32>, <4 x i32> addrspace(1)* %in
+ %value = extractelement <4 x i32> %ld, i32 undef
+ store i32 %value, i32 addrspace(1)* %out
+ ret void
+ }
+
+ define void @extract_undef_neg_offset_vgpr(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+ entry:
+ %ld = load volatile <4 x i32>, <4 x i32> addrspace(1)* %in
+ %value = extractelement <4 x i32> %ld, i32 undef
+ store i32 %value, i32 addrspace(1)* %out
+ ret void
+ }
+
+ define void @insert_undef_offset_vgpr(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+ entry:
+ %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
+ %value = insertelement <4 x i32> %ld, i32 5, i32 undef
+ store <4 x i32> %value, <4 x i32> addrspace(1)* %out
+ ret void
+ }
+
+ define void @insert_undef_neg_offset_vgpr(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+ entry:
+ %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
+ %value = insertelement <4 x i32> %ld, i32 5, i32 undef
+ store <4 x i32> %value, <4 x i32> addrspace(1)* %out
+ ret void
+ }
+
+ define void @insert_undef_value_offset_vgpr(<4 x i32> addrspace(1)*%out, <4 x i32> addrspace(1)* %in, i32 %idx) {
+ entry:
+ %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
+ %value = insertelement <4 x i32> %ld, i32 undef, i32 %idx
+ store <4 x i32> %value, <4 x i32> addrspace(1)* %out
+ ret void
+ }
+
+...
+---
+name: extract_undef_offset_vgpr
+alignment: 0
+exposesReturnsTwice: false
+hasInlineAsm: false
+allVRegsAllocated: true
+isSSA: false
+tracksRegLiveness: true
+tracksSubRegLiveness: true
+liveins:
+ - { reg: '%sgpr0_sgpr1' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+body: |
+ bb.0.entry:
+ liveins: %sgpr0_sgpr1
+
+ %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM %sgpr0_sgpr1, 11
+ %sgpr7 = S_MOV_B32 61440
+ %sgpr6 = S_MOV_B32 -1
+ S_WAITCNT 127
+ %vgpr0_vgpr1_vgpr2_vgpr3 = BUFFER_LOAD_DWORDX4_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec
+ %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 9
+ S_WAITCNT 3952
+ %vgpr0, dead %sgpr0_sgpr1 = SI_INDIRECT_SRC_V4 killed %vgpr0_vgpr1_vgpr2_vgpr3, undef %vgpr10, 0, implicit-def dead %exec, implicit-def dead %vcc, implicit-def dead %m0, implicit %exec
+ S_WAITCNT 127
+ BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec
+ S_ENDPGM
+
+...
+
+# CHECK-LABEL: name: extract_undef_neg_offset_vgpr{{$}}
+# CHECK: bb.1:
+# CHECK: successors: %bb.1(0x40000000 / 0x80000000 = 50.00%), %bb.2(0x40000000 / 0x80000000 = 50.00%)
+# CHECK: liveins: %vgpr0_vgpr1_vgpr2_vgpr3{{$}}
+
+# CHECK: %vcc_lo = V_READFIRSTLANE_B32 undef %vgpr10, implicit %exec
+# CHECK: %m0 = S_MOV_B32 %vcc_lo
+# CHECK: %m0 = S_ADD_I32 %m0, -7, implicit-def %scc
+# CHECK: %vgpr0 = V_MOVRELS_B32_e32 %vgpr0, implicit %m0, implicit %exec, implicit %vgpr0_vgpr1_vgpr2_vgpr3
+# CHECK: S_CBRANCH_EXECNZ %bb.1, implicit %exec
+
+# CHECK: bb.2:
+# CHECK: liveins: %sgpr6_sgpr7, %sgpr4_sgpr5_sgpr6_sgpr7, %sgpr4, %sgpr5, %sgpr6, %sgpr7, %sgpr4_sgpr5, %vgpr0_vgpr1_vgpr2_vgpr3, %vgpr0, %vgpr1, %vgpr2, %vgpr3, %vgpr0_vgpr1, %vgpr2_vgpr3, %vgpr0_vgpr1_vgpr2, %vgpr1_vgpr2, %vgpr1_vgpr2_vgpr3, %sgpr0_sgpr1, %sgpr0, %sgpr1
+
+name: extract_undef_neg_offset_vgpr
+alignment: 0
+exposesReturnsTwice: false
+hasInlineAsm: false
+allVRegsAllocated: true
+isSSA: false
+tracksRegLiveness: true
+tracksSubRegLiveness: true
+liveins:
+ - { reg: '%sgpr0_sgpr1' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+body: |
+ bb.0.entry:
+ liveins: %sgpr0_sgpr1
+
+ %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM %sgpr0_sgpr1, 11
+ %sgpr7 = S_MOV_B32 61440
+ %sgpr6 = S_MOV_B32 -1
+ S_WAITCNT 127
+ %vgpr0_vgpr1_vgpr2_vgpr3 = BUFFER_LOAD_DWORDX4_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec
+ %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 9
+ S_WAITCNT 3952
+ %vgpr0, dead %sgpr0_sgpr1 = SI_INDIRECT_SRC_V4 killed %vgpr0_vgpr1_vgpr2_vgpr3, undef %vgpr10, -7, implicit-def dead %exec, implicit-def dead %vcc, implicit-def dead %m0, implicit %exec
+ S_WAITCNT 127
+ BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec
+ S_ENDPGM
+
+...
+
+# CHECK-LABEL: name: insert_undef_offset_vgpr{{$}}
+# CHECK: bb.1:
+# CHECK: successors: %bb.1(0x40000000 / 0x80000000 = 50.00%), %bb.2(0x40000000 / 0x80000000 = 50.00%)
+# CHECK: liveins: %vgpr4, %vgpr0_vgpr1_vgpr2_vgpr3{{$}}
+
+# CHECK: %vcc_lo = V_READFIRSTLANE_B32 undef %vgpr10, implicit %exec
+# CHECK: %m0 = S_MOV_B32 %vcc_lo
+# CHECK: %vgpr0 = V_MOVRELD_B32_e32 %vgpr4, implicit %m0, implicit %exec, implicit %vgpr0_vgpr1_vgpr2_vgpr3
+# CHECK: S_CBRANCH_EXECNZ %bb.1, implicit %exec
+
+# CHECK: bb.2:
+# CHECK: liveins: %sgpr6_sgpr7, %sgpr7, %sgpr4_sgpr5, %sgpr5, %sgpr4_sgpr5_sgpr6_sgpr7, %sgpr6, %sgpr4, %vgpr0_vgpr1_vgpr2_vgpr3, %vgpr0, %vgpr1, %vgpr2, %vgpr3, %vgpr0_vgpr1, %vgpr2_vgpr3, %vgpr0_vgpr1_vgpr2, %vgpr1_vgpr2, %vgpr1_vgpr2_vgpr3, %vgpr4, %sgpr0_sgpr1, %sgpr0, %sgpr1
+
+name: insert_undef_offset_vgpr
+alignment: 0
+exposesReturnsTwice: false
+hasInlineAsm: false
+allVRegsAllocated: true
+isSSA: false
+tracksRegLiveness: true
+tracksSubRegLiveness: true
+liveins:
+ - { reg: '%sgpr0_sgpr1' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+body: |
+ bb.0.entry:
+ liveins: %sgpr0_sgpr1
+
+ %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM %sgpr0_sgpr1, 11 :: (non-temporal invariant load 8 from `i64 addrspace(2)* undef`)
+ %sgpr7 = S_MOV_B32 61440
+ %sgpr6 = S_MOV_B32 -1
+ %vgpr4 = V_MOV_B32_e32 5, implicit %exec
+ S_WAITCNT 127
+ %vgpr0_vgpr1_vgpr2_vgpr3 = BUFFER_LOAD_DWORDX4_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (load 16 from %ir.in)
+ %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 9 :: (non-temporal invariant load 8 from `i64 addrspace(2)* undef`)
+ S_WAITCNT 3952
+ %vgpr0_vgpr1_vgpr2_vgpr3, dead %sgpr0_sgpr1 = SI_INDIRECT_DST_V4 %vgpr0_vgpr1_vgpr2_vgpr3, undef %vgpr10, 0, killed %vgpr4, implicit-def dead %exec, implicit-def dead %vcc, implicit-def dead %m0, implicit %exec
+ S_WAITCNT 127
+ BUFFER_STORE_DWORDX4_OFFSET killed %vgpr0_vgpr1_vgpr2_vgpr3, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (store 16 into %ir.out)
+ S_ENDPGM
+
+...
+
+# CHECK-LABEL: name: insert_undef_neg_offset_vgpr{{$}}
+# CHECK: bb.1:
+# CHECK: successors: %bb.1(0x40000000 / 0x80000000 = 50.00%), %bb.2(0x40000000 / 0x80000000 = 50.00%)
+# CHECK: liveins: %vgpr4, %vgpr0_vgpr1_vgpr2_vgpr3{{$}}
+
+# CHECK: %vcc_lo = V_READFIRSTLANE_B32 undef %vgpr10, implicit %exec
+# CHECK: %m0 = S_MOV_B32 %vcc_lo
+# CHECK: %m0 = S_ADD_I32 %m0, -7, implicit-def %scc
+# CHECK: %vgpr0 = V_MOVRELD_B32_e32 %vgpr4, implicit %m0, implicit %exec, implicit %vgpr0_vgpr1_vgpr2_vgpr3
+# CHECK: S_CBRANCH_EXECNZ %bb.1, implicit %exec
+
+# CHECK: bb.2:
+# CHECK: liveins: %sgpr6_sgpr7, %sgpr7, %sgpr4_sgpr5, %sgpr5, %sgpr4_sgpr5_sgpr6_sgpr7, %sgpr6, %sgpr4, %vgpr0_vgpr1_vgpr2_vgpr3, %vgpr0, %vgpr1, %vgpr2, %vgpr3, %vgpr0_vgpr1, %vgpr2_vgpr3, %vgpr0_vgpr1_vgpr2, %vgpr1_vgpr2, %vgpr1_vgpr2_vgpr3, %vgpr4, %sgpr0_sgpr1, %sgpr0, %sgpr1{{$}}
+
+name: insert_undef_neg_offset_vgpr
+alignment: 0
+exposesReturnsTwice: false
+hasInlineAsm: false
+allVRegsAllocated: true
+isSSA: false
+tracksRegLiveness: true
+tracksSubRegLiveness: true
+liveins:
+ - { reg: '%sgpr0_sgpr1' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+body: |
+ bb.0.entry:
+ liveins: %sgpr0_sgpr1
+
+ %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM %sgpr0_sgpr1, 11 :: (non-temporal invariant load 8 from `i64 addrspace(2)* undef`)
+ %sgpr7 = S_MOV_B32 61440
+ %sgpr6 = S_MOV_B32 -1
+ %vgpr4 = V_MOV_B32_e32 5, implicit %exec
+ S_WAITCNT 127
+ %vgpr0_vgpr1_vgpr2_vgpr3 = BUFFER_LOAD_DWORDX4_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (load 16 from %ir.in)
+ %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 9 :: (non-temporal invariant load 8 from `i64 addrspace(2)* undef`)
+ S_WAITCNT 3952
+ %vgpr0_vgpr1_vgpr2_vgpr3, dead %sgpr0_sgpr1 = SI_INDIRECT_DST_V4 %vgpr0_vgpr1_vgpr2_vgpr3, undef %vgpr10, -7, killed %vgpr4, implicit-def dead %exec, implicit-def dead %vcc, implicit-def dead %m0, implicit %exec
+ S_WAITCNT 127
+ BUFFER_STORE_DWORDX4_OFFSET killed %vgpr0_vgpr1_vgpr2_vgpr3, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (store 16 into %ir.out)
+ S_ENDPGM
+
+...
+
+# CHECK-LABEL: insert_undef_value_offset_vgpr{{$}}
+# CHECK: bb.1:
+# CHECK: successors: %bb.1(0x40000000 / 0x80000000 = 50.00%), %bb.2(0x40000000 / 0x80000000 = 50.00%)
+# CHECK: liveins: %vgpr4, %vgpr0_vgpr1_vgpr2_vgpr3{{$}}
+
+# CHECK: %vcc_lo = V_READFIRSTLANE_B32 %vgpr4, implicit %exec
+# CHECK: %m0 = S_MOV_B32 %vcc_lo
+# CHECK: %vgpr0 = V_MOVRELD_B32_e32 undef %vgpr10, implicit %m0, implicit %exec, implicit %vgpr0_vgpr1_vgpr2_vgpr3
+# CHECK: S_CBRANCH_EXECNZ %bb.1, implicit %exec
+
+# CHECK: bb.2:
+# CHECK: liveins: %sgpr6_sgpr7, %sgpr7, %sgpr4_sgpr5, %sgpr5, %sgpr4_sgpr5_sgpr6_sgpr7, %sgpr6, %sgpr4, %vgpr0_vgpr1_vgpr2_vgpr3, %vgpr0, %vgpr1, %vgpr2, %vgpr3, %vgpr0_vgpr1, %vgpr2_vgpr3, %vgpr0_vgpr1_vgpr2, %vgpr1_vgpr2, %vgpr1_vgpr2_vgpr3, %vgpr4, %sgpr0_sgpr1, %sgpr0, %sgpr1{{$}}
+
+name: insert_undef_value_offset_vgpr
+alignment: 0
+exposesReturnsTwice: false
+hasInlineAsm: false
+allVRegsAllocated: true
+isSSA: false
+tracksRegLiveness: true
+tracksSubRegLiveness: true
+liveins:
+ - { reg: '%sgpr0_sgpr1' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+body: |
+ bb.0.entry:
+ liveins: %sgpr0_sgpr1
+
+ %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM %sgpr0_sgpr1, 11 :: (non-temporal invariant load 8 from `i64 addrspace(2)* undef`)
+ %sgpr7 = S_MOV_B32 61440
+ %sgpr6 = S_MOV_B32 -1
+ %vgpr4 = V_MOV_B32_e32 2, implicit %exec
+ S_WAITCNT 127
+ %vgpr0_vgpr1_vgpr2_vgpr3 = BUFFER_LOAD_DWORDX4_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (load 16 from %ir.in)
+ %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 9 :: (non-temporal invariant load 8 from `i64 addrspace(2)* undef`)
+ S_WAITCNT 3952
+ %vgpr0_vgpr1_vgpr2_vgpr3, dead %sgpr0_sgpr1 = SI_INDIRECT_DST_V4 %vgpr0_vgpr1_vgpr2_vgpr3, killed %vgpr4, 0, undef %vgpr10, implicit-def dead %exec, implicit-def dead %vcc, implicit-def dead %m0, implicit %exec
+ S_WAITCNT 127
+ BUFFER_STORE_DWORDX4_OFFSET killed %vgpr0_vgpr1_vgpr2_vgpr3, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (store 16 into %ir.out)
+ S_ENDPGM
+
+...
More information about the llvm-commits
mailing list