[llvm] b32d3d9 - AMDGPU: Treat IMPLICIT_DEF like a constant lanemask source
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 27 08:44:47 PDT 2021
Author: Matt Arsenault
Date: 2021-07-27T11:44:38-04:00
New Revision: b32d3d9e81cdd9275d19cd2a396c461edc9e7189
URL: https://github.com/llvm/llvm-project/commit/b32d3d9e81cdd9275d19cd2a396c461edc9e7189
DIFF: https://github.com/llvm/llvm-project/commit/b32d3d9e81cdd9275d19cd2a396c461edc9e7189.diff
LOG: AMDGPU: Treat IMPLICIT_DEF like a constant lanemask source
This is partially a workaround. SILowerI1Copies does not understand
unstructured loops. This would result in inserting instructions to
merge a mask register in the same block where it was defined in an
unstructured loop.
Added:
llvm/test/CodeGen/AMDGPU/lower-i1-copies-implicit-def-unstructured-loop.mir
Modified:
llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
index 9570680ad9cbb..672266f0c11e7 100644
--- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -598,6 +598,11 @@ void SILowerI1Copies::lowerPhis() {
MachineBasicBlock *PostDomBound =
PDT->findNearestCommonDominator(DomBlocks);
+
+ // FIXME: This fails to find irreducible cycles. If we have a def (other
+ // than a constant) in a pair of blocks that end up looping back to each
+ // other, it will be mishandled. Due to structurization this shouldn't occur
+ // in practice.
unsigned FoundLoopLevel = LF.findLoop(PostDomBound);
SSAUpdater.Initialize(DstReg);
@@ -732,6 +737,9 @@ bool SILowerI1Copies::isConstantLaneMask(Register Reg, bool &Val) const {
const MachineInstr *MI;
for (;;) {
MI = MRI->getUniqueVRegDef(Reg);
+ if (MI->getOpcode() == AMDGPU::IMPLICIT_DEF)
+ return true;
+
if (MI->getOpcode() != AMDGPU::COPY)
break;
@@ -808,9 +816,9 @@ void SILowerI1Copies::buildMergeLaneMasks(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const DebugLoc &DL, unsigned DstReg,
unsigned PrevReg, unsigned CurReg) {
- bool PrevVal;
+ bool PrevVal = false;
bool PrevConstant = isConstantLaneMask(PrevReg, PrevVal);
- bool CurVal;
+ bool CurVal = false;
bool CurConstant = isConstantLaneMask(CurReg, CurVal);
if (PrevConstant && CurConstant) {
diff --git a/llvm/test/CodeGen/AMDGPU/lower-i1-copies-implicit-def-unstructured-loop.mir b/llvm/test/CodeGen/AMDGPU/lower-i1-copies-implicit-def-unstructured-loop.mir
new file mode 100644
index 0000000000000..7bffc99e9caf1
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lower-i1-copies-implicit-def-unstructured-loop.mir
@@ -0,0 +1,171 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs -run-pass=si-i1-copies -o - %s | FileCheck %s
+
+# %bb.1 and %bb.3 loop back to each other, and thus neither dominates
+# the other.
+# When the phi in %bb.3 was handled, the pass attempted to insert instructions
+# in %bb.1 to handle this def, but ended up inserting mask management
+# instructions before the def of %34. This is avoided by treating
+# IMPLICIT_DEF specially, like constants.
+
+---
+name: recursive_vreg_1_phi
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ ; CHECK-LABEL: name: recursive_vreg_1_phi
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.1(0x80000000)
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 20
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+ ; CHECK: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 10
+ ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
+ ; CHECK: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK: [[V_OR_B32_e32_:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 killed [[DEF3]], killed [[DEF1]], implicit $exec
+ ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; CHECK: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; CHECK: [[V_ASHRREV_I32_e32_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e32 31, [[COPY2]], implicit $exec
+ ; CHECK: [[DEF5:%[0-9]+]]:sreg_32_xm0 = IMPLICIT_DEF
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_ASHRREV_I32_e32_]], %subreg.sub1
+ ; CHECK: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 2
+ ; CHECK: [[COPY3:%[0-9]+]]:sgpr_32 = COPY killed [[S_MOV_B32_2]]
+ ; CHECK: [[V_LSHL_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHL_B64_e64 killed [[REG_SEQUENCE]], [[COPY3]], implicit $exec
+ ; CHECK: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD killed [[V_LSHL_B64_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 1)
+ ; CHECK: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; CHECK: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 68
+ ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY killed [[S_MOV_B32_4]]
+ ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]]
+ ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
+ ; CHECK: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 432
+ ; CHECK: [[V_MAD_I64_I32_e64_:%[0-9]+]]:vreg_64, [[V_MAD_I64_I32_e64_1:%[0-9]+]]:sreg_64 = V_MAD_I64_I32_e64 killed [[FLAT_LOAD_DWORD]], killed [[S_MOV_B32_5]], [[REG_SEQUENCE1]], 0, implicit $exec
+ ; CHECK: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; CHECK: [[DEF6:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK: bb.1:
+ ; CHECK: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF6]], %bb.0, %31, %bb.3
+ ; CHECK: [[PHI1:%[0-9]+]]:sreg_64 = PHI [[S_MOV_B64_]], %bb.0, %54, %bb.3
+ ; CHECK: [[PHI2:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_3]], %bb.0, %29, %bb.3
+ ; CHECK: [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; CHECK: [[S_ANDN2_B64_:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[PHI]], $exec, implicit-def $scc
+ ; CHECK: [[COPY6:%[0-9]+]]:sreg_64 = COPY [[S_ANDN2_B64_]]
+ ; CHECK: S_CMP_EQ_U32 [[PHI2]], killed [[S_MOV_B32_6]], implicit-def $scc
+ ; CHECK: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
+ ; CHECK: [[DEF7:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK: S_CBRANCH_SCC1 %bb.3, implicit $scc
+ ; CHECK: S_BRANCH %bb.2
+ ; CHECK: bb.2:
+ ; CHECK: successors: %bb.3(0x80000000)
+ ; CHECK: [[FLAT_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[V_MAD_I64_I32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 1)
+ ; CHECK: [[S_MOV_B32_7:%[0-9]+]]:sreg_32 = S_MOV_B32 6
+ ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_7]]
+ ; CHECK: [[V_LSHR_B32_e32_:%[0-9]+]]:vgpr_32 = V_LSHR_B32_e32 killed [[FLAT_LOAD_DWORD1]], killed [[COPY7]], implicit $exec
+ ; CHECK: [[DEF8:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 1, [[V_LSHR_B32_e32_]], implicit $exec
+ ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_AND_B32_e64_]], 1, implicit $exec
+ ; CHECK: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[PHI1]]
+ ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY8]], killed [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; CHECK: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[PHI1]]
+ ; CHECK: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 killed [[S_AND_B64_]], [[COPY9]], implicit-def dead $scc
+ ; CHECK: [[S_MOV_B64_2:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; CHECK: [[DEF9:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK: [[S_ANDN2_B64_1:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[COPY6]], $exec, implicit-def $scc
+ ; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[S_OR_B64_]], $exec, implicit-def $scc
+ ; CHECK: [[S_OR_B64_1:%[0-9]+]]:sreg_64 = S_OR_B64 [[S_ANDN2_B64_1]], [[S_AND_B64_1]], implicit-def $scc
+ ; CHECK: bb.3:
+ ; CHECK: successors: %bb.4(0x00000000), %bb.1(0x80000000)
+ ; CHECK: [[PHI3:%[0-9]+]]:sreg_64 = PHI [[COPY6]], %bb.1, [[S_OR_B64_1]], %bb.2
+ ; CHECK: [[PHI4:%[0-9]+]]:sreg_64 = PHI [[PHI1]], %bb.1, [[DEF9]], %bb.2
+ ; CHECK: [[PHI5:%[0-9]+]]:sreg_64_xexec = PHI [[S_MOV_B64_1]], %bb.1, [[S_MOV_B64_2]], %bb.2
+ ; CHECK: [[S_MOV_B32_8:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
+ ; CHECK: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[PHI5]], implicit $exec
+ ; CHECK: [[S_MOV_B32_9:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; CHECK: [[DEF10:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; CHECK: V_CMP_NE_U32_e32 killed [[S_MOV_B32_9]], [[V_CNDMASK_B32_e64_]], implicit-def $vcc, implicit $exec
+ ; CHECK: $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
+ ; CHECK: [[S_ANDN2_B64_2:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[PHI4]], $exec, implicit-def $scc
+ ; CHECK: [[S_AND_B64_2:%[0-9]+]]:sreg_64 = S_AND_B64 [[PHI3]], $exec, implicit-def $scc
+ ; CHECK: [[S_OR_B64_2:%[0-9]+]]:sreg_64 = S_OR_B64 [[S_ANDN2_B64_2]], [[S_AND_B64_2]], implicit-def $scc
+ ; CHECK: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+ ; CHECK: S_BRANCH %bb.4
+ ; CHECK: bb.4:
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16
+
+ %0:sreg_64 = IMPLICIT_DEF
+ %1:sreg_32 = S_MOV_B32 20
+ %2:vgpr_32 = COPY %1
+ %3:vgpr_32 = IMPLICIT_DEF
+ %4:sreg_32 = S_MOV_B32 10
+ %5:vgpr_32 = COPY %4
+ %6:vgpr_32 = IMPLICIT_DEF
+ %7:vgpr_32 = IMPLICIT_DEF
+ %8:vgpr_32 = V_OR_B32_e32 killed %7, killed %3, implicit $exec
+ %9:vgpr_32 = COPY $vgpr0
+ %10:sreg_32 = IMPLICIT_DEF
+ %11:vgpr_32 = V_ASHRREV_I32_e32 31, %9, implicit $exec
+ %12:sreg_32_xm0 = IMPLICIT_DEF
+ %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
+ %14:sreg_32 = S_MOV_B32 2
+ %15:sgpr_32 = COPY killed %14
+ %16:vreg_64 = V_LSHL_B64_e64 killed %13, %15, implicit $exec
+ %17:vgpr_32 = FLAT_LOAD_DWORD killed %16, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 1)
+ %18:sreg_32 = S_MOV_B32 0
+ %19:sreg_32 = S_MOV_B32 68
+ %20:vgpr_32 = COPY killed %19
+ %21:vgpr_32 = COPY %18
+ %22:vreg_64 = REG_SEQUENCE killed %20, %subreg.sub0, %21, %subreg.sub1
+ %23:sreg_32 = S_MOV_B32 432
+ %24:vreg_64, %25:sreg_64 = V_MAD_I64_I32_e64 killed %17, killed %23, %22, 0, implicit $exec
+ %26:sreg_64 = S_MOV_B64 0
+ %27:vreg_1 = COPY %26, implicit $exec
+
+ bb.1:
+ successors: %bb.2, %bb.3
+
+ %28:sreg_32 = PHI %18, %bb.0, %29, %bb.3
+ %30:vreg_1 = PHI %27, %bb.0, %31, %bb.3
+ %32:sreg_32 = S_MOV_B32 0
+ S_CMP_EQ_U32 %28, killed %32, implicit-def $scc
+ %33:sreg_64 = S_MOV_B64 -1
+ %34:sreg_64 = IMPLICIT_DEF
+ %35:vreg_1 = COPY %34
+ S_CBRANCH_SCC1 %bb.3, implicit $scc
+ S_BRANCH %bb.2
+
+ bb.2:
+ %36:vgpr_32 = FLAT_LOAD_DWORD %24, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 1)
+ %37:sreg_32 = S_MOV_B32 6
+ %38:vgpr_32 = COPY %37
+ %39:vgpr_32 = V_LSHR_B32_e32 killed %36, killed %38, implicit $exec
+ %40:sreg_32 = IMPLICIT_DEF
+ %41:vgpr_32 = V_AND_B32_e64 1, %39, implicit $exec
+ %42:sreg_64 = V_CMP_EQ_U32_e64 killed %41, 1, implicit $exec
+ %43:sreg_64 = COPY %30
+ %44:sreg_64 = S_AND_B64 %43, killed %42, implicit-def dead $scc
+ %45:sreg_64 = COPY %30
+ %46:sreg_64 = S_OR_B64 killed %44, %45, implicit-def dead $scc
+ %47:sreg_64 = S_MOV_B64 0
+ %48:vreg_1 = COPY %46
+
+ bb.3:
+ successors: %bb.4(0x00000000), %bb.1(0x80000000)
+
+ %31:vreg_1 = PHI %35, %bb.1, %48, %bb.2
+ %49:sreg_64_xexec = PHI %33, %bb.1, %47, %bb.2
+ %29:sreg_32 = S_MOV_B32 -1
+ %50:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %49, implicit $exec
+ %51:sreg_32 = S_MOV_B32 1
+ %52:sreg_32 = IMPLICIT_DEF
+ V_CMP_NE_U32_e32 killed %51, %50, implicit-def $vcc, implicit $exec
+ $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
+ S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+ S_BRANCH %bb.4
+
+ bb.4:
+
+...
More information about the llvm-commits
mailing list