[llvm] b32d3d9 - AMDGPU: Treat IMPLICIT_DEF like a constant lanemask source

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 27 08:44:47 PDT 2021


Author: Matt Arsenault
Date: 2021-07-27T11:44:38-04:00
New Revision: b32d3d9e81cdd9275d19cd2a396c461edc9e7189

URL: https://github.com/llvm/llvm-project/commit/b32d3d9e81cdd9275d19cd2a396c461edc9e7189
DIFF: https://github.com/llvm/llvm-project/commit/b32d3d9e81cdd9275d19cd2a396c461edc9e7189.diff

LOG: AMDGPU: Treat IMPLICIT_DEF like a constant lanemask source

This is partially a workaround: SILowerI1Copies does not understand
unstructured loops. In an unstructured loop, it could end up inserting
the instructions that merge a mask register into the same block where
that register is defined, ahead of the def itself.
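
For context, the change below makes the lane-mask constant check bail
out as soon as the def chain reaches an IMPLICIT_DEF. A minimal
standalone model of that walk (illustrative only; the register table
and names here are invented for this sketch, not the LLVM API):

// Minimal model of the patched isConstantLaneMask walk: follow COPY
// chains to the unique def, treat IMPLICIT_DEF as a constant (any
// value is a valid reading of undef), otherwise require a move of the
// immediate 0 or -1.
#include <cassert>
#include <vector>

enum class Opcode { Copy, ImplicitDef, MovImm, Other };

struct Instr {
  Opcode Op;
  int SrcReg = -1;   // for Copy: the source register
  long long Imm = 0; // for MovImm: 0 and -1 are the constant lane masks
};

// Defs[R] stands in for MRI->getUniqueVRegDef(R).
bool isConstantLaneMask(int Reg, bool &Val, const std::vector<Instr> &Defs) {
  for (;;) {
    const Instr &MI = Defs[Reg];
    if (MI.Op == Opcode::ImplicitDef)
      return true; // undef: keep the caller's preinitialized Val
    if (MI.Op != Opcode::Copy) {
      if (MI.Op != Opcode::MovImm || (MI.Imm != 0 && MI.Imm != -1))
        return false;
      Val = MI.Imm != 0;
      return true;
    }
    Reg = MI.SrcReg; // look through the copy
  }
}

int main() {
  // %0 = IMPLICIT_DEF; %1 = COPY %0 -- with this patch, %1 is constant.
  std::vector<Instr> Defs = {{Opcode::ImplicitDef}, {Opcode::Copy, 0}};
  bool Val = false;
  assert(isConstantLaneMask(1, Val, Defs));
  return 0;
}

Since any value is a valid reading of an undefined register, reporting
it as constant is always safe, and it keeps the merge logic from
materializing instructions for it.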

Added: 
    llvm/test/CodeGen/AMDGPU/lower-i1-copies-implicit-def-unstructured-loop.mir

Modified: 
    llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
index 9570680ad9cbb..672266f0c11e7 100644
--- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -598,6 +598,11 @@ void SILowerI1Copies::lowerPhis() {
 
     MachineBasicBlock *PostDomBound =
         PDT->findNearestCommonDominator(DomBlocks);
+
+    // FIXME: This fails to find irreducible cycles. If we have a def (other
+    // than a constant) in a pair of blocks that end up looping back to each
+    // other, it will be mishandled. Due to structurization this shouldn't
+    // occur in practice.
     unsigned FoundLoopLevel = LF.findLoop(PostDomBound);
 
     SSAUpdater.Initialize(DstReg);
@@ -732,6 +737,9 @@ bool SILowerI1Copies::isConstantLaneMask(Register Reg, bool &Val) const {
   const MachineInstr *MI;
   for (;;) {
     MI = MRI->getUniqueVRegDef(Reg);
+    if (MI->getOpcode() == AMDGPU::IMPLICIT_DEF)
+      return true;
+
     if (MI->getOpcode() != AMDGPU::COPY)
       break;
 
@@ -808,9 +816,9 @@ void SILowerI1Copies::buildMergeLaneMasks(MachineBasicBlock &MBB,
                                           MachineBasicBlock::iterator I,
                                           const DebugLoc &DL, unsigned DstReg,
                                           unsigned PrevReg, unsigned CurReg) {
-  bool PrevVal;
+  bool PrevVal = false;
   bool PrevConstant = isConstantLaneMask(PrevReg, PrevVal);
-  bool CurVal;
+  bool CurVal = false;
   bool CurConstant = isConstantLaneMask(CurReg, CurVal);
 
   if (PrevConstant && CurConstant) {
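
This is also why PrevVal and CurVal now need initializers: for an
IMPLICIT_DEF, isConstantLaneMask returns true without writing Val. The
payoff is the constant fast path entered just above: when both inputs
are known constants, the merge folds to a copy or a move instead of
fresh mask arithmetic. A standalone sketch of that fold (illustrative
bit math, not the pass's code):

// Merging lane masks across a divergent edge computes
//   Dst = (Prev & ~exec) | (Cur & exec)
// With both inputs constant (or undef, now treated as constant), the
// result folds and no mask arithmetic needs to be emitted in the
// defining block.
#include <cstdint>
#include <iostream>
#include <optional>

using LaneMask = std::uint64_t;

// nullopt models an opaque register; a bool models an all-0/all-1 mask.
using MaskConst = std::optional<bool>;

std::optional<LaneMask> tryFoldMerge(MaskConst Prev, MaskConst Cur,
                                     LaneMask Exec) {
  if (!Prev || !Cur)
    return std::nullopt; // would emit real merge instructions here
  LaneMask P = *Prev ? ~LaneMask(0) : 0;
  LaneMask C = *Cur ? ~LaneMask(0) : 0;
  return (P & ~Exec) | (C & Exec);
}

int main() {
  LaneMask Exec = 0x00ff00ff00ff00ffULL;
  // Undef previous value treated as constant false, current is true:
  // the merged mask is just exec, so a single COPY suffices.
  if (auto Folded = tryFoldMerge(false, true, Exec))
    std::cout << std::hex << *Folded << '\n'; // prints ff00ff00ff00ff
  return 0;
}

In the new test below, this is what keeps mask-management instructions
from being inserted ahead of the def of %34.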

diff --git a/llvm/test/CodeGen/AMDGPU/lower-i1-copies-implicit-def-unstructured-loop.mir b/llvm/test/CodeGen/AMDGPU/lower-i1-copies-implicit-def-unstructured-loop.mir
new file mode 100644
index 0000000000000..7bffc99e9caf1
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lower-i1-copies-implicit-def-unstructured-loop.mir
@@ -0,0 +1,171 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs -run-pass=si-i1-copies -o - %s | FileCheck %s
+
+# %bb.1 and %bb.3 loop back to each other, and thus neither dominates
+# the other.
+# When the phi in %bb.3 was handled, the pass attempted to insert
+# instructions in %bb.1 to handle this def, but ended up inserting mask
+# management instructions before the def of %34. This is avoided by
+# treating IMPLICIT_DEF specially, like a constant.
+
+---
+name:            recursive_vreg_1_phi
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+body:             |
+  ; CHECK-LABEL: name: recursive_vreg_1_phi
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16
+  ; CHECK:   [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+  ; CHECK:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 20
+  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+  ; CHECK:   [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; CHECK:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 10
+  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
+  ; CHECK:   [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; CHECK:   [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; CHECK:   [[V_OR_B32_e32_:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 killed [[DEF3]], killed [[DEF1]], implicit $exec
+  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; CHECK:   [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+  ; CHECK:   [[V_ASHRREV_I32_e32_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e32 31, [[COPY2]], implicit $exec
+  ; CHECK:   [[DEF5:%[0-9]+]]:sreg_32_xm0 = IMPLICIT_DEF
+  ; CHECK:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_ASHRREV_I32_e32_]], %subreg.sub1
+  ; CHECK:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 2
+  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY killed [[S_MOV_B32_2]]
+  ; CHECK:   [[V_LSHL_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHL_B64_e64 killed [[REG_SEQUENCE]], [[COPY3]], implicit $exec
+  ; CHECK:   [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD killed [[V_LSHL_B64_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 1)
+  ; CHECK:   [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+  ; CHECK:   [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 68
+  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY killed [[S_MOV_B32_4]]
+  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]]
+  ; CHECK:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
+  ; CHECK:   [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 432
+  ; CHECK:   [[V_MAD_I64_I32_e64_:%[0-9]+]]:vreg_64, [[V_MAD_I64_I32_e64_1:%[0-9]+]]:sreg_64 = V_MAD_I64_I32_e64 killed [[FLAT_LOAD_DWORD]], killed [[S_MOV_B32_5]], [[REG_SEQUENCE1]], 0, implicit $exec
+  ; CHECK:   [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+  ; CHECK:   [[DEF6:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
+  ; CHECK:   [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF6]], %bb.0, %31, %bb.3
+  ; CHECK:   [[PHI1:%[0-9]+]]:sreg_64 = PHI [[S_MOV_B64_]], %bb.0, %54, %bb.3
+  ; CHECK:   [[PHI2:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_3]], %bb.0, %29, %bb.3
+  ; CHECK:   [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+  ; CHECK:   [[S_ANDN2_B64_:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[PHI]], $exec, implicit-def $scc
+  ; CHECK:   [[COPY6:%[0-9]+]]:sreg_64 = COPY [[S_ANDN2_B64_]]
+  ; CHECK:   S_CMP_EQ_U32 [[PHI2]], killed [[S_MOV_B32_6]], implicit-def $scc
+  ; CHECK:   [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
+  ; CHECK:   [[DEF7:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+  ; CHECK:   S_CBRANCH_SCC1 %bb.3, implicit $scc
+  ; CHECK:   S_BRANCH %bb.2
+  ; CHECK: bb.2:
+  ; CHECK:   successors: %bb.3(0x80000000)
+  ; CHECK:   [[FLAT_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[V_MAD_I64_I32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 1)
+  ; CHECK:   [[S_MOV_B32_7:%[0-9]+]]:sreg_32 = S_MOV_B32 6
+  ; CHECK:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_7]]
+  ; CHECK:   [[V_LSHR_B32_e32_:%[0-9]+]]:vgpr_32 = V_LSHR_B32_e32 killed [[FLAT_LOAD_DWORD1]], killed [[COPY7]], implicit $exec
+  ; CHECK:   [[DEF8:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+  ; CHECK:   [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 1, [[V_LSHR_B32_e32_]], implicit $exec
+  ; CHECK:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_AND_B32_e64_]], 1, implicit $exec
+  ; CHECK:   [[COPY8:%[0-9]+]]:sreg_64 = COPY [[PHI1]]
+  ; CHECK:   [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY8]], killed [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+  ; CHECK:   [[COPY9:%[0-9]+]]:sreg_64 = COPY [[PHI1]]
+  ; CHECK:   [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 killed [[S_AND_B64_]], [[COPY9]], implicit-def dead $scc
+  ; CHECK:   [[S_MOV_B64_2:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+  ; CHECK:   [[DEF9:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+  ; CHECK:   [[S_ANDN2_B64_1:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[COPY6]], $exec, implicit-def $scc
+  ; CHECK:   [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[S_OR_B64_]], $exec, implicit-def $scc
+  ; CHECK:   [[S_OR_B64_1:%[0-9]+]]:sreg_64 = S_OR_B64 [[S_ANDN2_B64_1]], [[S_AND_B64_1]], implicit-def $scc
+  ; CHECK: bb.3:
+  ; CHECK:   successors: %bb.4(0x00000000), %bb.1(0x80000000)
+  ; CHECK:   [[PHI3:%[0-9]+]]:sreg_64 = PHI [[COPY6]], %bb.1, [[S_OR_B64_1]], %bb.2
+  ; CHECK:   [[PHI4:%[0-9]+]]:sreg_64 = PHI [[PHI1]], %bb.1, [[DEF9]], %bb.2
+  ; CHECK:   [[PHI5:%[0-9]+]]:sreg_64_xexec = PHI [[S_MOV_B64_1]], %bb.1, [[S_MOV_B64_2]], %bb.2
+  ; CHECK:   [[S_MOV_B32_8:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
+  ; CHECK:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[PHI5]], implicit $exec
+  ; CHECK:   [[S_MOV_B32_9:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+  ; CHECK:   [[DEF10:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+  ; CHECK:   V_CMP_NE_U32_e32 killed [[S_MOV_B32_9]], [[V_CNDMASK_B32_e64_]], implicit-def $vcc, implicit $exec
+  ; CHECK:   $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
+  ; CHECK:   [[S_ANDN2_B64_2:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[PHI4]], $exec, implicit-def $scc
+  ; CHECK:   [[S_AND_B64_2:%[0-9]+]]:sreg_64 = S_AND_B64 [[PHI3]], $exec, implicit-def $scc
+  ; CHECK:   [[S_OR_B64_2:%[0-9]+]]:sreg_64 = S_OR_B64 [[S_ANDN2_B64_2]], [[S_AND_B64_2]], implicit-def $scc
+  ; CHECK:   S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+  ; CHECK:   S_BRANCH %bb.4
+  ; CHECK: bb.4:
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16
+
+    %0:sreg_64 = IMPLICIT_DEF
+    %1:sreg_32 = S_MOV_B32 20
+    %2:vgpr_32 = COPY %1
+    %3:vgpr_32 = IMPLICIT_DEF
+    %4:sreg_32 = S_MOV_B32 10
+    %5:vgpr_32 = COPY %4
+    %6:vgpr_32 = IMPLICIT_DEF
+    %7:vgpr_32 = IMPLICIT_DEF
+    %8:vgpr_32 = V_OR_B32_e32 killed %7, killed %3, implicit $exec
+    %9:vgpr_32 = COPY $vgpr0
+    %10:sreg_32 = IMPLICIT_DEF
+    %11:vgpr_32 = V_ASHRREV_I32_e32 31, %9, implicit $exec
+    %12:sreg_32_xm0 = IMPLICIT_DEF
+    %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
+    %14:sreg_32 = S_MOV_B32 2
+    %15:sgpr_32 = COPY killed %14
+    %16:vreg_64 = V_LSHL_B64_e64 killed %13, %15, implicit $exec
+    %17:vgpr_32 = FLAT_LOAD_DWORD killed %16, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 1)
+    %18:sreg_32 = S_MOV_B32 0
+    %19:sreg_32 = S_MOV_B32 68
+    %20:vgpr_32 = COPY killed %19
+    %21:vgpr_32 = COPY %18
+    %22:vreg_64 = REG_SEQUENCE killed %20, %subreg.sub0, %21, %subreg.sub1
+    %23:sreg_32 = S_MOV_B32 432
+    %24:vreg_64, %25:sreg_64 = V_MAD_I64_I32_e64 killed %17, killed %23, %22, 0, implicit $exec
+    %26:sreg_64 = S_MOV_B64 0
+    %27:vreg_1 = COPY %26, implicit $exec
+
+  bb.1:
+    successors: %bb.2, %bb.3
+
+    %28:sreg_32 = PHI %18, %bb.0, %29, %bb.3
+    %30:vreg_1 = PHI %27, %bb.0, %31, %bb.3
+    %32:sreg_32 = S_MOV_B32 0
+    S_CMP_EQ_U32 %28, killed %32, implicit-def $scc
+    %33:sreg_64 = S_MOV_B64 -1
+    %34:sreg_64 = IMPLICIT_DEF
+    %35:vreg_1 = COPY %34
+    S_CBRANCH_SCC1 %bb.3, implicit $scc
+    S_BRANCH %bb.2
+
+  bb.2:
+    %36:vgpr_32 = FLAT_LOAD_DWORD %24, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 1)
+    %37:sreg_32 = S_MOV_B32 6
+    %38:vgpr_32 = COPY %37
+    %39:vgpr_32 = V_LSHR_B32_e32 killed %36, killed %38, implicit $exec
+    %40:sreg_32 = IMPLICIT_DEF
+    %41:vgpr_32 = V_AND_B32_e64 1, %39, implicit $exec
+    %42:sreg_64 = V_CMP_EQ_U32_e64 killed %41, 1, implicit $exec
+    %43:sreg_64 = COPY %30
+    %44:sreg_64 = S_AND_B64 %43, killed %42, implicit-def dead $scc
+    %45:sreg_64 = COPY %30
+    %46:sreg_64 = S_OR_B64 killed %44, %45, implicit-def dead $scc
+    %47:sreg_64 = S_MOV_B64 0
+    %48:vreg_1 = COPY %46
+
+  bb.3:
+    successors: %bb.4(0x00000000), %bb.1(0x80000000)
+
+    %31:vreg_1 = PHI %35, %bb.1, %48, %bb.2
+    %49:sreg_64_xexec = PHI %33, %bb.1, %47, %bb.2
+    %29:sreg_32 = S_MOV_B32 -1
+    %50:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %49, implicit $exec
+    %51:sreg_32 = S_MOV_B32 1
+    %52:sreg_32 = IMPLICIT_DEF
+    V_CMP_NE_U32_e32 killed %51, %50, implicit-def $vcc, implicit $exec
+    $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
+    S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+    S_BRANCH %bb.4
+
+  bb.4:
+
+...
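
A note for reading the CHECK lines above: the S_ANDN2_B64 / S_AND_B64 /
S_OR_B64 sequences are the generic lane-mask merge emitted when an
input is not a known constant. Modeled as plain bit math (a standalone
illustrative sketch, not the pass's code):

// The generic merge of an old and a new lane mask under the current
// exec mask, mirroring the S_ANDN2_B64/S_AND_B64/S_OR_B64 triple:
#include <cassert>
#include <cstdint>

std::uint64_t mergeLaneMasks(std::uint64_t Prev, std::uint64_t Cur,
                             std::uint64_t Exec) {
  std::uint64_t PrevMasked = Prev & ~Exec; // S_ANDN2_B64 %prev, $exec
  std::uint64_t CurMasked = Cur & Exec;    // S_AND_B64   %cur,  $exec
  return PrevMasked | CurMasked;           // S_OR_B64
}

int main() {
  // Lanes inside exec take the new value; lanes outside keep the old.
  assert(mergeLaneMasks(~0ULL, 0ULL, 0xffULL) == (~0ULL << 8));
  return 0;
}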


        

