[llvm] 95dd0b0 - AMDGPU/SILowerI1Copies process phi incomings in specific order (#72375)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 15 07:27:55 PST 2023
Author: petar-avramovic
Date: 2023-11-15T16:27:51+01:00
New Revision: 95dd0b04d270fe3262e2494bfb9033d8ca07470a
URL: https://github.com/llvm/llvm-project/commit/95dd0b04d270fe3262e2494bfb9033d8ca07470a
DIFF: https://github.com/llvm/llvm-project/commit/95dd0b04d270fe3262e2494bfb9033d8ca07470a.diff
LOG: AMDGPU/SILowerI1Copies process phi incomings in specific order (#72375)
When merging lane masks, value from block that is always visited first
(PrevReg in buildMergeLaneMasks) needs to exist because we do on-the-fly
constant folding. For PrevReg to exist, basic block that should contain
PrevReg definition must be processed first. Sort the incomings such that
incoming values that dominate other incoming values are processed first.
Sorting of phi incomings makes no changes for phis created by SDAG
because SDAG adds phi incomings as it selects basic blocks in reversed
post order traversal.
This change is required by upcoming lane mask merging implementation
for GlobalISel that leaves phi incomings as they are in IR.
Added:
llvm/test/CodeGen/AMDGPU/si-lower-i1-copies-order-of-phi-incomings.mir
Modified:
llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
index 5d633d32ee582eb..68c8f4024e73007 100644
--- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -559,6 +559,7 @@ bool SILowerI1Copies::lowerPhis() {
if (Vreg1Phis.empty())
return false;
+ DT->getBase().updateDFSNumbers();
MachineBasicBlock *PrevMBB = nullptr;
for (MachineInstr *MI : Vreg1Phis) {
MachineBasicBlock &MBB = *MI->getParent();
@@ -593,6 +594,15 @@ bool SILowerI1Copies::lowerPhis() {
Incomings.emplace_back(IncomingReg, IncomingMBB, Register{});
}
+ // Sort the incomings such that incoming values that dominate other incoming
+ // values are sorted earlier. This allows us to do some amount of on-the-fly
+ // constant folding.
+ // Incoming with smaller DFSNumIn goes first, DFSNumIn is 0 for entry block.
+ llvm::sort(Incomings, [this](Incoming LHS, Incoming RHS) {
+ return DT->getNode(LHS.Block)->getDFSNumIn() <
+ DT->getNode(RHS.Block)->getDFSNumIn();
+ });
+
#ifndef NDEBUG
PhiRegisters.insert(DstReg);
#endif
diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-i1-copies-order-of-phi-incomings.mir b/llvm/test/CodeGen/AMDGPU/si-lower-i1-copies-order-of-phi-incomings.mir
new file mode 100644
index 000000000000000..695beab8dd24dc8
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/si-lower-i1-copies-order-of-phi-incomings.mir
@@ -0,0 +1,151 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs -run-pass=si-i1-copies -o - %s | FileCheck -check-prefixes=GCN %s
+
+# Order in which SILowerI1Copies build instructions to merge lane masks should
+# not depend on order of incoming operands in phi instruction.
+# SDAG adds phi incomings as it processes basic blocks in reversed post order
+# traversal. Because of that, incomings in phis created by SDAG are sorted,
+# compared to the how phi looked in IR, in convenient way for lowerPhis.
+
+# Here incomings for %20:vreg_1 = PHI %19, %bb.1, %26, %bb.2 are swapped
+# to verify that SILowerI1Copies sorts incomings from phi appropriately before
+# it starts merging lane masks.
+
+---
+name: phi
+tracksRegLiveness: true
+body: |
+ ; GCN-LABEL: name: phi
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.1(0x80000000)
+ ; GCN-NEXT: liveins: $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GCN-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
+ ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
+ ; GCN-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $exec_lo
+ ; GCN-NEXT: [[DEF5:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[DEF5]], %bb.0, %20, %bb.3
+ ; GCN-NEXT: [[PHI1:%[0-9]+]]:sreg_32 = PHI [[COPY6]], %bb.0, %37, %bb.3
+ ; GCN-NEXT: [[PHI2:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_1]], %bb.0, %16, %bb.3
+ ; GCN-NEXT: [[PHI3:%[0-9]+]]:vreg_64 = PHI [[COPY5]], %bb.0, %18, %bb.3
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[PHI1]]
+ ; GCN-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32 = S_ANDN2_B32 [[PHI]], $exec_lo, implicit-def $scc
+ ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PHI1]], $exec_lo, implicit-def $scc
+ ; GCN-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_ANDN2_B32_]], [[S_AND_B32_]], implicit-def $scc
+ ; GCN-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF [[COPY7]], %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; GCN-NEXT: S_BRANCH %bb.2
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: successors: %bb.3(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PHI3]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
+ ; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 killed [[GLOBAL_LOAD_DWORD]], killed [[S_MOV_B32_2]], implicit $exec
+ ; GCN-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32 = S_ANDN2_B32 [[S_OR_B32_]], $exec_lo, implicit-def $scc
+ ; GCN-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[V_CMP_EQ_U32_e64_]], $exec_lo, implicit-def $scc
+ ; GCN-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_ANDN2_B32_1]], [[S_AND_B32_1]], implicit-def $scc
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.3:
+ ; GCN-NEXT: successors: %bb.4(0x04000000), %bb.1(0x7c000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[PHI4:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_]], %bb.1, [[S_OR_B32_1]], %bb.2
+ ; GCN-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; GCN-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 4
+ ; GCN-NEXT: [[V_ADD_U:%[0-9]+]]:vreg_64 = V_ADD_U64_PSEUDO [[PHI3]], killed [[S_MOV_B64_]], implicit-def dead $vcc, implicit $exec
+ ; GCN-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; GCN-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = nsw S_ADD_I32 [[PHI2]], killed [[S_MOV_B32_3]], implicit-def dead $scc
+ ; GCN-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 9
+ ; GCN-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32 = S_ANDN2_B32 [[PHI1]], $exec_lo, implicit-def $scc
+ ; GCN-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32 = S_AND_B32 [[PHI4]], $exec_lo, implicit-def $scc
+ ; GCN-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_ANDN2_B32_2]], [[S_AND_B32_2]], implicit-def $scc
+ ; GCN-NEXT: S_CMP_GT_I32 [[S_ADD_I32_]], killed [[S_MOV_B32_4]], implicit-def $scc
+ ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc
+ ; GCN-NEXT: S_BRANCH %bb.4
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.4:
+ ; GCN-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sgpr_32 = S_MOV_B32 1065353216
+ ; GCN-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[PHI1]]
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY killed [[S_MOV_B32_5]]
+ ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, killed [[S_MOV_B32_6]], 0, [[COPY9]], [[COPY8]], implicit $exec
+ ; GCN-NEXT: FLAT_STORE_DWORD [[COPY4]], killed [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
+ ; GCN-NEXT: SI_RETURN
+ bb.0:
+ successors: %bb.1(0x80000000)
+ liveins: $vgpr1, $vgpr2, $vgpr3, $vgpr4
+
+ %0:vgpr_32 = COPY $vgpr4
+ %1:vgpr_32 = COPY $vgpr3
+ %2:vgpr_32 = COPY $vgpr2
+ %3:vgpr_32 = COPY $vgpr1
+ %4:sgpr_32 = IMPLICIT_DEF
+ %5:sgpr_32 = IMPLICIT_DEF
+ %6:vreg_64 = REG_SEQUENCE %1, %subreg.sub0, %0, %subreg.sub1
+ %7:sgpr_32 = IMPLICIT_DEF
+ %8:sgpr_32 = IMPLICIT_DEF
+ %9:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %2, %subreg.sub1
+ %10:sreg_32 = S_MOV_B32 -1
+ %11:sreg_32 = S_MOV_B32 -1
+ %12:vreg_64 = COPY %6
+ %13:vreg_64 = COPY %9
+ %14:vreg_1 = COPY %10, implicit $exec
+
+ bb.1:
+ successors: %bb.2(0x40000000), %bb.3(0x40000000)
+
+ %15:sreg_32 = PHI %11, %bb.0, %16, %bb.3
+ %17:vreg_64 = PHI %13, %bb.0, %18, %bb.3
+ %19:vreg_1 = PHI %14, %bb.0, %20, %bb.3
+ %21:sreg_32 = COPY %19
+ %22:sreg_32 = SI_IF %21, %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.2
+
+ bb.2:
+ successors: %bb.3(0x80000000)
+
+ %23:vgpr_32 = GLOBAL_LOAD_DWORD %17, 0, 0, implicit $exec :: (load (s32), addrspace 1)
+ %24:sreg_32 = S_MOV_B32 0
+ %25:sreg_32 = V_CMP_EQ_U32_e64 killed %23, killed %24, implicit $exec
+ %26:vreg_1 = COPY %25
+
+ bb.3:
+ successors: %bb.4(0x04000000), %bb.1(0x7c000000)
+
+ %20:vreg_1 = PHI %26, %bb.2, %19, %bb.1 ;%20:vreg_1 = PHI %19, %bb.1, %26, %bb.2 - this is original phi created by SDAG
+ SI_END_CF %22, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ %27:sreg_64 = S_MOV_B64 4
+ %18:vreg_64 = V_ADD_U64_PSEUDO %17, killed %27, implicit-def dead $vcc, implicit $exec
+ %28:sreg_32 = S_MOV_B32 1
+ %16:sreg_32 = nsw S_ADD_I32 %15, killed %28, implicit-def dead $scc
+ %29:sreg_32 = S_MOV_B32 9
+ S_CMP_GT_I32 %16, killed %29, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.1, implicit $scc
+ S_BRANCH %bb.4
+
+ bb.4:
+ %30:vreg_1 = PHI %19, %bb.3
+ %31:sgpr_32 = S_MOV_B32 1065353216
+ %32:sgpr_32 = S_MOV_B32 0
+ %33:sreg_32_xm0_xexec = COPY %30
+ %34:vgpr_32 = COPY killed %31
+ %35:vgpr_32 = V_CNDMASK_B32_e64 0, killed %32, 0, %34, %33, implicit $exec
+ FLAT_STORE_DWORD %12, killed %35, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
+ SI_RETURN
+...
More information about the llvm-commits
mailing list