[llvm] [MachineCSE] Enhance MachineCSE simple PRE to find common subexpressi… (PR #129860)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 5 02:09:41 PST 2025
https://github.com/emelliu created https://github.com/llvm/llvm-project/pull/129860
…on completely (#129516).
When an IF-THEN-ELSE construct appears in a loop, the THEN and ELSE basic blocks may contain multiple MIs that form a common subexpression. Currently the PRE process can only find and hoist one MI at a time, which may introduce many temporary registers whose live ranges extend from CMBB into the THEN/ELSE BBs, increasing register pressure.
This patch tries to find all MIs of the common subexpression and hoist them to CMBB during the PRE process.
>From c28c325ce280cdbd564ca11558c8538388ad0453 Mon Sep 17 00:00:00 2001
From: "xiangyou.liu" <xiangyou.liu at iluvatar.com>
Date: Wed, 26 Feb 2025 14:25:44 +0800
Subject: [PATCH] [MachineCSE] Enhance MachineCSE simple PRE to find common
subexpression completely (#129516).
When an IF-THEN-ELSE construct appears in a loop, the THEN and ELSE basic blocks may contain multiple MIs that form a common subexpression. Currently the PRE process can only find and hoist one MI at a time, which may introduce many temporary registers whose live ranges extend from CMBB into the THEN/ELSE BBs, increasing register pressure.
This patch tries to find all MIs of the common subexpression and hoist them to CMBB during the PRE process.
Signed-off-by: xiangyou.liu <xiangyou.liu at iluvatar.com>
---
llvm/lib/CodeGen/MachineCSE.cpp | 17 +++
.../CodeGen/AMDGPU/not-cse-completely.mir | 126 ++++++++++++++++++
2 files changed, 143 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/not-cse-completely.mir
diff --git a/llvm/lib/CodeGen/MachineCSE.cpp b/llvm/lib/CodeGen/MachineCSE.cpp
index 6d14509c5934f..9cdbcbafb6201 100644
--- a/llvm/lib/CodeGen/MachineCSE.cpp
+++ b/llvm/lib/CodeGen/MachineCSE.cpp
@@ -887,6 +887,23 @@ bool MachineCSEImpl::ProcessBlockPRE(MachineDominatorTree *DT,
NewMI.setDebugLoc(EmptyDL);
NewMI.getOperand(0).setReg(NewReg);
+ for (MachineOperand &MO :
+ llvm::make_early_inc_range(MRI->use_nodbg_operands(VReg))) {
+ if (MO.isUse()) {
+ MO.setReg(NewReg);
+ }
+ }
+ auto *SiblingBBMI = PREMap.try_emplace(&MI).first->getFirst();
+ for (MachineOperand &MO :
+ llvm::make_early_inc_range(MRI->use_nodbg_operands(
+ SiblingBBMI->getOperand(0).getReg()))) {
+ if (MO.isUse()) {
+ MachineInstr *UseMI = MO.getParent();
+ PREMap.erase(UseMI);
+ MO.setReg(NewReg);
+ PREMap[UseMI] = UseMI->getParent();
+ }
+ }
PREMap[&MI] = CMBB;
++NumPREs;
diff --git a/llvm/test/CodeGen/AMDGPU/not-cse-completely.mir b/llvm/test/CodeGen/AMDGPU/not-cse-completely.mir
new file mode 100644
index 0000000000000..c503fff062332
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/not-cse-completely.mir
@@ -0,0 +1,126 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -run-pass=machine-cse -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -passes=machine-cse -o - %s | FileCheck %s
+
+--- |
+ define amdgpu_kernel void @not_cse_completely(ptr addrspace(1), i32 %a, i1) {
+ entry:
+ br label %while.cond
+ while.cond:
+ %cmp = trunc i32 %a to i1
+ br i1 %cmp, label %if.then, label %if.else
+ if.then:
+ br label %if.end
+ if.else:
+ br label %if.end
+ if.end:
+ %cmp2 = trunc i32 %a to i1
+ br i1 %cmp, label %while.cond, label %while.end
+ while.end:
+ ret void
+ }
+...
+---
+name: not_cse_completely
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: not_cse_completely
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $sgpr4_sgpr5, $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 0, 0
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 2, 0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]]
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+ ; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY2]].sub0, 64, 0, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 64, [[COPY2]].sub1, 0, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY3]].sub0, 64, 0, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 64, [[COPY3]].sub1, 0, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1.while.cond:
+ ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM1]].sub1
+ ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 42
+ ; CHECK-NEXT: S_CMP_EQ_U32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
+ ; CHECK-NEXT: [[V_MUL_I32_I24_e32_:%[0-9]+]]:vgpr_32 = V_MUL_I32_I24_e32 [[V_ADD_U32_e64_]], [[V_ADD_U32_e64_1]], implicit $exec
+ ; CHECK-NEXT: [[V_MUL_I32_I24_e32_1:%[0-9]+]]:vgpr_32 = V_MUL_I32_I24_e32 [[V_MUL_I32_I24_e32_]], [[V_ADD_U32_e64_2]], implicit $exec
+ ; CHECK-NEXT: [[V_SUB_I32_e64_:%[0-9]+]]:vgpr_32 = V_SUB_I32_e64 [[V_MUL_I32_I24_e32_]], [[V_MUL_I32_I24_e32_1]], 0, implicit $exec
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit $scc
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2.if.then:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[V_MAX_I32_e32_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e32 [[V_SUB_I32_e64_]], [[V_ADD_U32_e64_3]], implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3.if.else:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[V_MIN_I32_e32_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e32 [[V_SUB_I32_e64_]], [[V_ADD_U32_e64_2]], implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4.if.end:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.5(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[V_MAX_I32_e32_]], %bb.2, [[V_MIN_I32_e32_]], %bb.3
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[S_LOAD_DWORDX2_IMM]]
+ ; CHECK-NEXT: FLAT_STORE_DWORD [[COPY5]], [[PHI]], 0, 0, implicit $exec, implicit $flat_scr
+ ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[PHI]], 2, implicit $exec
+ ; CHECK-NEXT: S_CMP_EQ_U32 [[V_CMP_EQ_U32_e64_]].sub0, 10, implicit-def $scc
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc
+ ; CHECK-NEXT: S_BRANCH %bb.5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5.while.end:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0.entry:
+ liveins: $sgpr4_sgpr5, $vgpr0_vgpr1, $vgpr2_vgpr3
+ %0:sgpr_64(p4) = COPY $sgpr4_sgpr5
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0(p4), 0, 0
+ %2:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0(p4), 2, 0
+ %3:sreg_64 = COPY %1
+ %6:vreg_64 = COPY $vgpr0_vgpr1
+ %7:vreg_64 = COPY $vgpr2_vgpr3
+ %8:vgpr_32 = V_ADD_U32_e64 %6.sub0, 64, 0, implicit $exec
+ %9:vgpr_32 = V_ADD_U32_e64 64, %6.sub1, 0, implicit $exec
+ %10:vgpr_32 = V_ADD_U32_e64 %7.sub0, 64, 0, implicit $exec
+ %11:vgpr_32 = V_ADD_U32_e64 64, %7.sub1, 0, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1.while.cond:
+ %4:sreg_32 = COPY %2.sub1
+ %5:sreg_32 = S_MOV_B32 42
+ S_CMP_EQ_U32 %4, %5, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.3, implicit $scc
+ S_BRANCH %bb.2
+
+ bb.2.if.then:
+ %12:vgpr_32 = V_MUL_I32_I24_e32 %8, %9, implicit $exec
+ %13:vgpr_32 = V_MUL_I32_I24_e32 %12, %10, implicit $exec
+ %14:vgpr_32 = V_SUB_I32_e64 %12, %13, 0, implicit $exec
+ %15:vgpr_32 = V_MAX_I32_e32 %14, %11, implicit $exec
+ S_BRANCH %bb.4
+
+ bb.3.if.else:
+ %16:vgpr_32 = V_MUL_I32_I24_e32 %8, %9, implicit $exec
+ %17:vgpr_32 = V_MUL_I32_I24_e32 %16, %10, implicit $exec
+ %18:vgpr_32 = V_SUB_I32_e64 %16, %17, 0, implicit $exec
+ %19:vgpr_32 = V_MIN_I32_e32 %18, %10, implicit $exec
+
+ bb.4.if.end:
+ %20:vgpr_32 = PHI %15, %bb.2, %19, %bb.3
+ %21:vreg_64 = COPY %1
+ FLAT_STORE_DWORD %21, %20, 0, 0, implicit $exec, implicit $flat_scr
+ %22:sreg_64 = V_CMP_EQ_U32_e64 %20, 2, implicit $exec
+ S_CMP_EQ_U32 %22.sub0, 10, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.1, implicit $scc
+ S_BRANCH %bb.5
+
+ bb.5.while.end:
+ S_ENDPGM 0
+
+...
More information about the llvm-commits
mailing list