[llvm-branch-commits] [llvm] [AMDGPU][NewPM] Port SIOptimizeExecMaskingPreRA to NPM (PR #125351)
Akshat Oke via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sat Feb 1 10:25:00 PST 2025
https://github.com/optimisan created https://github.com/llvm/llvm-project/pull/125351
None
>From 7ef9eb592f3d84763de4d9a59cd6f6ea737e82f0 Mon Sep 17 00:00:00 2001
From: Akshat Oke <Akshat.Oke at amd.com>
Date: Sat, 1 Feb 2025 18:21:24 +0000
Subject: [PATCH] [AMDGPU][NewPM] Port SIOptimizeExecMaskingPreRA to NPM
---
llvm/lib/Target/AMDGPU/AMDGPU.h | 2 +-
llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def | 2 +-
.../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 3 +-
.../AMDGPU/SIOptimizeExecMaskingPreRA.cpp | 42 ++++++++++++++-----
.../AMDGPU/SIOptimizeExecMaskingPreRA.h | 24 +++++++++++
.../CodeGen/AMDGPU/collapse-endcf-broken.mir | 1 +
...ask-pre-ra-non-empty-but-used-interval.mir | 1 +
7 files changed, 62 insertions(+), 13 deletions(-)
create mode 100644 llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.h
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 31656c98ccd36fa..692eb937efa6743 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -368,7 +368,7 @@ struct AMDGPUUnifyMetadataPass : PassInfoMixin<AMDGPUUnifyMetadataPass> {
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};
-void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry&);
+void initializeSIOptimizeExecMaskingPreRALegacyPass(PassRegistry &);
extern char &SIOptimizeExecMaskingPreRAID;
void initializeSIOptimizeVGPRLiveRangeLegacyPass(PassRegistry &);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 45e2f0d9097adfd..95d82ae407e6772 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -108,6 +108,7 @@ MACHINE_FUNCTION_PASS("si-lower-sgpr-spills", SILowerSGPRSpillsPass())
MACHINE_FUNCTION_PASS("si-lower-wwm-copies", SILowerWWMCopiesPass())
MACHINE_FUNCTION_PASS("si-opt-vgpr-liverange", SIOptimizeVGPRLiveRangePass())
MACHINE_FUNCTION_PASS("si-optimize-exec-masking", SIOptimizeExecMaskingPass())
+MACHINE_FUNCTION_PASS("si-optimize-exec-masking-pre-ra", SIOptimizeExecMaskingPreRAPass())
MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass())
MACHINE_FUNCTION_PASS("si-pre-allocate-wwm-regs", SIPreAllocateWWMRegsPass())
MACHINE_FUNCTION_PASS("si-shrink-instructions", SIShrinkInstructionsPass())
@@ -127,7 +128,6 @@ DUMMY_MACHINE_FUNCTION_PASS("si-insert-waitcnts", SIInsertWaitcntsPass())
DUMMY_MACHINE_FUNCTION_PASS("si-late-branch-lowering", SILateBranchLoweringPass())
DUMMY_MACHINE_FUNCTION_PASS("si-memory-legalizer", SIMemoryLegalizerPass())
DUMMY_MACHINE_FUNCTION_PASS("si-mode-register", SIModeRegisterPass())
-DUMMY_MACHINE_FUNCTION_PASS("si-optimize-exec-masking-pre-ra", SIOptimizeExecMaskingPreRAPass())
DUMMY_MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass())
// TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it
// already exists.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index d21f9c846c95130..27dc5a502d109b2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -46,6 +46,7 @@
#include "SIMachineFunctionInfo.h"
#include "SIMachineScheduler.h"
#include "SIOptimizeExecMasking.h"
+#include "SIOptimizeExecMaskingPreRA.h"
#include "SIOptimizeVGPRLiveRange.h"
#include "SIPeepholeSDWA.h"
#include "SIPreAllocateWWMRegs.h"
@@ -495,7 +496,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeSIFoldOperandsLegacyPass(*PR);
initializeSIPeepholeSDWALegacyPass(*PR);
initializeSIShrinkInstructionsLegacyPass(*PR);
- initializeSIOptimizeExecMaskingPreRAPass(*PR);
+ initializeSIOptimizeExecMaskingPreRALegacyPass(*PR);
initializeSIOptimizeVGPRLiveRangeLegacyPass(*PR);
initializeSILoadStoreOptimizerLegacyPass(*PR);
initializeAMDGPUCtorDtorLoweringLegacyPass(*PR);
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
index 31f65d82a4d2bcb..2a8a398d7429d05 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -12,6 +12,7 @@
///
//===----------------------------------------------------------------------===//
+#include "SIOptimizeExecMaskingPreRA.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -25,7 +26,7 @@ using namespace llvm;
namespace {
-class SIOptimizeExecMaskingPreRA : public MachineFunctionPass {
+class SIOptimizeExecMaskingPreRA {
private:
const SIRegisterInfo *TRI;
const SIInstrInfo *TII;
@@ -42,11 +43,18 @@ class SIOptimizeExecMaskingPreRA : public MachineFunctionPass {
bool optimizeVcndVcmpPair(MachineBasicBlock &MBB);
bool optimizeElseBranch(MachineBasicBlock &MBB);
+public:
+ SIOptimizeExecMaskingPreRA(LiveIntervals *LIS) : LIS(LIS) {}
+ bool run(MachineFunction &MF);
+};
+
+class SIOptimizeExecMaskingPreRALegacy : public MachineFunctionPass {
public:
static char ID;
- SIOptimizeExecMaskingPreRA() : MachineFunctionPass(ID) {
- initializeSIOptimizeExecMaskingPreRAPass(*PassRegistry::getPassRegistry());
+ SIOptimizeExecMaskingPreRALegacy() : MachineFunctionPass(ID) {
+ initializeSIOptimizeExecMaskingPreRALegacyPass(
+ *PassRegistry::getPassRegistry());
}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -64,18 +72,18 @@ class SIOptimizeExecMaskingPreRA : public MachineFunctionPass {
} // End anonymous namespace.
-INITIALIZE_PASS_BEGIN(SIOptimizeExecMaskingPreRA, DEBUG_TYPE,
+INITIALIZE_PASS_BEGIN(SIOptimizeExecMaskingPreRALegacy, DEBUG_TYPE,
"SI optimize exec mask operations pre-RA", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
-INITIALIZE_PASS_END(SIOptimizeExecMaskingPreRA, DEBUG_TYPE,
+INITIALIZE_PASS_END(SIOptimizeExecMaskingPreRALegacy, DEBUG_TYPE,
"SI optimize exec mask operations pre-RA", false, false)
-char SIOptimizeExecMaskingPreRA::ID = 0;
+char SIOptimizeExecMaskingPreRALegacy::ID = 0;
-char &llvm::SIOptimizeExecMaskingPreRAID = SIOptimizeExecMaskingPreRA::ID;
+char &llvm::SIOptimizeExecMaskingPreRAID = SIOptimizeExecMaskingPreRALegacy::ID;
FunctionPass *llvm::createSIOptimizeExecMaskingPreRAPass() {
- return new SIOptimizeExecMaskingPreRA();
+ return new SIOptimizeExecMaskingPreRALegacy();
}
// See if there is a def between \p AndIdx and \p SelIdx that needs to live
@@ -340,15 +348,29 @@ bool SIOptimizeExecMaskingPreRA::optimizeElseBranch(MachineBasicBlock &MBB) {
return true;
}
-bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
+PreservedAnalyses
+SIOptimizeExecMaskingPreRAPass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM) {
+ auto &LIS = MFAM.getResult<LiveIntervalsAnalysis>(MF);
+ SIOptimizeExecMaskingPreRA(&LIS).run(MF);
+ return PreservedAnalyses::all();
+}
+
+bool SIOptimizeExecMaskingPreRALegacy::runOnMachineFunction(
+ MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
+ auto *LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
+ return SIOptimizeExecMaskingPreRA(LIS).run(MF);
+}
+
+bool SIOptimizeExecMaskingPreRA::run(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
TRI = ST.getRegisterInfo();
TII = ST.getInstrInfo();
MRI = &MF.getRegInfo();
- LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
+ // LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
const bool Wave32 = ST.isWave32();
AndOpc = Wave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.h b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.h
new file mode 100644
index 000000000000000..eca79c0d8dbfbb2
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.h
@@ -0,0 +1,24 @@
+//===- SIOptimizeExecMaskingPreRA.h.h ---------------------------------------*-
+//C++- *-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_SIOPTIMIZEEXECMASKINGPRERA_H
+#define LLVM_LIB_TARGET_AMDGPU_SIOPTIMIZEEXECMASKINGPRERA_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+class SIOptimizeExecMaskingPreRAPass
+ : public PassInfoMixin<SIOptimizeExecMaskingPreRAPass> {
+public:
+ PreservedAnalyses run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM);
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_SIOPTIMIZEEXECMASKINGPRERA_H
diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf-broken.mir b/llvm/test/CodeGen/AMDGPU/collapse-endcf-broken.mir
index 7aea97a3053c7d5..2eb1f5d559651b5 100644
--- a/llvm/test/CodeGen/AMDGPU/collapse-endcf-broken.mir
+++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf-broken.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass=si-optimize-exec-masking-pre-ra %s -o - | FileCheck -check-prefix=GXN %s
+# RUN: llc -mtriple=amdgcn -verify-machineinstrs -passes=si-optimize-exec-masking-pre-ra %s -o - | FileCheck -check-prefix=GXN %s
# FIXME: This is a miscompile, and the s_or_b64s need to be preserved.
diff --git a/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-non-empty-but-used-interval.mir b/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-non-empty-but-used-interval.mir
index 63ee27e0f83ba6e..9673186e5ae3f64 100644
--- a/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-non-empty-but-used-interval.mir
+++ b/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-non-empty-but-used-interval.mir
@@ -1,4 +1,5 @@
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=si-optimize-exec-masking-pre-ra,greedy -verify-machineinstrs -o - %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -passes=si-optimize-exec-masking-pre-ra,greedy -verify-machineinstrs -o - %s
# This sample can trigger a "Non-empty but used interval" assert in regalloc if
# SIOptimizeExecMaskingPreRA does not update live intervals correctly.
More information about the llvm-branch-commits
mailing list