[llvm] [WIP][AMDGPU] combine uniform AMDGPU lane Intrinsics (PR #116953)
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 21 06:56:37 PST 2024
================
@@ -0,0 +1,183 @@
+//===-- AMDGPUUniformIntrinsicCombine.cpp ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass combines uniform AMDGPU lane intrinsic calls.
+/// It uses pattern matching and UniformityAnalysis to identify and fold
+/// redundant intrinsic instructions whose operands are already uniform.
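+///
+/// For example, a readfirstlane whose source operand is already known to
+/// be uniform can be replaced by the operand itself:
+///   %v = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %src) ; %src uniform
+///   ==> all uses of %v are replaced by %src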
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "GCNSubtarget.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/UniformityAnalysis.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+#define DEBUG_TYPE "amdgpu-uniform-intrinsic-combine"
+
+using namespace llvm;
+using namespace llvm::AMDGPU;
+using namespace llvm::PatternMatch;
+
+namespace {
+
+class AMDGPUUniformIntrinsicCombine : public FunctionPass {
+public:
+ static char ID;
+ AMDGPUUniformIntrinsicCombine() : FunctionPass(ID) {}
+
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<UniformityInfoWrapperPass>();
+ AU.addRequired<TargetPassConfig>();
+ }
+};
+
+class AMDGPUUniformIntrinsicCombineImpl
+ : public InstVisitor<AMDGPUUniformIntrinsicCombineImpl> {
+private:
+ const UniformityInfo *UI;
+
+ /// Returns true if the intrinsic call \p II was optimized away.
+ bool optimizeUniformIntrinsicInst(IntrinsicInst &II) const;
+
+public:
+ AMDGPUUniformIntrinsicCombineImpl() = delete;
+
+ AMDGPUUniformIntrinsicCombineImpl(const UniformityInfo *UI) : UI(UI) {}
+
+ bool run(Function &F);
+};
+
+} // namespace
+
+char AMDGPUUniformIntrinsicCombine::ID = 0;
+
+char &llvm::AMDGPUUniformIntrinsicCombineID = AMDGPUUniformIntrinsicCombine::ID;
+
+bool AMDGPUUniformIntrinsicCombine::runOnFunction(Function &F) {
+ if (skipFunction(F)) {
+ return false;
+ }
+
+ const UniformityInfo *UI =
+ &getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
+
+ return AMDGPUUniformIntrinsicCombineImpl(UI).run(F);
+}
+
+PreservedAnalyses
+AMDGPUUniformIntrinsicCombinePass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+
+ const auto *UI = &AM.getResult<UniformityInfoAnalysis>(F);
+
+ bool IsChanged = AMDGPUUniformIntrinsicCombineImpl(UI).run(F);
+
+ if (!IsChanged) {
+ return PreservedAnalyses::all();
+ }
+
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ return PA;
+}
+
+bool AMDGPUUniformIntrinsicCombineImpl::run(Function &F) {
+ bool IsChanged = false;
+
+ // Walk every instruction in the function, looking for intrinsic calls
+ // whose results can be folded when their operands are uniform.
+ for (BasicBlock &BB : F) {
+ for (Instruction &I : BB) {
+ if (auto *Intrinsic = dyn_cast<IntrinsicInst>(&I)) {
+ IsChanged |= optimizeUniformIntrinsicInst(*Intrinsic);
+ }
+ }
+ }
+
+ return IsChanged;
+}
+
+bool AMDGPUUniformIntrinsicCombineImpl::optimizeUniformIntrinsicInst(
+ IntrinsicInst &II) const {
+ Intrinsic::ID IID = II.getIntrinsicID();
+
+ switch (IID) {
+ case Intrinsic::amdgcn_permlane64: {
+ Value *Src = II.getOperand(0);
+ if (UI->isUniform(Src)) {
+ II.replaceAllUsesWith(Src);
+ return true;
+ }
+ break;
+ }
+ case Intrinsic::amdgcn_readfirstlane:
+ case Intrinsic::amdgcn_readlane: {
+ Value *Srcv = II.getOperand(0);
+ if (UI->isUniform(Srcv)) {
+ II.replaceAllUsesWith(Srcv);
+ return true;
+ }
+
+ // The remaining cases are not safe if the exec mask can differ between
+ // the def and this use.
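+ // For example, if the source is defined under a divergent branch, the
+ // set of active lanes at its def can differ from the set of active
+ // lanes here, so a lane read performed here may not see the value this
+ // fold assumes. Requiring the def and use to share a basic block is a
+ // conservative guard against that.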
+ Value *Src = II.getArgOperand(0);
+ Instruction *SrcInst = dyn_cast<Instruction>(Src);
+ if (SrcInst && SrcInst->getParent() != II.getParent())
+ break;
+
+ // readfirstlane (readfirstlane x) -> readfirstlane x
----------------
jayfoad wrote:
I don't think you need any of these special cases. They should all be handled by the isUniform(Srcv) case above.
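For illustration, a minimal IR sketch of that point (a hypothetical
example, not taken from the patch):

  declare i32 @llvm.amdgcn.readfirstlane.i32(i32)

  define amdgpu_kernel void @fold(i32 %x, ptr addrspace(1) %out) {
    ; %x is a kernel argument, so UniformityAnalysis proves it uniform and
    ; the isUniform(Srcv) check above already folds this call to %x:
    %a = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %x)
    ; readfirstlane's result is itself always uniform, so the nested call
    ; is caught by the same check; no dedicated
    ; readfirstlane(readfirstlane x) pattern is needed:
    %b = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %a)
    store i32 %b, ptr addrspace(1) %out
    ret void
  }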
https://github.com/llvm/llvm-project/pull/116953
More information about the llvm-commits mailing list