[llvm] [AMDGPU] Introduce "amdgpu-uniform-intrinsic-combine" pass to combine uniform AMDGPU lane Intrinsics. (PR #116953)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 16 06:19:38 PDT 2025
================
@@ -0,0 +1,161 @@
+//===-- AMDGPUUniformIntrinsicCombine.cpp ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass simplifies certain intrinsic calls when the arguments are uniform.
+/// It's true that this pass has transforms that can lead to a situation where
+/// some instruction whose operand was previously recognized as statically
+/// uniform is later on no longer recognized as statically uniform. However, the
+/// semantics of how programs execute don't (and must not, for this precise
+/// reason) care about static uniformity, they only ever care about dynamic
+/// uniformity. And every instruction that's downstream and cares about dynamic
+/// uniformity must be convergent (and isel will introduce v_readfirstlane for
+/// them if their operands can't be proven statically uniform).
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "GCNSubtarget.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/UniformityAnalysis.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+#define DEBUG_TYPE "amdgpu-uniform-intrinsic-combine"
+
+using namespace llvm;
+using namespace llvm::AMDGPU;
+using namespace llvm::PatternMatch;
+
+/// Tracks uniformity of newly created instructions.
+using UniformityTracker = ValueMap<const Value *, bool>;
+
+/// Wrapper for querying uniformity info that first checks new instructions.
+static bool isDivergentUseWithNew(const Use &U, const UniformityInfo &UI,
+ const UniformityTracker &Tracker) {
+ Value *V = U.get();
+ if (auto It = Tracker.find(V); It != Tracker.end())
+ return !It->second; // divergent if marked false
+ return UI.isDivergentUse(U);
+}
+
+/// Optimizes uniform intrinsics.
+static bool optimizeUniformIntrinsic(IntrinsicInst &II,
+ const UniformityInfo &UI,
+ UniformityTracker &Tracker) {
+ llvm::Intrinsic::ID IID = II.getIntrinsicID();
+
+ switch (IID) {
+ case Intrinsic::amdgcn_permlane64:
+ case Intrinsic::amdgcn_readfirstlane:
+ case Intrinsic::amdgcn_readlane: {
+ Value *Src = II.getArgOperand(0);
+ if (isDivergentUseWithNew(II.getOperandUse(0), UI, Tracker))
+ return false;
+ LLVM_DEBUG(dbgs() << "Replacing " << II << " with " << *Src << '\n');
+ II.replaceAllUsesWith(Src);
+ II.eraseFromParent();
+ return true;
+ }
+ case Intrinsic::amdgcn_ballot: {
+ Value *Src = II.getArgOperand(0);
+ if (isDivergentUseWithNew(II.getOperandUse(0), UI, Tracker))
+ return false;
+ LLVM_DEBUG(dbgs() << "Found uniform ballot intrinsic: " << II << '\n');
+
+ // If there are no ICmp users, return early.
+ if (none_of(II.users(), [](User *U) { return isa<ICmpInst>(U); }))
+ return false;
----------------
arsenm wrote:
I'd just drop this. It's an extra use list walk
https://github.com/llvm/llvm-project/pull/116953
More information about the llvm-commits
mailing list