[llvm] [AMDGPU][TTI] Add Target Hook for Instruction Uniformity (getInstructionUniformity) (PR #137639)
Pankaj Dwivedi via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 19 04:51:03 PDT 2025
https://github.com/PankajDwivedi-25 updated https://github.com/llvm/llvm-project/pull/137639
>From e4cc7731ebaf7f49f48801856126aa8e4c5b77be Mon Sep 17 00:00:00 2001
From: Pankaj kumar divedi <Pankajkumar.divedi at amd.com>
Date: Fri, 19 Sep 2025 17:20:37 +0530
Subject: [PATCH] currently users of a divergent value are marked as divergent,
instead target could extend
---
llvm/include/llvm/ADT/GenericUniformityImpl.h | 6 ++
llvm/include/llvm/ADT/Uniformity.h | 6 +-
.../llvm/Analysis/TargetTransformInfo.h | 9 +++
.../llvm/Analysis/TargetTransformInfoImpl.h | 6 ++
llvm/lib/Analysis/TargetTransformInfo.cpp | 7 +++
llvm/lib/Analysis/UniformityAnalysis.cpp | 56 ++++++++++++++-----
.../Target/AMDGPU/AMDGPUSearchableTables.td | 1 -
.../AMDGPU/AMDGPUTargetTransformInfo.cpp | 23 ++++++++
.../Target/AMDGPU/AMDGPUTargetTransformInfo.h | 4 ++
.../UniformityAnalysis/AMDGPU/intrinsics.ll | 2 +-
.../AMDGPU/uniform_intrinsic.ll | 11 ++++
11 files changed, 114 insertions(+), 17 deletions(-)
create mode 100644 llvm/test/Analysis/UniformityAnalysis/AMDGPU/uniform_intrinsic.ll
diff --git a/llvm/include/llvm/ADT/GenericUniformityImpl.h b/llvm/include/llvm/ADT/GenericUniformityImpl.h
index d10355fff1bea..92661eeb02604 100644
--- a/llvm/include/llvm/ADT/GenericUniformityImpl.h
+++ b/llvm/include/llvm/ADT/GenericUniformityImpl.h
@@ -51,6 +51,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Uniformity.h"
#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "uniformity"
@@ -406,6 +407,11 @@ template <typename ContextT> class GenericUniformityAnalysisImpl {
void recordTemporalDivergence(ConstValueRefT, const InstructionT *,
const CycleT *);
+ /// @brief Collect the uniformity of the operands of an instruction.
+ /// @param I instruction whose operands are inspected.
+ /// @return vector of InstructionUniformity values for the operands of \p I.
+ llvm::SmallVector<InstructionUniformity>
+ getOperandUniformities(const Instruction &I) const;
protected:
/// \brief Value/block pair representing a single phi input.
diff --git a/llvm/include/llvm/ADT/Uniformity.h b/llvm/include/llvm/ADT/Uniformity.h
index 21ca106b80be3..93377225e6dbb 100644
--- a/llvm/include/llvm/ADT/Uniformity.h
+++ b/llvm/include/llvm/ADT/Uniformity.h
@@ -23,7 +23,11 @@ enum class InstructionUniformity {
AlwaysUniform,
/// The result values can never be assumed to be uniform.
- NeverUniform
+ NeverUniform,
+
+ /// The result value is uniform because one or more of its operands are
+ /// uniform.
+ Uniform
};
} // namespace llvm
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 022530dc846ea..7b410c4acc1db 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -23,6 +23,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Uniformity.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/IR/FMF.h"
#include "llvm/IR/InstrTypes.h"
@@ -1916,6 +1917,14 @@ class TargetTransformInfo {
const Function &F,
SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const;
+ /// Target can implement more complex patterns for getting Uniformity of an
+ /// instruction. Currently Uniformity analysis categorises instructions with a
+ /// fixed set of InstructionUniformity values: Default, AlwaysUniform and
+ /// NeverUniform.
+ std::optional<InstructionUniformity> getInstructionUniformity(
+ const Instruction &I,
+ const SmallVector<InstructionUniformity> &OperandUniformities) const;
+
private:
std::unique_ptr<const TargetTransformInfoImplBase> TTIImpl;
};
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 990252b1e5743..996f391d48015 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -1147,6 +1147,12 @@ class TargetTransformInfoImplBase {
const Function &F,
SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const {}
+ virtual std::optional<InstructionUniformity> getInstructionUniformity(
+ const Instruction &I,
+ const SmallVector<InstructionUniformity> &OperandUniformities) const {
+ return std::nullopt; // base implementation: no target-specific answer
+ }
+
protected:
// Obtain the minimum required size to hold the value (without the sign)
// In case of a vector it returns the min required size for one element.
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 8548afea72964..6eeac0ff2f813 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1476,6 +1476,13 @@ void TargetTransformInfo::collectKernelLaunchBounds(
return TTIImpl->collectKernelLaunchBounds(F, LB);
}
+std::optional<InstructionUniformity>
+TargetTransformInfo::getInstructionUniformity(
+ const Instruction &I,
+ const SmallVector<InstructionUniformity> &OperandUniformities) const {
+ return TTIImpl->getInstructionUniformity(I, OperandUniformities);
+}
+
TargetTransformInfoImplBase::~TargetTransformInfoImplBase() = default;
TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
diff --git a/llvm/lib/Analysis/UniformityAnalysis.cpp b/llvm/lib/Analysis/UniformityAnalysis.cpp
index 2101fdfacfc8f..58532d9c3e615 100644
--- a/llvm/lib/Analysis/UniformityAnalysis.cpp
+++ b/llvm/lib/Analysis/UniformityAnalysis.cpp
@@ -29,6 +29,35 @@ bool llvm::GenericUniformityAnalysisImpl<SSAContext>::markDefsDivergent(
return markDivergent(cast<Value>(&Instr));
}
+template <>
+bool llvm::GenericUniformityAnalysisImpl<SSAContext>::isDivergentUse(
+ const Use &U) const {
+ const auto *V = U.get();
+ if (isDivergent(V)) // the used value itself is divergent
+ return true;
+ if (const auto *DefInstr = dyn_cast<Instruction>(V)) {
+ const auto *UseInstr = cast<Instruction>(U.getUser());
+ return isTemporalDivergent(*UseInstr->getParent(), *DefInstr);
+ }
+ return false; // not divergent and not defined by an instruction
+}
+
+template <>
+llvm::SmallVector<InstructionUniformity>
+llvm::GenericUniformityAnalysisImpl<SSAContext>::getOperandUniformities(
+ const Instruction &I) const {
+ SmallVector<InstructionUniformity> Result;
+ // Only Instruction/Argument operands get an entry; others are skipped, so
+ // entry positions do not necessarily match operand numbers.
+ for (const Use &Op : I.operands()) {
+ if (!isa<Instruction, Argument>(Op.get()))
+ continue;
+ Result.push_back(isDivergentUse(Op) ? InstructionUniformity::Default
+ : InstructionUniformity::Uniform);
+ }
+ return Result;
+}
+
template <> void llvm::GenericUniformityAnalysisImpl<SSAContext>::initialize() {
for (auto &I : instructions(F)) {
if (TTI->isSourceOfDivergence(&I))
@@ -47,9 +76,21 @@ template <>
void llvm::GenericUniformityAnalysisImpl<SSAContext>::pushUsers(
const Value *V) {
for (const auto *User : V->users()) {
- if (const auto *UserInstr = dyn_cast<const Instruction>(User)) {
+ const auto *UserInstr = dyn_cast<const Instruction>(User);
+ if (!UserInstr)
+ continue;
+ // Without TTI there is no target hook: the user is simply marked divergent.
+ if (!TTI) {
markDivergent(*UserInstr);
+ continue;
}
+ // Ask the target; only an explicit Uniform answer adds a uniform override.
+ auto Uniformity = TTI->getInstructionUniformity(
+ *UserInstr, getOperandUniformities(*UserInstr));
+ if (Uniformity == InstructionUniformity::Uniform)
+ addUniformOverride(*UserInstr);
+ else if (Uniformity != InstructionUniformity::AlwaysUniform)
+ markDivergent(*UserInstr); // nullopt, Default and NeverUniform
}
}
@@ -88,19 +129,6 @@ void llvm::GenericUniformityAnalysisImpl<
}
}
-template <>
-bool llvm::GenericUniformityAnalysisImpl<SSAContext>::isDivergentUse(
- const Use &U) const {
- const auto *V = U.get();
- if (isDivergent(V))
- return true;
- if (const auto *DefInstr = dyn_cast<Instruction>(V)) {
- const auto *UseInstr = cast<Instruction>(U.getUser());
- return isTemporalDivergent(*UseInstr->getParent(), *DefInstr);
- }
- return false;
-}
-
// This ensures explicit instantiation of
// GenericUniformityAnalysisImpl::ImplDeleter::operator()
template class llvm::GenericUniformityInfo<SSAContext>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
index 3b62dcf3c92cd..059dba03f4cb6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
@@ -317,7 +317,6 @@ def : SourceOfDivergence<int_amdgcn_live_mask>;
def : SourceOfDivergence<int_amdgcn_ds_swizzle>;
def : SourceOfDivergence<int_amdgcn_ds_ordered_add>;
def : SourceOfDivergence<int_amdgcn_ds_ordered_swap>;
-def : SourceOfDivergence<int_amdgcn_permlane16>;
def : SourceOfDivergence<int_amdgcn_permlanex16>;
def : SourceOfDivergence<int_amdgcn_permlane16_var>;
def : SourceOfDivergence<int_amdgcn_permlanex16_var>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 204d3df546bbf..98a33f7a96e43 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -1422,3 +1422,26 @@ void GCNTTIImpl::collectKernelLaunchBounds(
LB.push_back({"amdgpu-waves-per-eu[0]", WavesPerEU.first});
LB.push_back({"amdgpu-waves-per-eu[1]", WavesPerEU.second});
}
+
+std::optional<InstructionUniformity> GCNTTIImpl::getInstructionUniformity(
+ const Instruction &I,
+ const SmallVector<InstructionUniformity> &OperandUniformities) const {
+ // Only calls to the permlane16 intrinsic are special-cased; all else is Default.
+ if (auto *CI = dyn_cast<CallInst>(&I)) {
+ const Function *CalledFunc = CI->getCalledFunction();
+ if (!CalledFunc)
+ return InstructionUniformity::Default;
+ // NOTE(review): one Uniform entry suffices below -- confirm this is intended.
+ if (CalledFunc->getIntrinsicID() == Intrinsic::amdgcn_permlane16) {
+ // Report Uniform as soon as any entry of OperandUniformities is Uniform.
+ for (InstructionUniformity Uniformity : OperandUniformities) {
+ if (Uniformity == InstructionUniformity::Uniform)
+ return InstructionUniformity::Uniform;
+ }
+ // No entry is Uniform: fall back to Default.
+ return InstructionUniformity::Default;
+ }
+ }
+ // Not a call, or a call to some other function.
+ return InstructionUniformity::Default;
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index f6f7bd4bfcf5b..21306964278ff 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -290,6 +290,10 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
void collectKernelLaunchBounds(
const Function &F,
SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const override;
+ std::optional<InstructionUniformity>
+ getInstructionUniformity(const Instruction &I,
+ const SmallVector<InstructionUniformity>
+ &OperandUniformities) const override;
};
} // end namespace llvm
diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll
index bb840023daf5d..0a434b2a70653 100644
--- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll
@@ -7,7 +7,7 @@ define amdgpu_kernel void @ds_swizzle(ptr addrspace(1) %out, i32 %src) #0 {
ret void
}
-; CHECK: DIVERGENT: %v = call i32 @llvm.amdgcn.permlane16.i32(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
+; CHECK: ALL VALUES UNIFORM
define amdgpu_kernel void @v_permlane16_b32(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
%v = call i32 @llvm.amdgcn.permlane16.i32(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
store i32 %v, ptr addrspace(1) %out
diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/uniform_intrinsic.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/uniform_intrinsic.ll
new file mode 100644
index 0000000000000..e6c84bde81eff
--- /dev/null
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/uniform_intrinsic.ll
@@ -0,0 +1,11 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -mtriple amdgcn-- -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s
+
+; CHECK: ALL VALUES UNIFORM
+define amdgpu_kernel void @v_permlane16_b32(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
+ %v = call i32 @llvm.amdgcn.permlane16.i32(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
+ store i32 %v, ptr addrspace(1) %out
+ ret void
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
More information about the llvm-commits
mailing list