[llvm] [AMDGPU][TTI] Add Target Hook for Instruction Uniformity (getInstructionUniformity) (PR #137639)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 28 07:29:43 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Pankaj Dwivedi (PankajDwivedi-25)
<details>
<summary>Changes</summary>
This patch introduces a new target hook `getInstructionUniformity(const Instruction &I)` in `TargetTransformInfo`, enabling targets to describe more complex relationships between operand uniformity and instruction uniformity.
Currently, UniformityAnalysis categorizes instructions into a fixed set of InstructionUniformity values (Default, AlwaysUniform, NeverUniform).
However, some instructions, particularly intrinsics, have operand-dependent uniformity behaviors that are not easily captured within this framework.
This hook allows targets to override and implement custom uniformity-propagation rules for such cases.
---
Full diff: https://github.com/llvm/llvm-project/pull/137639.diff
7 Files Affected:
- (modified) llvm/include/llvm/Analysis/TargetTransformInfo.h (+8)
- (modified) llvm/include/llvm/Analysis/TargetTransformInfoImpl.h (+5)
- (modified) llvm/lib/Analysis/TargetTransformInfo.cpp (+5)
- (modified) llvm/lib/Analysis/UniformityAnalysis.cpp (+13)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp (+21)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h (+2)
- (added) llvm/test/Analysis/UniformityAnalysis/AMDGPU/uniform_intrinsic.ll (+25)
``````````diff
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 022530dc846ea..9af5006ce9c6d 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -23,6 +23,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Uniformity.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/IR/FMF.h"
#include "llvm/IR/InstrTypes.h"
@@ -1916,6 +1917,13 @@ class TargetTransformInfo {
const Function &F,
SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const;
+ /// Target can implement more complex patterns for getting Uniformity of an
+ /// instruction.Currently Uniformity analysis catagorises instructions with a
+ /// fixed set of InstructionUniformity values: Default, AlwaysUniform and
+ /// NeverUniform.
+ std::optional<InstructionUniformity>
+ getInstructionUniformity(const Instruction &I) const;
+
private:
std::unique_ptr<const TargetTransformInfoImplBase> TTIImpl;
};
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 990252b1e5743..5bee462575181 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -1147,6 +1147,11 @@ class TargetTransformInfoImplBase {
const Function &F,
SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const {}
+ virtual std::optional<InstructionUniformity>
+ getInstructionUniformity(const Instruction &I) const {
+ return std::nullopt;
+ }
+
protected:
// Obtain the minimum required size to hold the value (without the sign)
// In case of a vector it returns the min required size for one element.
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 8548afea72964..50157a7714bf7 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1476,6 +1476,11 @@ void TargetTransformInfo::collectKernelLaunchBounds(
return TTIImpl->collectKernelLaunchBounds(F, LB);
}
+std::optional<InstructionUniformity>
+TargetTransformInfo::getInstructionUniformity(const Instruction &I) const {
+ return TTIImpl->getInstructionUniformity(I);
+}
+
TargetTransformInfoImplBase::~TargetTransformInfoImplBase() = default;
TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
diff --git a/llvm/lib/Analysis/UniformityAnalysis.cpp b/llvm/lib/Analysis/UniformityAnalysis.cpp
index 2101fdfacfc8f..2fc6f523139a7 100644
--- a/llvm/lib/Analysis/UniformityAnalysis.cpp
+++ b/llvm/lib/Analysis/UniformityAnalysis.cpp
@@ -35,7 +35,20 @@ template <> void llvm::GenericUniformityAnalysisImpl<SSAContext>::initialize() {
markDivergent(I);
else if (TTI->isAlwaysUniform(&I))
addUniformOverride(I);
+ else if (auto Uniformity = TTI->getInstructionUniformity(I)) {
+ switch (*Uniformity) {
+ case InstructionUniformity::AlwaysUniform:
+ addUniformOverride(I);
+ break;
+ case InstructionUniformity::NeverUniform:
+ markDivergent(I);
+ break;
+ case InstructionUniformity::Default:
+ break;
+ }
+ }
}
+
for (auto &Arg : F.args()) {
if (TTI->isSourceOfDivergence(&Arg)) {
markDivergent(&Arg);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 204d3df546bbf..5c59847dfeb62 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -1422,3 +1422,24 @@ void GCNTTIImpl::collectKernelLaunchBounds(
LB.push_back({"amdgpu-waves-per-eu[0]", WavesPerEU.first});
LB.push_back({"amdgpu-waves-per-eu[1]", WavesPerEU.second});
}
+
+std::optional<InstructionUniformity>
+GCNTTIImpl::getInstructionUniformity(const Instruction &I) const {
+ if (const auto *II = dyn_cast<IntrinsicInst>(&I)) {
+ // We can define the custom rules for the intrinsics uniformity, depending
+ // on argument.
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::amdgcn_permlane64:
+ // If either operand is uniform, the result is uniform.
+ for (unsigned Arg_i = 0, NumArg = II->arg_size(); Arg_i < NumArg;
+ Arg_i++) {
+ if (!isSourceOfDivergence(II->getArgOperand(Arg_i)))
+ return InstructionUniformity::AlwaysUniform;
+ }
+ return InstructionUniformity::Default;
+ default:
+ break;
+ }
+ }
+ return std::nullopt;
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index f6f7bd4bfcf5b..bea0b024d745b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -290,6 +290,8 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
void collectKernelLaunchBounds(
const Function &F,
SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const override;
+ std::optional<InstructionUniformity>
+ getInstructionUniformity(const Instruction &I) const override;
};
} // end namespace llvm
diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/uniform_intrinsic.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/uniform_intrinsic.ll
new file mode 100644
index 0000000000000..4bb89516b2e81
--- /dev/null
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/uniform_intrinsic.ll
@@ -0,0 +1,25 @@
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s
+
+; CHECK: ALL VALUES UNIFORM
+define amdgpu_kernel void @permlane64_constant(ptr addrspace(1) %out) {
+ %v = call i32 @llvm.amdgcn.permlane64(i32 7)
+ store i32 %v, ptr addrspace(1) %out
+ ret void
+}
+
+; CHECK: ALL VALUES UNIFORM
+define amdgpu_kernel void @permlane64_uniform(ptr addrspace(1) %out, i32 %src) {
+ %v = call i32 @llvm.amdgcn.permlane64(i32 %src)
+ store i32 %v, ptr addrspace(1) %out
+ ret void
+}
+
+; CHECK: DIVERGENT: %tid = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK: DIVERGENT: %v = call i32 @llvm.amdgcn.permlane64.i32(i32 %tid)
+define amdgpu_kernel void @permlane64_nonuniform(i32 addrspace(1)* %out) {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %v = call i32 @llvm.amdgcn.permlane64(i32 %tid)
+ %out_ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ store i32 %v, i32 addrspace(1)* %out_ptr
+ ret void
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/137639
More information about the llvm-commits
mailing list