[llvm] [AMDGPU][TTI] Add Target Hook for Instruction Uniformity (getInstructionUniformity) (PR #137639)

Mon Apr 28 07:29:43 PDT 2025

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Pankaj Dwivedi (PankajDwivedi-25)

<details>
<summary>Changes</summary>

This patch introduces a new target hook `getInstructionUniformity(const Instruction &I)` in `TargetTransformInfo`, enabling targets to describe more complex relationships between operand uniformity and instruction uniformity.

Currently, UniformityAnalysis categorizes instructions into a fixed set of InstructionUniformity values (Default, AlwaysUniform, NeverUniform).
However, some instructions, particularly intrinsics, have operand-dependent uniformity behaviors that are not easily captured within this framework.

This hook allows targets to override and implement custom uniformity-propagation rules for such cases.

---
Full diff: https://github.com/llvm/llvm-project/pull/137639.diff


7 Files Affected:

- (modified) llvm/include/llvm/Analysis/TargetTransformInfo.h (+8) 
- (modified) llvm/include/llvm/Analysis/TargetTransformInfoImpl.h (+5) 
- (modified) llvm/lib/Analysis/TargetTransformInfo.cpp (+5) 
- (modified) llvm/lib/Analysis/UniformityAnalysis.cpp (+13) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp (+21) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h (+2) 
- (added) llvm/test/Analysis/UniformityAnalysis/AMDGPU/uniform_intrinsic.ll (+25) 


``````````diff

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 022530dc846ea..9af5006ce9c6d 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -23,6 +23,7 @@
 
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Uniformity.h"
 #include "llvm/Analysis/IVDescriptors.h"
 #include "llvm/IR/FMF.h"
 #include "llvm/IR/InstrTypes.h"
@@ -1916,6 +1917,13 @@ class TargetTransformInfo {
       const Function &F,
       SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const;
 
+  /// Target can implement more complex patterns for getting Uniformity of an
+  /// instruction.Currently Uniformity analysis catagorises instructions with a
+  /// fixed set of InstructionUniformity values: Default, AlwaysUniform and
+  /// NeverUniform.
+  std::optional<InstructionUniformity>
+  getInstructionUniformity(const Instruction &I) const;
+
 private:
   std::unique_ptr<const TargetTransformInfoImplBase> TTIImpl;
 };
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 990252b1e5743..5bee462575181 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -1147,6 +1147,11 @@ class TargetTransformInfoImplBase {
       const Function &F,
       SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const {}
 
+  virtual std::optional<InstructionUniformity>
+  getInstructionUniformity(const Instruction &I) const {
+    return std::nullopt;
+  }
+
 protected:
   // Obtain the minimum required size to hold the value (without the sign)
   // In case of a vector it returns the min required size for one element.
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 8548afea72964..50157a7714bf7 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1476,6 +1476,11 @@ void TargetTransformInfo::collectKernelLaunchBounds(
   return TTIImpl->collectKernelLaunchBounds(F, LB);
 }
 
+std::optional<InstructionUniformity>
+TargetTransformInfo::getInstructionUniformity(const Instruction &I) const {
+  return TTIImpl->getInstructionUniformity(I);
+}
+
 TargetTransformInfoImplBase::~TargetTransformInfoImplBase() = default;
 
 TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
diff --git a/llvm/lib/Analysis/UniformityAnalysis.cpp b/llvm/lib/Analysis/UniformityAnalysis.cpp
index 2101fdfacfc8f..2fc6f523139a7 100644
--- a/llvm/lib/Analysis/UniformityAnalysis.cpp
+++ b/llvm/lib/Analysis/UniformityAnalysis.cpp
@@ -35,7 +35,20 @@ template <> void llvm::GenericUniformityAnalysisImpl<SSAContext>::initialize() {
       markDivergent(I);
     else if (TTI->isAlwaysUniform(&I))
       addUniformOverride(I);
+    else if (auto Uniformity = TTI->getInstructionUniformity(I)) {
+      switch (*Uniformity) {
+      case InstructionUniformity::AlwaysUniform:
+        addUniformOverride(I);
+        break;
+      case InstructionUniformity::NeverUniform:
+        markDivergent(I);
+        break;
+      case InstructionUniformity::Default:
+        break;
+      }
+    }
   }
+
   for (auto &Arg : F.args()) {
     if (TTI->isSourceOfDivergence(&Arg)) {
       markDivergent(&Arg);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 204d3df546bbf..5c59847dfeb62 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -1422,3 +1422,24 @@ void GCNTTIImpl::collectKernelLaunchBounds(
   LB.push_back({"amdgpu-waves-per-eu[0]", WavesPerEU.first});
   LB.push_back({"amdgpu-waves-per-eu[1]", WavesPerEU.second});
 }
+
+std::optional<InstructionUniformity>
+GCNTTIImpl::getInstructionUniformity(const Instruction &I) const {
+  if (const auto *II = dyn_cast<IntrinsicInst>(&I)) {
+    // We can define the custom rules for the intrinsics uniformity, depending
+    // on argument.
+    switch (II->getIntrinsicID()) {
+    case Intrinsic::amdgcn_permlane64:
+      // If either operand is uniform, the result is uniform.
+      for (unsigned Arg_i = 0, NumArg = II->arg_size(); Arg_i < NumArg;
+           Arg_i++) {
+        if (!isSourceOfDivergence(II->getArgOperand(Arg_i)))
+          return InstructionUniformity::AlwaysUniform;
+      }
+      return InstructionUniformity::Default;
+    default:
+      break;
+    }
+  }
+  return std::nullopt;
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index f6f7bd4bfcf5b..bea0b024d745b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -290,6 +290,8 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
   void collectKernelLaunchBounds(
       const Function &F,
       SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const override;
+  std::optional<InstructionUniformity>
+  getInstructionUniformity(const Instruction &I) const override;
 };
 
 } // end namespace llvm
diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/uniform_intrinsic.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/uniform_intrinsic.ll
new file mode 100644
index 0000000000000..4bb89516b2e81
--- /dev/null
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/uniform_intrinsic.ll
@@ -0,0 +1,25 @@
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s
+
+; CHECK: ALL VALUES UNIFORM
+define amdgpu_kernel void @permlane64_constant(ptr addrspace(1) %out) {
+  %v = call i32 @llvm.amdgcn.permlane64(i32 7)
+  store i32 %v, ptr addrspace(1) %out
+  ret void
+}
+
+; CHECK: ALL VALUES UNIFORM
+define amdgpu_kernel void @permlane64_uniform(ptr addrspace(1) %out, i32 %src) {
+  %v = call i32 @llvm.amdgcn.permlane64(i32 %src)
+  store i32 %v, ptr addrspace(1) %out
+  ret void
+}
+
+; CHECK: DIVERGENT: %tid = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK: DIVERGENT: %v = call i32 @llvm.amdgcn.permlane64.i32(i32 %tid)
+define amdgpu_kernel void @permlane64_nonuniform(i32 addrspace(1)* %out) {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %v = call i32 @llvm.amdgcn.permlane64(i32 %tid)
+  %out_ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+  store i32 %v, i32 addrspace(1)* %out_ptr
+  ret void
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/137639