[llvm] [AMDGPU][TTI] Add Target Hook for Instruction Uniformity (getInstructionUniformity) (PR #137639)

Pankaj Dwivedi via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 24 03:52:19 PDT 2025


https://github.com/PankajDwivedi-25 updated https://github.com/llvm/llvm-project/pull/137639

>From 860b4859812c20b03e47e7eb25a9999188558c30 Mon Sep 17 00:00:00 2001
From: Pankaj kumar divedi <Pankajkumar.divedi at amd.com>
Date: Tue, 23 Sep 2025 17:25:29 +0530
Subject: [PATCH 1/3] [NFC] move isDivergentUse so later dependent function in
 pushUsers can safely use it

---
 llvm/lib/Analysis/UniformityAnalysis.cpp | 26 ++++++++++++------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Analysis/UniformityAnalysis.cpp b/llvm/lib/Analysis/UniformityAnalysis.cpp
index 2101fdfacfc8f..a4a68bb2a0753 100644
--- a/llvm/lib/Analysis/UniformityAnalysis.cpp
+++ b/llvm/lib/Analysis/UniformityAnalysis.cpp
@@ -29,6 +29,19 @@ bool llvm::GenericUniformityAnalysisImpl<SSAContext>::markDefsDivergent(
   return markDivergent(cast<Value>(&Instr));
 }
 
+template <>
+bool llvm::GenericUniformityAnalysisImpl<SSAContext>::isDivergentUse(
+    const Use &U) const {
+  const auto *V = U.get();
+  if (isDivergent(V))
+    return true;
+  if (const auto *DefInstr = dyn_cast<Instruction>(V)) {
+    const auto *UseInstr = cast<Instruction>(U.getUser());
+    return isTemporalDivergent(*UseInstr->getParent(), *DefInstr);
+  }
+  return false;
+}
+
 template <> void llvm::GenericUniformityAnalysisImpl<SSAContext>::initialize() {
   for (auto &I : instructions(F)) {
     if (TTI->isSourceOfDivergence(&I))
@@ -88,19 +101,6 @@ void llvm::GenericUniformityAnalysisImpl<
   }
 }
 
-template <>
-bool llvm::GenericUniformityAnalysisImpl<SSAContext>::isDivergentUse(
-    const Use &U) const {
-  const auto *V = U.get();
-  if (isDivergent(V))
-    return true;
-  if (const auto *DefInstr = dyn_cast<Instruction>(V)) {
-    const auto *UseInstr = cast<Instruction>(U.getUser());
-    return isTemporalDivergent(*UseInstr->getParent(), *DefInstr);
-  }
-  return false;
-}
-
 // This ensures explicit instantiation of
 // GenericUniformityAnalysisImpl::ImplDeleter::operator()
 template class llvm::GenericUniformityInfo<SSAContext>;

>From 939c41093df2b1c8c9d29bf3aa912dbbff93474d Mon Sep 17 00:00:00 2001
From: Pankaj kumar divedi <Pankajkumar.divedi at amd.com>
Date: Tue, 23 Sep 2025 17:43:13 +0530
Subject: [PATCH 2/3] add target hook to capture special operand uniformity and
 update UA to use it

---
 llvm/include/llvm/ADT/GenericUniformityImpl.h | 14 ++++++++++++++
 .../llvm/Analysis/TargetTransformInfo.h       |  3 +++
 .../llvm/Analysis/TargetTransformInfoImpl.h   |  4 ++++
 llvm/lib/Analysis/TargetTransformInfo.cpp     |  5 +++++
 llvm/lib/Analysis/UniformityAnalysis.cpp      | 19 +++++++++++++++++++
 .../Target/AMDGPU/AMDGPUSearchableTables.td   |  1 -
 .../AMDGPU/AMDGPUTargetTransformInfo.cpp      | 12 ++++++++++++
 .../Target/AMDGPU/AMDGPUTargetTransformInfo.h |  1 +
 .../UniformityAnalysis/AMDGPU/intrinsics.ll   |  2 +-
 .../AMDGPU/uniform_intrinsic.ll               | 11 +++++++++++
 10 files changed, 70 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/Analysis/UniformityAnalysis/AMDGPU/uniform_intrinsic.ll

diff --git a/llvm/include/llvm/ADT/GenericUniformityImpl.h b/llvm/include/llvm/ADT/GenericUniformityImpl.h
index d10355fff1bea..1318f285717dd 100644
--- a/llvm/include/llvm/ADT/GenericUniformityImpl.h
+++ b/llvm/include/llvm/ADT/GenericUniformityImpl.h
@@ -51,6 +51,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SparseBitVector.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Uniformity.h"
 #include "llvm/Support/raw_ostream.h"
 
 #define DEBUG_TYPE "uniformity"
@@ -407,6 +408,11 @@ template <typename ContextT> class GenericUniformityAnalysisImpl {
   void recordTemporalDivergence(ConstValueRefT, const InstructionT *,
                                 const CycleT *);
 
+  bool isAnyOperandUniform(const InstructionT &Instr) const;
+
+  /// \brief keep track of special target intrinsics that can be proven uniform.
+  void addSpecialUniformIntrinsic(const InstructionT &Instr);
+
 protected:
   /// \brief Value/block pair representing a single phi input.
   struct PhiInput {
@@ -429,6 +435,8 @@ template <typename ContextT> class GenericUniformityAnalysisImpl {
   // Internal worklist for divergence propagation.
   std::vector<const InstructionT *> Worklist;
 
+  // Special intrinsics list which can be proven uniform.
+  llvm::SmallPtrSet<const InstructionT *, 8> SpecialUniformIntrinsics;
   /// \brief Mark \p Term as divergent and push all Instructions that become
   /// divergent as a result on the worklist.
   void analyzeControlDivergence(const InstructionT &Term);
@@ -824,6 +832,12 @@ void GenericUniformityAnalysisImpl<ContextT>::addUniformOverride(
   UniformOverrides.insert(&Instr);
 }
 
+template <typename ContextT>
+void GenericUniformityAnalysisImpl<ContextT>::addSpecialUniformIntrinsic(
+    const InstructionT &Instr) {
+  SpecialUniformIntrinsics.insert(&Instr);
+}
+
 // Mark as divergent all external uses of values defined in \p DefCycle.
 //
 // A value V defined by a block B inside \p DefCycle may be used outside the
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 022530dc846ea..c5e0b56d5f91c 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -23,6 +23,7 @@
 
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Uniformity.h"
 #include "llvm/Analysis/IVDescriptors.h"
 #include "llvm/IR/FMF.h"
 #include "llvm/IR/InstrTypes.h"
@@ -1916,6 +1917,8 @@ class TargetTransformInfo {
       const Function &F,
       SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const;
 
+  bool isSpecialUniformIntrinsic(const Instruction &I) const;
+
 private:
   std::unique_ptr<const TargetTransformInfoImplBase> TTIImpl;
 };
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 990252b1e5743..b1d780162ac9e 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -1147,6 +1147,10 @@ class TargetTransformInfoImplBase {
       const Function &F,
       SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const {}
 
+  virtual bool isSpecialUniformIntrinsic(const Instruction &I) const {
+    return false;
+  }
+
 protected:
   // Obtain the minimum required size to hold the value (without the sign)
   // In case of a vector it returns the min required size for one element.
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 8548afea72964..15083a6c50e02 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1476,6 +1476,11 @@ void TargetTransformInfo::collectKernelLaunchBounds(
   return TTIImpl->collectKernelLaunchBounds(F, LB);
 }
 
+bool TargetTransformInfo::isSpecialUniformIntrinsic(
+    const Instruction &I) const {
+  return TTIImpl->isSpecialUniformIntrinsic(I);
+}
+
 TargetTransformInfoImplBase::~TargetTransformInfoImplBase() = default;
 
 TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
diff --git a/llvm/lib/Analysis/UniformityAnalysis.cpp b/llvm/lib/Analysis/UniformityAnalysis.cpp
index a4a68bb2a0753..b71e82b73130f 100644
--- a/llvm/lib/Analysis/UniformityAnalysis.cpp
+++ b/llvm/lib/Analysis/UniformityAnalysis.cpp
@@ -42,12 +42,26 @@ bool llvm::GenericUniformityAnalysisImpl<SSAContext>::isDivergentUse(
   return false;
 }
 
+template <>
+bool llvm::GenericUniformityAnalysisImpl<SSAContext>::isAnyOperandUniform(
+    const Instruction &I) const {
+  for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
+    if (!isa<Instruction>(I.getOperand(i)) && !isa<Argument>(I.getOperand(i)))
+      continue;
+    if (!isDivergentUse(I.getOperandUse(i)))
+      return true;
+  }
+  return false;
+}
+
 template <> void llvm::GenericUniformityAnalysisImpl<SSAContext>::initialize() {
   for (auto &I : instructions(F)) {
     if (TTI->isSourceOfDivergence(&I))
       markDivergent(I);
     else if (TTI->isAlwaysUniform(&I))
       addUniformOverride(I);
+    else if (TTI->isSpecialUniformIntrinsic(I))
+      addSpecialUniformIntrinsic(I);
   }
   for (auto &Arg : F.args()) {
     if (TTI->isSourceOfDivergence(&Arg)) {
@@ -61,6 +75,11 @@ void llvm::GenericUniformityAnalysisImpl<SSAContext>::pushUsers(
     const Value *V) {
   for (const auto *User : V->users()) {
     if (const auto *UserInstr = dyn_cast<const Instruction>(User)) {
+      if (SpecialUniformIntrinsics.count(UserInstr) &&
+          isAnyOperandUniform(*UserInstr)) {
+        addUniformOverride(*UserInstr);
+        continue;
+      }
       markDivergent(*UserInstr);
     }
   }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
index 3b62dcf3c92cd..059dba03f4cb6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
@@ -317,7 +317,6 @@ def : SourceOfDivergence<int_amdgcn_live_mask>;
 def : SourceOfDivergence<int_amdgcn_ds_swizzle>;
 def : SourceOfDivergence<int_amdgcn_ds_ordered_add>;
 def : SourceOfDivergence<int_amdgcn_ds_ordered_swap>;
-def : SourceOfDivergence<int_amdgcn_permlane16>;
 def : SourceOfDivergence<int_amdgcn_permlanex16>;
 def : SourceOfDivergence<int_amdgcn_permlane16_var>;
 def : SourceOfDivergence<int_amdgcn_permlanex16_var>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 204d3df546bbf..bd404de412c1a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -1422,3 +1422,15 @@ void GCNTTIImpl::collectKernelLaunchBounds(
   LB.push_back({"amdgpu-waves-per-eu[0]", WavesPerEU.first});
   LB.push_back({"amdgpu-waves-per-eu[1]", WavesPerEU.second});
 }
+
+bool GCNTTIImpl::isSpecialUniformIntrinsic(const Instruction &I) const {
+  if (const auto *II = dyn_cast<IntrinsicInst>(&I)) {
+    switch (II->getIntrinsicID()) {
+    case Intrinsic::amdgcn_permlane16:
+      return true;
+    default:
+      return false;
+    }
+  }
+  return false;
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index f6f7bd4bfcf5b..ed169efbeb047 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -290,6 +290,7 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
   void collectKernelLaunchBounds(
       const Function &F,
       SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const override;
+  bool isSpecialUniformIntrinsic(const Instruction &I) const override;
 };
 
 } // end namespace llvm
diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll
index bb840023daf5d..0a434b2a70653 100644
--- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll
@@ -7,7 +7,7 @@ define amdgpu_kernel void @ds_swizzle(ptr addrspace(1) %out, i32 %src) #0 {
   ret void
 }
 
-; CHECK: DIVERGENT: %v = call i32 @llvm.amdgcn.permlane16.i32(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
+; CHECK: ALL VALUES UNIFORM
 define amdgpu_kernel void @v_permlane16_b32(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
   %v = call i32 @llvm.amdgcn.permlane16.i32(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
   store i32 %v, ptr addrspace(1) %out
diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/uniform_intrinsic.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/uniform_intrinsic.ll
new file mode 100644
index 0000000000000..e6c84bde81eff
--- /dev/null
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/uniform_intrinsic.ll
@@ -0,0 +1,11 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -mtriple amdgcn-- -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s
+
+; CHECK: ALL VALUES UNIFORM
+define amdgpu_kernel void @v_permlane16_b32(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
+  %v = call i32 @llvm.amdgcn.permlane16.i32(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
+  store i32 %v, ptr addrspace(1) %out
+  ret void
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}

>From caa735447850a05d41654abfca75f93dfc91a336 Mon Sep 17 00:00:00 2001
From: Pankaj kumar divedi <Pankajkumar.divedi at amd.com>
Date: Wed, 24 Sep 2025 16:22:05 +0530
Subject: [PATCH 3/3] add intrinsic permlanex16

---
 llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td          | 1 -
 llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp      | 1 +
 .../test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll | 2 +-
 .../UniformityAnalysis/AMDGPU/uniform_intrinsic.ll        | 8 ++++++++
 4 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
index 059dba03f4cb6..da3776761ab34 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
@@ -317,7 +317,6 @@ def : SourceOfDivergence<int_amdgcn_live_mask>;
 def : SourceOfDivergence<int_amdgcn_ds_swizzle>;
 def : SourceOfDivergence<int_amdgcn_ds_ordered_add>;
 def : SourceOfDivergence<int_amdgcn_ds_ordered_swap>;
-def : SourceOfDivergence<int_amdgcn_permlanex16>;
 def : SourceOfDivergence<int_amdgcn_permlane16_var>;
 def : SourceOfDivergence<int_amdgcn_permlanex16_var>;
 def : SourceOfDivergence<int_amdgcn_mov_dpp>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index bd404de412c1a..9acf536f86eba 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -1427,6 +1427,7 @@ bool GCNTTIImpl::isSpecialUniformIntrinsic(const Instruction &I) const {
   if (const auto *II = dyn_cast<IntrinsicInst>(&I)) {
     switch (II->getIntrinsicID()) {
     case Intrinsic::amdgcn_permlane16:
+    case Intrinsic::amdgcn_permlanex16:
       return true;
     default:
       return false;
diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll
index 0a434b2a70653..f209c996c7692 100644
--- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll
@@ -14,7 +14,7 @@ define amdgpu_kernel void @v_permlane16_b32(ptr addrspace(1) %out, i32 %src0, i3
   ret void
 }
 
-; CHECK: DIVERGENT: %v = call i32 @llvm.amdgcn.permlanex16.i32(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
+; CHECK: ALL VALUES UNIFORM
 define amdgpu_kernel void @v_permlanex16_b32(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
   %v = call i32 @llvm.amdgcn.permlanex16.i32(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
   store i32 %v, ptr addrspace(1) %out
diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/uniform_intrinsic.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/uniform_intrinsic.ll
index e6c84bde81eff..e5eb5ebebf897 100644
--- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/uniform_intrinsic.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/uniform_intrinsic.ll
@@ -7,5 +7,13 @@ define amdgpu_kernel void @v_permlane16_b32(ptr addrspace(1) %out, i32 %src0, i3
   store i32 %v, ptr addrspace(1) %out
   ret void
 }
+
+; CHECK: ALL VALUES UNIFORM
+define amdgpu_kernel void @v_permlanex16_b32(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
+  %v = call i32 @llvm.amdgcn.permlanex16.i32(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
+  store i32 %v, ptr addrspace(1) %out
+  ret void
+}
+
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; CHECK: {{.*}}



More information about the llvm-commits mailing list