[llvm] [TTI] Introduce getInstructionUniformity API for flexible uniformity analysis (PR #137639)

Tue Dec 16 05:00:03 PST 2025

https://github.com/PankajDwivedi-25 updated https://github.com/llvm/llvm-project/pull/137639

>From e42b908abfdd643c53c07a7b1abeb52ba1935088 Mon Sep 17 00:00:00 2001
From: Pankaj kumar divedi <Pankajkumar.divedi at amd.com>
Date: Tue, 23 Sep 2025 17:25:29 +0530
Subject: [PATCH 01/10] [NFC] move isDivergentUse so later dependent function
 in pushUsers can safely use it

---
 llvm/lib/Analysis/UniformityAnalysis.cpp | 26 ++++++++++++------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Analysis/UniformityAnalysis.cpp b/llvm/lib/Analysis/UniformityAnalysis.cpp
index b56534935d7c2..f6cf67463c8a1 100644
--- a/llvm/lib/Analysis/UniformityAnalysis.cpp
+++ b/llvm/lib/Analysis/UniformityAnalysis.cpp
@@ -29,6 +29,19 @@ bool llvm::GenericUniformityAnalysisImpl<SSAContext>::markDefsDivergent(
   return markDivergent(cast<Value>(&Instr));
 }
 
+template <>
+bool llvm::GenericUniformityAnalysisImpl<SSAContext>::isDivergentUse(
+    const Use &U) const {
+  const auto *V = U.get();
+  if (isDivergent(V))
+    return true;
+  if (const auto *DefInstr = dyn_cast<Instruction>(V)) {
+    const auto *UseInstr = cast<Instruction>(U.getUser());
+    return isTemporalDivergent(*UseInstr->getParent(), *DefInstr);
+  }
+  return false;
+}
+
 template <> void llvm::GenericUniformityAnalysisImpl<SSAContext>::initialize() {
   for (auto &I : instructions(F)) {
     InstructionUniformity IU = TTI->getInstructionUniformity(&I);
@@ -95,19 +108,6 @@ void llvm::GenericUniformityAnalysisImpl<
   }
 }
 
-template <>
-bool llvm::GenericUniformityAnalysisImpl<SSAContext>::isDivergentUse(
-    const Use &U) const {
-  const auto *V = U.get();
-  if (isDivergent(V))
-    return true;
-  if (const auto *DefInstr = dyn_cast<Instruction>(V)) {
-    const auto *UseInstr = cast<Instruction>(U.getUser());
-    return isTemporalDivergent(*UseInstr->getParent(), *DefInstr);
-  }
-  return false;
-}
-
 // This ensures explicit instantiation of
 // GenericUniformityAnalysisImpl::ImplDeleter::operator()
 template class llvm::GenericUniformityInfo<SSAContext>;

>From 38f1c8f396ad00d14c6f718c72e08ade85404dd2 Mon Sep 17 00:00:00 2001
From: Pankaj kumar divedi <Pankajkumar.divedi at amd.com>
Date: Sat, 1 Nov 2025 02:02:14 +0530
Subject: [PATCH 02/10] add target hook to capture special operand uniformity
 and update UA to use it

---
 llvm/include/llvm/ADT/GenericUniformityImpl.h | 22 +++++++
 llvm/include/llvm/ADT/Uniformity.h            |  5 +-
 .../llvm/Analysis/TargetTransformInfo.h       |  1 +
 .../llvm/Analysis/TargetTransformInfoImpl.h   |  4 ++
 llvm/lib/Analysis/TargetTransformInfo.cpp     |  5 ++
 llvm/lib/Analysis/UniformityAnalysis.cpp      | 39 ++++++++----
 .../lib/CodeGen/MachineUniformityAnalysis.cpp | 11 ++++
 .../Target/AMDGPU/AMDGPUSearchableTables.td   |  2 -
 .../UniformityAnalysis/AMDGPU/intrinsics.ll   |  4 +-
 .../AMDGPU/uniform_intrinsic.ll               | 59 +++++++++++++++++++
 10 files changed, 134 insertions(+), 18 deletions(-)
 create mode 100644 llvm/test/Analysis/UniformityAnalysis/AMDGPU/uniform_intrinsic.ll

diff --git a/llvm/include/llvm/ADT/GenericUniformityImpl.h b/llvm/include/llvm/ADT/GenericUniformityImpl.h
index 7fb0dbe22f12f..fdc714b5fa778 100644
--- a/llvm/include/llvm/ADT/GenericUniformityImpl.h
+++ b/llvm/include/llvm/ADT/GenericUniformityImpl.h
@@ -51,6 +51,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SparseBitVector.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Uniformity.h"
 #include "llvm/Support/raw_ostream.h"
 
 #define DEBUG_TYPE "uniformity"
@@ -407,6 +408,11 @@ template <typename ContextT> class GenericUniformityAnalysisImpl {
   void recordTemporalDivergence(ConstValueRefT, const InstructionT *,
                                 const CycleT *);
 
+  bool isOperandUniform(const InstructionT &I, InstructionUniformity IU) const;
+
+  /// \brief Keep track of target instructions that can be proven uniform.
+  void addUniformInstruction(const InstructionT *I, InstructionUniformity IU);
+
 protected:
   const ContextT &Context;
   const FunctionT &F;
@@ -420,6 +426,11 @@ template <typename ContextT> class GenericUniformityAnalysisImpl {
   // Internal worklist for divergence propagation.
   std::vector<const InstructionT *> Worklist;
 
+  // Map containing tracked instruction that can be proven uniform based on its
+  // operand Uniformity.
+  llvm::DenseMap<const InstructionT *, InstructionUniformity>
+      UniformInstruction;
+
   /// \brief Mark \p Term as divergent and push all Instructions that become
   /// divergent as a result on the worklist.
   void analyzeControlDivergence(const InstructionT &Term);
@@ -785,6 +796,11 @@ void GenericUniformityAnalysisImpl<ContextT>::markDivergent(
     const InstructionT &I) {
   if (isAlwaysUniform(I))
     return;
+  auto It = UniformInstruction.find(&I);
+  if (It != UniformInstruction.end() && isOperandUniform(I, It->second)) {
+    addUniformOverride(I);
+    return;
+  }
   bool Marked = false;
   if (I.isTerminator()) {
     Marked = DivergentTermBlocks.insert(I.getParent()).second;
@@ -816,6 +832,12 @@ void GenericUniformityAnalysisImpl<ContextT>::addUniformOverride(
   UniformOverrides.insert(&Instr);
 }
 
+template <typename ContextT>
+void GenericUniformityAnalysisImpl<ContextT>::addUniformInstruction(
+    const InstructionT *I, InstructionUniformity IU) {
+  UniformInstruction[I] = IU;
+}
+
 // Mark as divergent all external uses of values defined in \p DefCycle.
 //
 // A value V defined by a block B inside \p DefCycle may be used outside the
diff --git a/llvm/include/llvm/ADT/Uniformity.h b/llvm/include/llvm/ADT/Uniformity.h
index 21ca106b80be3..9571d43b8a9b9 100644
--- a/llvm/include/llvm/ADT/Uniformity.h
+++ b/llvm/include/llvm/ADT/Uniformity.h
@@ -23,7 +23,10 @@ enum class InstructionUniformity {
   AlwaysUniform,
 
   /// The result values can never be assumed to be uniform.
-  NeverUniform
+  NeverUniform,
+
+  /// Result value can be uniform if either of first two operand are uniform.
+  EitherOfFirstTwoOp
 };
 
 } // namespace llvm
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 5a4eb8daf0af6..c41e99dcbe4a7 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -2002,6 +2002,7 @@ class TargetTransformInfo {
   /// Returns true if GEP should not be used to index into vectors for this
   /// target.
   LLVM_ABI bool allowVectorElementIndexingUsingGEP() const;
+  InstructionUniformity getInstructionUniformity(const Instruction &I) const;
 
 private:
   std::unique_ptr<const TargetTransformInfoImplBase> TTIImpl;
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 74857a5b83aba..7328a9be751f0 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -1150,6 +1150,10 @@ class TargetTransformInfoImplBase {
       SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const {}
 
   virtual bool allowVectorElementIndexingUsingGEP() const { return true; }
+  virtual InstructionUniformity
+  getInstructionUniformity(const Instruction &I) const {
+    return InstructionUniformity::Default;
+  }
 
 protected:
   // Obtain the minimum required size to hold the value (without the sign)
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 2961d9361e5fa..702276a187ed8 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1499,6 +1499,11 @@ bool TargetTransformInfo::allowVectorElementIndexingUsingGEP() const {
   return TTIImpl->allowVectorElementIndexingUsingGEP();
 }
 
+InstructionUniformity
+TargetTransformInfo::getInstructionUniformity(const Instruction &I) const {
+  return TTIImpl->getInstructionUniformity(I);
+}
+
 TargetTransformInfoImplBase::~TargetTransformInfoImplBase() = default;
 
 TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
diff --git a/llvm/lib/Analysis/UniformityAnalysis.cpp b/llvm/lib/Analysis/UniformityAnalysis.cpp
index f6cf67463c8a1..9b7cb33d7f76a 100644
--- a/llvm/lib/Analysis/UniformityAnalysis.cpp
+++ b/llvm/lib/Analysis/UniformityAnalysis.cpp
@@ -8,6 +8,7 @@
 
 #include "llvm/Analysis/UniformityAnalysis.h"
 #include "llvm/ADT/GenericUniformityImpl.h"
+#include "llvm/ADT/Uniformity.h"
 #include "llvm/Analysis/CycleAnalysis.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/Dominators.h"
@@ -29,19 +30,6 @@ bool llvm::GenericUniformityAnalysisImpl<SSAContext>::markDefsDivergent(
   return markDivergent(cast<Value>(&Instr));
 }
 
-template <>
-bool llvm::GenericUniformityAnalysisImpl<SSAContext>::isDivergentUse(
-    const Use &U) const {
-  const auto *V = U.get();
-  if (isDivergent(V))
-    return true;
-  if (const auto *DefInstr = dyn_cast<Instruction>(V)) {
-    const auto *UseInstr = cast<Instruction>(U.getUser());
-    return isTemporalDivergent(*UseInstr->getParent(), *DefInstr);
-  }
-  return false;
-}
-
 template <> void llvm::GenericUniformityAnalysisImpl<SSAContext>::initialize() {
   for (auto &I : instructions(F)) {
     InstructionUniformity IU = TTI->getInstructionUniformity(&I);
@@ -108,6 +96,31 @@ void llvm::GenericUniformityAnalysisImpl<
   }
 }
 
+template <>
+bool llvm::GenericUniformityAnalysisImpl<SSAContext>::isDivergentUse(
+    const Use &U) const {
+  const auto *V = U.get();
+  if (isDivergent(V))
+    return true;
+  if (const auto *DefInstr = dyn_cast<Instruction>(V)) {
+    const auto *UseInstr = cast<Instruction>(U.getUser());
+    return isTemporalDivergent(*UseInstr->getParent(), *DefInstr);
+  }
+  return false;
+}
+
+template <>
+bool GenericUniformityAnalysisImpl<SSAContext>::isOperandUniform(
+    const Instruction &I, InstructionUniformity IU) const {
+  switch (IU) {
+  case InstructionUniformity::EitherOfFirstTwoOp:
+    return !isDivergentUse(I.getOperandUse(0)) ||
+           !isDivergentUse(I.getOperandUse(1));
+  default:
+    return false;
+  }
+}
+
 // This ensures explicit instantiation of
 // GenericUniformityAnalysisImpl::ImplDeleter::operator()
 template class llvm::GenericUniformityInfo<SSAContext>;
diff --git a/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp b/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
index 238d29d386574..d334741f7db17 100644
--- a/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
+++ b/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
@@ -151,6 +151,17 @@ bool llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::isDivergentUse(
   return isTemporalDivergent(*UseInstr->getParent(), *DefInstr);
 }
 
+template <>
+bool GenericUniformityAnalysisImpl<MachineSSAContext>::isOperandUniform(
+    const MachineInstr &I, InstructionUniformity IU) const {
+  switch (IU) {
+  case InstructionUniformity::EitherOfFirstTwoOp:
+    return !isDivergentUse(I.getOperand(0)) || !isDivergentUse(I.getOperand(1));
+  default:
+    return false;
+  }
+}
+
 // This ensures explicit instantiation of
 // GenericUniformityAnalysisImpl::ImplDeleter::operator()
 template class llvm::GenericUniformityInfo<MachineSSAContext>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
index 58a9b5511f2d0..b15b08650168c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
@@ -315,8 +315,6 @@ def : SourceOfDivergence<int_amdgcn_live_mask>;
 def : SourceOfDivergence<int_amdgcn_ds_swizzle>;
 def : SourceOfDivergence<int_amdgcn_ds_ordered_add>;
 def : SourceOfDivergence<int_amdgcn_ds_ordered_swap>;
-def : SourceOfDivergence<int_amdgcn_permlane16>;
-def : SourceOfDivergence<int_amdgcn_permlanex16>;
 def : SourceOfDivergence<int_amdgcn_permlane16_var>;
 def : SourceOfDivergence<int_amdgcn_permlanex16_var>;
 def : SourceOfDivergence<int_amdgcn_permlane_bcast>;
diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll
index d5c6000a1eef6..d32f3d3284bc3 100644
--- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll
@@ -7,14 +7,14 @@ define amdgpu_kernel void @ds_swizzle(ptr addrspace(1) %out, i32 %src) #0 {
   ret void
 }
 
-; CHECK: DIVERGENT: %v = call i32 @llvm.amdgcn.permlane16.i32(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
+; CHECK: ALL VALUES UNIFORM
 define amdgpu_kernel void @v_permlane16_b32(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
   %v = call i32 @llvm.amdgcn.permlane16.i32(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
   store i32 %v, ptr addrspace(1) %out
   ret void
 }
 
-; CHECK: DIVERGENT: %v = call i32 @llvm.amdgcn.permlanex16.i32(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
+; CHECK: ALL VALUES UNIFORM
 define amdgpu_kernel void @v_permlanex16_b32(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
   %v = call i32 @llvm.amdgcn.permlanex16.i32(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
   store i32 %v, ptr addrspace(1) %out
diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/uniform_intrinsic.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/uniform_intrinsic.ll
new file mode 100644
index 0000000000000..37be465a7796b
--- /dev/null
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/uniform_intrinsic.ll
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -mtriple amdgcn-- -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s
+
+; CHECK: ALL VALUES UNIFORM
+define amdgpu_kernel void @v_permlane16_b32(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
+  %v = call i32 @llvm.amdgcn.permlane16.i32(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
+  store i32 %v, ptr addrspace(1) %out
+  ret void
+}
+
+; CHECK: ALL VALUES UNIFORM
+define amdgpu_kernel void @v_permlanex16_b32(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
+  %v = call i32 @llvm.amdgcn.permlanex16.i32(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
+  store i32 %v, ptr addrspace(1) %out
+  ret void
+}
+
+; CHECK:  DIVERGENT:   %v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false)
+; CHECK:               %v1 = call i32 @llvm.amdgcn.permlanex16.i32(i32 %v, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false)
+; CHECK:               store i32 %v1, ptr addrspace(1) %out, align 4
+define amdgpu_kernel void @div_permlane16_var_uni_usr_x16(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
+  %v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
+  %v1 = call i32 @llvm.amdgcn.permlanex16.i32(i32 %v, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
+  store i32 %v1, ptr addrspace(1) %out
+  ret void
+}
+
+; CHECK:  DIVERGENT:   %v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false)
+; CHECK:               %v1 = call i32 @llvm.amdgcn.permlanex16.i32(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false)
+; CHECK:               store i32 %v1, ptr addrspace(1) %out, align 4
+define amdgpu_kernel void @div_permlane16_var_uni_x16(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
+  %v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
+  %v1 = call i32 @llvm.amdgcn.permlanex16.i32(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
+  store i32 %v1, ptr addrspace(1) %out
+  ret void
+}
+
+; CHECK:  DIVERGENT:   %v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false)
+; CHECK:               %v1 = call i32 @llvm.amdgcn.permlane16.i32(i32 %v, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false)
+; CHECK:               store i32 %v1, ptr addrspace(1) %out, align 4
+define amdgpu_kernel void @div_permlane16_var_uni_usr_16(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
+  %v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
+  %v1 = call i32 @llvm.amdgcn.permlane16.i32(i32 %v, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
+  store i32 %v1, ptr addrspace(1) %out
+  ret void
+}
+
+; CHECK:  DIVERGENT:   %v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false)
+; CHECK:               %v1 = call i32 @llvm.amdgcn.permlane16.i32(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false)
+; CHECK:               store i32 %v1, ptr addrspace(1) %out, align 4
+define amdgpu_kernel void @div_permlane16_var_uni_16(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
+  %v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
+  %v1 = call i32 @llvm.amdgcn.permlane16.i32(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
+  store i32 %v1, ptr addrspace(1) %out
+  ret void
+}
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}

>From 9a4d2c3e51b3a42f9fc57d47eb3d98862770353d Mon Sep 17 00:00:00 2001
From: Pankaj kumar divedi <Pankajkumar.divedi at amd.com>
Date: Tue, 4 Nov 2025 14:21:00 +0530
Subject: [PATCH 03/10] update enum name for more clarity

---
 llvm/include/llvm/ADT/Uniformity.h             | 5 +++--
 llvm/lib/Analysis/UniformityAnalysis.cpp       | 2 +-
 llvm/lib/CodeGen/MachineUniformityAnalysis.cpp | 4 ++--
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/llvm/include/llvm/ADT/Uniformity.h b/llvm/include/llvm/ADT/Uniformity.h
index 9571d43b8a9b9..ed558b004d322 100644
--- a/llvm/include/llvm/ADT/Uniformity.h
+++ b/llvm/include/llvm/ADT/Uniformity.h
@@ -25,8 +25,9 @@ enum class InstructionUniformity {
   /// The result values can never be assumed to be uniform.
   NeverUniform,
 
-  /// Result value can be uniform if either of first two operand are uniform.
-  EitherOfFirstTwoOp
+  /// Result value can be uniform if any of the first two use operands is
+  /// uniform.
+  AnyOfFirstTwoUseOp
 };
 
 } // namespace llvm
diff --git a/llvm/lib/Analysis/UniformityAnalysis.cpp b/llvm/lib/Analysis/UniformityAnalysis.cpp
index 9b7cb33d7f76a..494ba272aecf7 100644
--- a/llvm/lib/Analysis/UniformityAnalysis.cpp
+++ b/llvm/lib/Analysis/UniformityAnalysis.cpp
@@ -113,7 +113,7 @@ template <>
 bool GenericUniformityAnalysisImpl<SSAContext>::isOperandUniform(
     const Instruction &I, InstructionUniformity IU) const {
   switch (IU) {
-  case InstructionUniformity::EitherOfFirstTwoOp:
+  case InstructionUniformity::AnyOfFirstTwoUseOp:
     return !isDivergentUse(I.getOperandUse(0)) ||
            !isDivergentUse(I.getOperandUse(1));
   default:
diff --git a/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp b/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
index d334741f7db17..339499890c595 100644
--- a/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
+++ b/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
@@ -155,8 +155,8 @@ template <>
 bool GenericUniformityAnalysisImpl<MachineSSAContext>::isOperandUniform(
     const MachineInstr &I, InstructionUniformity IU) const {
   switch (IU) {
-  case InstructionUniformity::EitherOfFirstTwoOp:
-    return !isDivergentUse(I.getOperand(0)) || !isDivergentUse(I.getOperand(1));
+  case InstructionUniformity::AnyOfFirstTwoUseOp:
+    return !isDivergentUse(I.getOperand(1)) || !isDivergentUse(I.getOperand(2));
   default:
     return false;
   }

>From 18b0a4019ea1f550dbf0c53c5b4f89184933d273 Mon Sep 17 00:00:00 2001
From: Pankaj Dwivedi <divedi.pk.117 at gmail.com>
Date: Wed, 5 Nov 2025 10:53:47 +0530
Subject: [PATCH 04/10] Apply suggestion from @arsenm

Co-authored-by: Matt Arsenault <arsenm2 at gmail.com>
---
 llvm/include/llvm/ADT/GenericUniformityImpl.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/include/llvm/ADT/GenericUniformityImpl.h b/llvm/include/llvm/ADT/GenericUniformityImpl.h
index fdc714b5fa778..12192d02adc65 100644
--- a/llvm/include/llvm/ADT/GenericUniformityImpl.h
+++ b/llvm/include/llvm/ADT/GenericUniformityImpl.h
@@ -428,7 +428,7 @@ template <typename ContextT> class GenericUniformityAnalysisImpl {
 
   // Map containing tracked instruction that can be proven uniform based on its
   // operand Uniformity.
-  llvm::DenseMap<const InstructionT *, InstructionUniformity>
+  DenseMap<const InstructionT *, InstructionUniformity>
       UniformInstruction;
 
   /// \brief Mark \p Term as divergent and push all Instructions that become

>From 7513929949503c9890dd239074598eab048224bc Mon Sep 17 00:00:00 2001
From: Pankaj kumar divedi <Pankajkumar.divedi at amd.com>
Date: Mon, 17 Nov 2025 15:17:36 +0530
Subject: [PATCH 05/10] let getInstructionUniformity hook wrap
 isSourceOfDivergence/isAlwaysUniform

---
 llvm/include/llvm/Analysis/TargetTransformInfo.h     |  3 ++-
 llvm/include/llvm/Analysis/TargetTransformInfoImpl.h |  7 +++++--
 llvm/lib/Analysis/TargetTransformInfo.cpp            |  4 ++--
 llvm/lib/CodeGen/MachineUniformityAnalysis.cpp       | 10 +++-------
 llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp |  2 ++
 llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h     |  1 -
 6 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index c41e99dcbe4a7..384e76ebfd1b3 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -2002,7 +2002,8 @@ class TargetTransformInfo {
   /// Returns true if GEP should not be used to index into vectors for this
   /// target.
   LLVM_ABI bool allowVectorElementIndexingUsingGEP() const;
-  InstructionUniformity getInstructionUniformity(const Instruction &I) const;
+
+  InstructionUniformity getInstructionUniformity(const Value *V) const;
 
 private:
   std::unique_ptr<const TargetTransformInfoImplBase> TTIImpl;
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 7328a9be751f0..51ed2c692b4f0 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -1150,8 +1150,11 @@ class TargetTransformInfoImplBase {
       SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const {}
 
   virtual bool allowVectorElementIndexingUsingGEP() const { return true; }
-  virtual InstructionUniformity
-  getInstructionUniformity(const Instruction &I) const {
+
+  // New API for uniformity classification
+  // Targets should override this to provide target-specific uniformity analysis
+  // The default implementation returns Default (conservative behavior)
+  virtual InstructionUniformity getInstructionUniformity(const Value *V) const {
     return InstructionUniformity::Default;
   }
 
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 702276a187ed8..e4db965251e40 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1500,8 +1500,8 @@ bool TargetTransformInfo::allowVectorElementIndexingUsingGEP() const {
 }
 
 InstructionUniformity
-TargetTransformInfo::getInstructionUniformity(const Instruction &I) const {
-  return TTIImpl->getInstructionUniformity(I);
+TargetTransformInfo::getInstructionUniformity(const Value *V) const {
+  return TTIImpl->getInstructionUniformity(V);
 }
 
 TargetTransformInfoImplBase::~TargetTransformInfoImplBase() = default;
diff --git a/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp b/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
index 339499890c595..7bb7fb0d9a30f 100644
--- a/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
+++ b/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
@@ -151,15 +151,11 @@ bool llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::isDivergentUse(
   return isTemporalDivergent(*UseInstr->getParent(), *DefInstr);
 }
 
+// This can be defined later depending on use of the MachineUniformityAnalysis.
 template <>
 bool GenericUniformityAnalysisImpl<MachineSSAContext>::isOperandUniform(
-    const MachineInstr &I, InstructionUniformity IU) const {
-  switch (IU) {
-  case InstructionUniformity::AnyOfFirstTwoUseOp:
-    return !isDivergentUse(I.getOperand(1)) || !isDivergentUse(I.getOperand(2));
-  default:
-    return false;
-  }
+    const MachineInstr &MI, InstructionUniformity IU) const {
+  return false;
 }
 
 // This ensures explicit instantiation of
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 35406a387cf0f..5b541a7063e33 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -1652,6 +1652,8 @@ unsigned GCNTTIImpl::getNumberOfParts(Type *Tp) const {
   return BaseT::getNumberOfParts(Tp);
 }
 
+// New API that wraps the old isSourceOfDivergence and isAlwaysUniform APIs
+// with additional support for new uniformity classifications
 InstructionUniformity
 GCNTTIImpl::getInstructionUniformity(const Value *V) const {
   if (isAlwaysUniform(V))
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index ae12a6ea3baa3..4b2961ac4a422 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -203,7 +203,6 @@ class NVPTXTTIImpl final : public BasicTTIImplBase<NVPTXTTIImpl> {
     // Self-referential globals are not supported.
     return false;
   }
-
   InstructionUniformity getInstructionUniformity(const Value *V) const override;
 };
 

>From c4dfc3992190595613937b36c72612ce723dab96 Mon Sep 17 00:00:00 2001
From: Pankaj kumar divedi <Pankajkumar.divedi at amd.com>
Date: Wed, 19 Nov 2025 17:38:58 +0530
Subject: [PATCH 06/10] update the operand check & update machine inst
 uniformity

---
 llvm/lib/Analysis/UniformityAnalysis.cpp      |  7 +-
 .../lib/CodeGen/MachineUniformityAnalysis.cpp | 35 +++++++-
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp        |  7 ++
 .../AMDGPU/MIR/uniform-permlane.mir           | 86 +++++++++++++++++++
 .../AMDGPU/uniform_intrinsic.ll               |  8 +-
 5 files changed, 135 insertions(+), 8 deletions(-)
 create mode 100644 llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/uniform-permlane.mir

diff --git a/llvm/lib/Analysis/UniformityAnalysis.cpp b/llvm/lib/Analysis/UniformityAnalysis.cpp
index 494ba272aecf7..9a02b65f97bc5 100644
--- a/llvm/lib/Analysis/UniformityAnalysis.cpp
+++ b/llvm/lib/Analysis/UniformityAnalysis.cpp
@@ -114,8 +114,11 @@ bool GenericUniformityAnalysisImpl<SSAContext>::isOperandUniform(
     const Instruction &I, InstructionUniformity IU) const {
   switch (IU) {
   case InstructionUniformity::AnyOfFirstTwoUseOp:
-    return !isDivergentUse(I.getOperandUse(0)) ||
-           !isDivergentUse(I.getOperandUse(1));
+    // permlane16/permlanex16 operands: <old> <src0> <src1> <src2> <fi>
+    // <bound_control>. The result is uniform if either src0 (operand 1) or
+    // src1 (operand 2, the lane select) is uniform.
+    return !isDivergentUse(I.getOperandUse(1)) ||
+           !isDivergentUse(I.getOperandUse(2));
   default:
     return false;
   }
diff --git a/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp b/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
index 7bb7fb0d9a30f..d4fed2b456bd1 100644
--- a/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
+++ b/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
@@ -151,11 +151,42 @@ bool llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::isDivergentUse(
   return isTemporalDivergent(*UseInstr->getParent(), *DefInstr);
 }
 
-// This can be defined later depending on use of the MachineUniformityAnalysis.
 template <>
 bool GenericUniformityAnalysisImpl<MachineSSAContext>::isOperandUniform(
     const MachineInstr &MI, InstructionUniformity IU) const {
-  return false;
+  switch (IU) {
+  // For permlane16/permlanex16, check if either src or lane select is uniform
+  // These instructions have mixed immediate and register operands:
+  // Operand 1 is src0 (the source value to permute)
+  // Operand 3 is src1 (lane select - which lane within the 16 to read from)
+  // Result is uniform if EITHER the source OR lane select is uniform
+  case InstructionUniformity::AnyOfFirstTwoUseOp: {
+    // Check if any of the first two register use operands is uniform
+    // Result is uniform if ANY of these operands is uniform
+    const MachineOperand *FirstRegOp = nullptr;
+    const MachineOperand *SecondRegOp = nullptr;
+
+    // Find the first two register use operands
+    for (const MachineOperand &MO : MI.uses()) {
+      if (MO.isReg() && MO.getReg().isVirtual()) {
+        if (!FirstRegOp)
+          FirstRegOp = &MO;
+        else if (!SecondRegOp) {
+          SecondRegOp = &MO;
+          break;
+        }
+      }
+    }
+
+    if (!FirstRegOp || !SecondRegOp)
+      return false;
+
+    // Return true if either operand is uniform
+    return !isDivergentUse(*FirstRegOp) || !isDivergentUse(*SecondRegOp);
+  }
+  default:
+    return false;
+  }
 }
 
 // This ensures explicit instantiation of
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 9d189515f2bbb..8e15943c78998 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -10628,6 +10628,13 @@ SIInstrInfo::getInstructionUniformity(const MachineInstr &MI) const {
     return InstructionUniformity::NeverUniform;
 
   unsigned opcode = MI.getOpcode();
+
+  // Special handling for permlane16/permlanex16 - uniformity depends on
+  // operands
+  if (opcode == AMDGPU::V_PERMLANE16_B32_e64 ||
+      opcode == AMDGPU::V_PERMLANEX16_B32_e64)
+    return InstructionUniformity::AnyOfFirstTwoUseOp;
+
   if (opcode == AMDGPU::V_READLANE_B32 ||
       opcode == AMDGPU::V_READFIRSTLANE_B32 ||
       opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/uniform-permlane.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/uniform-permlane.mir
new file mode 100644
index 0000000000000..f08d16affef23
--- /dev/null
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/uniform-permlane.mir
@@ -0,0 +1,86 @@
+# RUN: llc -mtriple=amdgcn-- -mcpu=gfx1100 -run-pass=print-machine-uniformity -o - %s 2>&1 | FileCheck %s
+
+# Test the machine-level uniformity analysis for permlane16/permlanex16 instructions.
+#
+# NOTE: Permlane instructions have a hardware constraint that src1 (lane select) and src2
+# must be SGPR (scalar) registers. Since SGPRs are always uniform at machine level,
+# permlane results are always uniform according to the AnyOfFirstTwoUseOp logic
+# (either src0 OR src1 being uniform makes the result uniform, and src1 is always uniform).
+#
+# These tests verify that the uniformity analysis correctly handles permlane instructions
+# and that uniform results propagate through chains of operations.
+
+---
+# Test: permlane16 with divergent VGPR src and uniform SGPR lane select
+# Result is UNIFORM because lane select (SGPR) is always uniform
+name: permlane16_basic
+machineFunctionInfo:
+  isEntryFunction: true
+body: |
+  bb.0:
+    ; CHECK-LABEL: MachineUniformityInfo for function: permlane16_basic
+    ; CHECK: ALL VALUES UNIFORM
+    %0:vgpr_32 = IMPLICIT_DEF
+    %1:sreg_32 = S_MOV_B32 5
+    %2:sreg_32 = IMPLICIT_DEF
+    %3:vgpr_32 = V_PERMLANE16_B32_e64 0, %0, 0, %1, 0, %2, %0, 0, implicit $exec
+    S_ENDPGM 0
+
+...
+---
+# Test: permlanex16 with divergent VGPR src and uniform SGPR lane select
+# Result is UNIFORM because lane select (SGPR) is always uniform
+name: permlanex16_basic
+machineFunctionInfo:
+  isEntryFunction: true
+body: |
+  bb.0:
+    ; CHECK-LABEL: MachineUniformityInfo for function: permlanex16_basic
+    ; CHECK: ALL VALUES UNIFORM
+    %0:vgpr_32 = IMPLICIT_DEF
+    %1:sreg_32 = S_MOV_B32 7
+    %2:sreg_32 = IMPLICIT_DEF
+    %3:vgpr_32 = V_PERMLANEX16_B32_e64 0, %0, 0, %1, 0, %2, %0, 0, implicit $exec
+    S_ENDPGM 0
+
+...
+---
+# Test: Chain of permlane operations - uniformity propagates
+# Both permlanes are uniform, second uses result of first as source
+name: permlane16_chain_uniform
+machineFunctionInfo:
+  isEntryFunction: true
+body: |
+  bb.0:
+    ; CHECK-LABEL: MachineUniformityInfo for function: permlane16_chain_uniform
+    ; CHECK: ALL VALUES UNIFORM
+    %0:vgpr_32 = IMPLICIT_DEF
+    %1:sreg_32 = S_MOV_B32 3
+    %2:sreg_32 = IMPLICIT_DEF
+    ; First permlane - uniform because lane select is SGPR
+    %3:vgpr_32 = V_PERMLANE16_B32_e64 0, %0, 0, %1, 0, %2, %0, 0, implicit $exec
+    ; Second permlane uses uniform result - also uniform
+    %4:vgpr_32 = V_PERMLANEX16_B32_e64 0, %3, 0, %1, 0, %2, %3, 0, implicit $exec
+    S_ENDPGM 0
+
+...
+---
+# Test: Multiple permlane operations in sequence
+# Verifies that uniformity is correctly tracked through complex chains
+name: permlane_multiple
+machineFunctionInfo:
+  isEntryFunction: true
+body: |
+  bb.0:
+    ; CHECK-LABEL: MachineUniformityInfo for function: permlane_multiple
+    ; CHECK: ALL VALUES UNIFORM
+    %0:vgpr_32 = IMPLICIT_DEF
+    %1:sreg_32 = S_MOV_B32 1
+    %2:sreg_32 = S_MOV_B32 2  
+    %3:vgpr_32 = V_PERMLANE16_B32_e64 0, %0, 0, %1, 0, %2, %0, 0, implicit $exec
+    %4:vgpr_32 = V_PERMLANEX16_B32_e64 0, %3, 0, %1, 0, %2, %3, 0, implicit $exec
+    %5:vgpr_32 = V_PERMLANE16_B32_e64 0, %4, 0, %2, 0, %1, %4, 0, implicit $exec
+    S_ENDPGM 0
+
+...
+
diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/uniform_intrinsic.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/uniform_intrinsic.ll
index 37be465a7796b..e7391ee0c265b 100644
--- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/uniform_intrinsic.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/uniform_intrinsic.ll
@@ -16,11 +16,11 @@ define amdgpu_kernel void @v_permlanex16_b32(ptr addrspace(1) %out, i32 %src0, i
 }
 
 ; CHECK:  DIVERGENT:   %v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false)
-; CHECK:               %v1 = call i32 @llvm.amdgcn.permlanex16.i32(i32 %v, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false)
+; CHECK:               %v1 = call i32 @llvm.amdgcn.permlanex16.i32(i32 %src0, i32 %v, i32 %src1, i32 %src2, i1 false, i1 false)
 ; CHECK:               store i32 %v1, ptr addrspace(1) %out, align 4
 define amdgpu_kernel void @div_permlane16_var_uni_usr_x16(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
   %v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
-  %v1 = call i32 @llvm.amdgcn.permlanex16.i32(i32 %v, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
+  %v1 = call i32 @llvm.amdgcn.permlanex16.i32(i32 %src0, i32 %v, i32 %src1, i32 %src2, i1 false, i1 false) #0
   store i32 %v1, ptr addrspace(1) %out
   ret void
 }
@@ -36,11 +36,11 @@ define amdgpu_kernel void @div_permlane16_var_uni_x16(ptr addrspace(1) %out, i32
 }
 
 ; CHECK:  DIVERGENT:   %v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false)
-; CHECK:               %v1 = call i32 @llvm.amdgcn.permlane16.i32(i32 %v, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false)
+; CHECK:               %v1 = call i32 @llvm.amdgcn.permlane16.i32(i32 %src0, i32 %v, i32 %src1, i32 %src2, i1 false, i1 false)
 ; CHECK:               store i32 %v1, ptr addrspace(1) %out, align 4
 define amdgpu_kernel void @div_permlane16_var_uni_usr_16(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
   %v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
-  %v1 = call i32 @llvm.amdgcn.permlane16.i32(i32 %v, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
+  %v1 = call i32 @llvm.amdgcn.permlane16.i32(i32 %src0, i32 %v, i32 %src1, i32 %src2, i1 false, i1 false) #0
   store i32 %v1, ptr addrspace(1) %out
   ret void
 }

>From d2543e62789abeae3b311bf8a94a9872724210c6 Mon Sep 17 00:00:00 2001
From: Pankaj kumar divedi <Pankajkumar.divedi at amd.com>
Date: Wed, 19 Nov 2025 18:59:57 +0530
Subject: [PATCH 07/10] Fix formatting

---
 llvm/include/llvm/ADT/GenericUniformityImpl.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/ADT/GenericUniformityImpl.h b/llvm/include/llvm/ADT/GenericUniformityImpl.h
index 12192d02adc65..307d51ac25288 100644
--- a/llvm/include/llvm/ADT/GenericUniformityImpl.h
+++ b/llvm/include/llvm/ADT/GenericUniformityImpl.h
@@ -1,4 +1,4 @@
-//===- GenericUniformityImpl.h -----------------------*- C++ -*------------===//
+//===- GenericUniformityImpl.h -----------------------*- C++ -*------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -428,8 +428,7 @@ template <typename ContextT> class GenericUniformityAnalysisImpl {
 
   // Map containing tracked instruction that can be proven uniform based on its
   // operand Uniformity.
-  DenseMap<const InstructionT *, InstructionUniformity>
-      UniformInstruction;
+  DenseMap<const InstructionT *, InstructionUniformity> UniformInstruction;
 
   /// \brief Mark \p Term as divergent and push all Instructions that become
   /// divergent as a result on the worklist.

>From b311e4b2fc230a217eebf0a8b27f4bb6952b700b Mon Sep 17 00:00:00 2001
From: Pankaj kumar divedi <Pankajkumar.divedi at amd.com>
Date: Wed, 19 Nov 2025 19:50:03 +0530
Subject: [PATCH 08/10] update mir test check

---
 .../UniformityAnalysis/AMDGPU/MIR/uniform-permlane.mir    | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/uniform-permlane.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/uniform-permlane.mir
index f08d16affef23..da6048d86b2dd 100644
--- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/uniform-permlane.mir
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/uniform-permlane.mir
@@ -18,7 +18,7 @@ machineFunctionInfo:
   isEntryFunction: true
 body: |
   bb.0:
-    ; CHECK-LABEL: MachineUniformityInfo for function: permlane16_basic
+    ; CHECK-LABEL: MachineUniformityInfo for function: @permlane16_basic
     ; CHECK: ALL VALUES UNIFORM
     %0:vgpr_32 = IMPLICIT_DEF
     %1:sreg_32 = S_MOV_B32 5
@@ -35,7 +35,7 @@ machineFunctionInfo:
   isEntryFunction: true
 body: |
   bb.0:
-    ; CHECK-LABEL: MachineUniformityInfo for function: permlanex16_basic
+    ; CHECK-LABEL: MachineUniformityInfo for function: @permlanex16_basic
     ; CHECK: ALL VALUES UNIFORM
     %0:vgpr_32 = IMPLICIT_DEF
     %1:sreg_32 = S_MOV_B32 7
@@ -52,7 +52,7 @@ machineFunctionInfo:
   isEntryFunction: true
 body: |
   bb.0:
-    ; CHECK-LABEL: MachineUniformityInfo for function: permlane16_chain_uniform
+    ; CHECK-LABEL: MachineUniformityInfo for function: @permlane16_chain_uniform
     ; CHECK: ALL VALUES UNIFORM
     %0:vgpr_32 = IMPLICIT_DEF
     %1:sreg_32 = S_MOV_B32 3
@@ -72,7 +72,7 @@ machineFunctionInfo:
   isEntryFunction: true
 body: |
   bb.0:
-    ; CHECK-LABEL: MachineUniformityInfo for function: permlane_multiple
+    ; CHECK-LABEL: MachineUniformityInfo for function: @permlane_multiple
     ; CHECK: ALL VALUES UNIFORM
     %0:vgpr_32 = IMPLICIT_DEF
     %1:sreg_32 = S_MOV_B32 1

>From 8bc93dd4d7716f849718ffc3caffc2578ba7fdf6 Mon Sep 17 00:00:00 2001
From: Pankaj kumar divedi <Pankajkumar.divedi at amd.com>
Date: Thu, 20 Nov 2025 16:30:26 +0530
Subject: [PATCH 09/10] separate complex target based custom logic through
 target hook

---
 llvm/include/llvm/ADT/GenericUniformityImpl.h | 15 ++++--
 llvm/include/llvm/ADT/Uniformity.h            |  7 +--
 .../llvm/Analysis/TargetTransformInfo.h       | 12 +++++
 .../llvm/Analysis/TargetTransformInfoImpl.h   |  8 +++
 llvm/include/llvm/CodeGen/TargetInstrInfo.h   | 16 ++++++
 llvm/lib/Analysis/TargetTransformInfo.cpp     |  5 ++
 llvm/lib/Analysis/UniformityAnalysis.cpp      | 29 +++++-----
 .../lib/CodeGen/MachineUniformityAnalysis.cpp | 53 ++++++++-----------
 .../AMDGPU/AMDGPUTargetTransformInfo.cpp      | 38 +++++++++++++
 .../Target/AMDGPU/AMDGPUTargetTransformInfo.h |  3 ++
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp        | 23 ++++++--
 llvm/lib/Target/AMDGPU/SIInstrInfo.h          |  3 ++
 12 files changed, 156 insertions(+), 56 deletions(-)

diff --git a/llvm/include/llvm/ADT/GenericUniformityImpl.h b/llvm/include/llvm/ADT/GenericUniformityImpl.h
index 307d51ac25288..a334e46b542e1 100644
--- a/llvm/include/llvm/ADT/GenericUniformityImpl.h
+++ b/llvm/include/llvm/ADT/GenericUniformityImpl.h
@@ -408,9 +408,11 @@ template <typename ContextT> class GenericUniformityAnalysisImpl {
   void recordTemporalDivergence(ConstValueRefT, const InstructionT *,
                                 const CycleT *);
 
-  bool isOperandUniform(const InstructionT &I, InstructionUniformity IU) const;
+  /// Check if an instruction with Custom uniformity can be proven uniform
+  /// based on its operands. This queries the target-specific callback.
+  bool isCustomUniform(const InstructionT &I) const;
 
-  /// \brief keep track of target instruction that can be proven uniform.
+  /// \brief keep track of instructions that require custom uniformity analysis.
   void addUniformInstruction(const InstructionT *I, InstructionUniformity IU);
 
 protected:
@@ -795,10 +797,13 @@ void GenericUniformityAnalysisImpl<ContextT>::markDivergent(
     const InstructionT &I) {
   if (isAlwaysUniform(I))
     return;
+  // Check if instruction requires custom uniformity analysis
   auto It = UniformInstruction.find(&I);
-  if (It != UniformInstruction.end() && isOperandUniform(I, It->second)) {
-    addUniformOverride(I);
-    return;
+  if (It != UniformInstruction.end()) {
+    if (It->second == InstructionUniformity::Custom && isCustomUniform(I)) {
+      addUniformOverride(I);
+      return;
+    }
   }
   bool Marked = false;
   if (I.isTerminator()) {
diff --git a/llvm/include/llvm/ADT/Uniformity.h b/llvm/include/llvm/ADT/Uniformity.h
index ed558b004d322..43e588745f73f 100644
--- a/llvm/include/llvm/ADT/Uniformity.h
+++ b/llvm/include/llvm/ADT/Uniformity.h
@@ -25,9 +25,10 @@ enum class InstructionUniformity {
   /// The result values can never be assumed to be uniform.
   NeverUniform,
 
-  /// Result value can be uniform if any of the first two use operand are
-  /// uniform.
-  AnyOfFirstTwoUseOp
+  /// If all operands are uniform, the result values are uniform. Otherwise,
+  /// the result values may be divergent, and a custom check may be used to
+  /// determine uniformity via a callback.
+  Custom
 };
 
 } // namespace llvm
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 384e76ebfd1b3..22cfe8468597f 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -2005,6 +2005,18 @@ class TargetTransformInfo {
 
   InstructionUniformity getInstructionUniformity(const Value *V) const;
 
+  /// Determine if an instruction with some operands uniform can be proven
+  /// uniform. This is used for custom uniformity analysis where the target
+  /// can define complex rules that depend on which specific operands are
+  /// uniform.
+  ///
+  /// \param I The instruction to check.
+  /// \param UniformArgs A bitvector indicating which operands are known to be
+  ///                    uniform (bit N corresponds to operand N).
+  /// \returns true if the instruction result can be proven uniform given the
+  ///          uniform operands, false otherwise.
+  bool isUniform(const Instruction *I, const SmallBitVector &UniformArgs) const;
+
 private:
   std::unique_ptr<const TargetTransformInfoImplBase> TTIImpl;
 };
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 51ed2c692b4f0..c0d0cef3aa50e 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -1158,6 +1158,14 @@ class TargetTransformInfoImplBase {
     return InstructionUniformity::Default;
   }
 
+  // Custom uniformity check for instructions marked as Custom
+  // Override this to provide complex uniformity rules based on which operands
+  // are uniform
+  virtual bool isUniform(const Instruction *I,
+                         const SmallBitVector &UniformArgs) const {
+    return false; // Conservative: assume divergent
+  }
+
 protected:
   // Obtain the minimum required size to hold the value (without the sign)
   // In case of a vector it returns the min required size for one element.
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 2a9522452d7cd..2bf47eeea12ce 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -2359,6 +2359,22 @@ class LLVM_ABI TargetInstrInfo : public MCInstrInfo {
     return InstructionUniformity::Default;
   }
 
+  /// Determine if a machine instruction with some operands uniform can be
+  /// proven uniform. This is used for custom uniformity analysis where the
+  /// target can define complex rules that depend on which specific operands
+  /// are uniform.
+  ///
+  /// \param MI The machine instruction to check.
+  /// \param UniformArgs A bitvector indicating which register operands are
+  ///                    known to be uniform (bit N corresponds to the Nth
+  ///                    register use operand).
+  /// \returns true if the instruction result can be proven uniform given the
+  ///          uniform operands, false otherwise.
+  virtual bool isUniform(const MachineInstr &MI,
+                         const SmallBitVector &UniformArgs) const {
+    return false; // Conservative: assume divergent
+  }
+
   /// Returns true if the given \p MI defines a TargetIndex operand that can be
   /// tracked by their offset, can have values, and can have debug info
   /// associated with it. If so, sets \p Index and \p Offset of the target index
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index e4db965251e40..6d9e38901b982 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1504,6 +1504,11 @@ TargetTransformInfo::getInstructionUniformity(const Value *V) const {
   return TTIImpl->getInstructionUniformity(V);
 }
 
+bool TargetTransformInfo::isUniform(const Instruction *I,
+                                    const SmallBitVector &UniformArgs) const {
+  return TTIImpl->isUniform(I, UniformArgs);
+}
+
 TargetTransformInfoImplBase::~TargetTransformInfoImplBase() = default;
 
 TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
diff --git a/llvm/lib/Analysis/UniformityAnalysis.cpp b/llvm/lib/Analysis/UniformityAnalysis.cpp
index 9a02b65f97bc5..663faf88e915f 100644
--- a/llvm/lib/Analysis/UniformityAnalysis.cpp
+++ b/llvm/lib/Analysis/UniformityAnalysis.cpp
@@ -8,6 +8,7 @@
 
 #include "llvm/Analysis/UniformityAnalysis.h"
 #include "llvm/ADT/GenericUniformityImpl.h"
+#include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/Uniformity.h"
 #include "llvm/Analysis/CycleAnalysis.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
@@ -36,10 +37,14 @@ template <> void llvm::GenericUniformityAnalysisImpl<SSAContext>::initialize() {
     switch (IU) {
     case InstructionUniformity::AlwaysUniform:
       addUniformOverride(I);
-      continue;
+      break;
     case InstructionUniformity::NeverUniform:
       markDivergent(I);
-      continue;
+      break;
+    case InstructionUniformity::Custom:
+      // Instructions requiring custom uniformity analysis based on operands
+      addUniformInstruction(&I, IU);
+      break;
     case InstructionUniformity::Default:
       break;
     }
@@ -110,18 +115,16 @@ bool llvm::GenericUniformityAnalysisImpl<SSAContext>::isDivergentUse(
 }
 
 template <>
-bool GenericUniformityAnalysisImpl<SSAContext>::isOperandUniform(
-    const Instruction &I, InstructionUniformity IU) const {
-  switch (IU) {
-  case InstructionUniformity::AnyOfFirstTwoUseOp:
-    // For permlane16/permlanex16: <old> <src0> <src1> <src2> <fi>
-    // <bound_control> Check if either src0 (operand 1) or src1 (operand 2 -
-    // lane select) is uniform
-    return !isDivergentUse(I.getOperandUse(1)) ||
-           !isDivergentUse(I.getOperandUse(2));
-  default:
-    return false;
+bool GenericUniformityAnalysisImpl<SSAContext>::isCustomUniform(
+    const Instruction &I) const {
+  // Build bitvector of uniform operands
+  SmallBitVector UniformArgs(I.getNumOperands());
+  for (unsigned OpIdx = 0; OpIdx < I.getNumOperands(); ++OpIdx) {
+    UniformArgs[OpIdx] = !isDivergentUse(I.getOperandUse(OpIdx));
   }
+
+  // Query target-specific uniformity callback
+  return TTI->isUniform(&I, UniformArgs);
 }
 
 // This ensures explicit instantiation of
diff --git a/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp b/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
index d4fed2b456bd1..c9a8a05a73b6e 100644
--- a/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
+++ b/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
@@ -8,6 +8,7 @@
 
 #include "llvm/CodeGen/MachineUniformityAnalysis.h"
 #include "llvm/ADT/GenericUniformityImpl.h"
+#include "llvm/ADT/SmallBitVector.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/CodeGen/MachineCycleAnalysis.h"
 #include "llvm/CodeGen/MachineDominators.h"
@@ -61,6 +62,10 @@ void llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::initialize() {
       case InstructionUniformity::NeverUniform:
         markDivergent(instr);
         break;
+      case InstructionUniformity::Custom:
+        // Instructions requiring custom uniformity analysis based on operands
+        addUniformInstruction(&instr, uniformity);
+        break;
       case InstructionUniformity::Default:
         break;
       }
@@ -152,41 +157,25 @@ bool llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::isDivergentUse(
 }
 
 template <>
-bool GenericUniformityAnalysisImpl<MachineSSAContext>::isOperandUniform(
-    const MachineInstr &MI, InstructionUniformity IU) const {
-  switch (IU) {
-  // For permlane16/permlanex16, check if either src or lane select is uniform
-  // These instructions have mixed immediate and register operands:
-  // Operand 1 is src0 (the source value to permute)
-  // Operand 3 is src1 (lane select - which lane within the 16 to read from)
-  // Result is uniform if EITHER the source OR lane select is uniform
-  case InstructionUniformity::AnyOfFirstTwoUseOp: {
-    // Check if any of the first two register use operands is uniform
-    // Result is uniform if ANY of these operands is uniform
-    const MachineOperand *FirstRegOp = nullptr;
-    const MachineOperand *SecondRegOp = nullptr;
-
-    // Find the first two register use operands
-    for (const MachineOperand &MO : MI.uses()) {
-      if (MO.isReg() && MO.getReg().isVirtual()) {
-        if (!FirstRegOp)
-          FirstRegOp = &MO;
-        else if (!SecondRegOp) {
-          SecondRegOp = &MO;
-          break;
-        }
-      }
-    }
-
-    if (!FirstRegOp || !SecondRegOp)
-      return false;
+bool GenericUniformityAnalysisImpl<MachineSSAContext>::isCustomUniform(
+    const MachineInstr &MI) const {
+  const auto &InstrInfo = *F.getSubtarget().getInstrInfo();
 
-    // Return true if either operand is uniform
-    return !isDivergentUse(*FirstRegOp) || !isDivergentUse(*SecondRegOp);
+  // Build bitvector of uniform register use operands
+  SmallVector<const MachineOperand *, 4> RegUseOps;
+  for (const MachineOperand &MO : MI.uses()) {
+    if (MO.isReg() && MO.getReg().isVirtual()) {
+      RegUseOps.push_back(&MO);
+    }
   }
-  default:
-    return false;
+
+  SmallBitVector UniformArgs(RegUseOps.size());
+  for (unsigned i = 0; i < RegUseOps.size(); ++i) {
+    UniformArgs[i] = !isDivergentUse(*RegUseOps[i]);
   }
+
+  // Query target-specific uniformity callback
+  return InstrInfo.isUniform(MI, UniformArgs);
 }
 
 // This ensures explicit instantiation of
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 5b541a7063e33..7592d0ccb1f73 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -18,6 +18,7 @@
 #include "AMDGPUTargetMachine.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "SIModeRegisterDefaults.h"
+#include "llvm/ADT/SmallBitVector.h"
 #include "llvm/Analysis/InlineCost.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -1656,6 +1657,18 @@ unsigned GCNTTIImpl::getNumberOfParts(Type *Tp) const {
 // with additional support for new uniformity classifications
 InstructionUniformity
 GCNTTIImpl::getInstructionUniformity(const Value *V) const {
+  // Check for special cases requiring custom uniformity analysis
+  if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V)) {
+    switch (Intrinsic->getIntrinsicID()) {
+    case Intrinsic::amdgcn_permlane16:
+    case Intrinsic::amdgcn_permlanex16:
+      return InstructionUniformity::Custom;
+    default:
+      break;
+    }
+  }
+
+  // Delegate to old APIs for backward compatibility
   if (isAlwaysUniform(V))
     return InstructionUniformity::AlwaysUniform;
 
@@ -1664,3 +1677,28 @@ GCNTTIImpl::getInstructionUniformity(const Value *V) const {
 
   return InstructionUniformity::Default;
 }
+
+bool GCNTTIImpl::isUniform(const Instruction *I,
+                           const SmallBitVector &UniformArgs) const {
+  // Custom uniformity check for permlane16/permlanex16
+  if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(I)) {
+    switch (Intrinsic->getIntrinsicID()) {
+    case Intrinsic::amdgcn_permlane16:
+    case Intrinsic::amdgcn_permlanex16:
+      // For permlane16/permlanex16:
+      // Operand 0: old value (ignored for uniformity)
+      // Operand 1: src0 (source value to permute)
+      // Operand 2: src1 (lane select within 16-lane group)
+      // Operand 3: src2 (which 16-lane group)
+      // Result is uniform if either src0 (op 1) or src1 (op 2) is uniform
+      if (UniformArgs.size() > 2) {
+        return UniformArgs[1] || UniformArgs[2];
+      }
+      return false;
+    default:
+      break;
+    }
+  }
+
+  return false;
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index 4dcf381a9af93..ae6a223b18924 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -310,6 +310,9 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
   unsigned getNumberOfParts(Type *Tp) const override;
 
   InstructionUniformity getInstructionUniformity(const Value *V) const override;
+
+  bool isUniform(const Instruction *I,
+                 const SmallBitVector &UniformArgs) const override;
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 8e15943c78998..3f249c5895e22 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -20,6 +20,7 @@
 #include "SIMachineFunctionInfo.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallBitVector.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
 #include "llvm/CodeGen/LiveIntervals.h"
@@ -10629,11 +10630,10 @@ SIInstrInfo::getInstructionUniformity(const MachineInstr &MI) const {
 
   unsigned opcode = MI.getOpcode();
 
-  // Special handling for permlane16/permlanex16 - uniformity depends on
-  // operands
+  // permlane16/permlanex16 require custom uniformity analysis
   if (opcode == AMDGPU::V_PERMLANE16_B32_e64 ||
       opcode == AMDGPU::V_PERMLANEX16_B32_e64)
-    return InstructionUniformity::AnyOfFirstTwoUseOp;
+    return InstructionUniformity::Custom;
 
   if (opcode == AMDGPU::V_READLANE_B32 ||
       opcode == AMDGPU::V_READFIRSTLANE_B32 ||
@@ -10712,6 +10712,23 @@ SIInstrInfo::getInstructionUniformity(const MachineInstr &MI) const {
 
   return InstructionUniformity::Default;
 }
+bool SIInstrInfo::isUniform(const MachineInstr &MI,
+                            const SmallBitVector &UniformArgs) const {
+  unsigned opcode = MI.getOpcode();
+
+  // Custom uniformity check for permlane16/permlanex16
+  if (opcode == AMDGPU::V_PERMLANE16_B32_e64 ||
+      opcode == AMDGPU::V_PERMLANEX16_B32_e64) {
+    // Result is uniform if either src0 or src1 is uniform
+    // UniformArgs[0] = src0 (source value)
+    // UniformArgs[1] = src1 (lane select)
+    if (UniformArgs.size() >= 2) {
+      return UniformArgs[0] || UniformArgs[1];
+    }
+  }
+
+  return false;
+}
 
 unsigned SIInstrInfo::getDSShaderTypeValue(const MachineFunction &MF) {
   switch (MF.getFunction().getCallingConv()) {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 01c09cb3bd896..0f8202d74099e 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -1650,6 +1650,9 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
   InstructionUniformity
   getInstructionUniformity(const MachineInstr &MI) const final;
 
+  bool isUniform(const MachineInstr &MI,
+                 const SmallBitVector &UniformArgs) const final;
+
   InstructionUniformity
   getGenericInstructionUniformity(const MachineInstr &MI) const;
 

>From 1e81c49607ce0b06a83d54106621546322e92035 Mon Sep 17 00:00:00 2001
From: Pankaj kumar divedi <Pankajkumar.divedi at amd.com>
Date: Tue, 16 Dec 2025 18:28:29 +0530
Subject: [PATCH 10/10] refactor: rebased with latest changes

---
 llvm/include/llvm/Analysis/TargetTransformInfo.h     | 2 --
 llvm/include/llvm/Analysis/TargetTransformInfoImpl.h | 7 -------
 llvm/lib/Analysis/TargetTransformInfo.cpp            | 5 -----
 llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp | 1 -
 4 files changed, 15 deletions(-)

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 22cfe8468597f..43ab1f723a811 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -2003,8 +2003,6 @@ class TargetTransformInfo {
   /// target.
   LLVM_ABI bool allowVectorElementIndexingUsingGEP() const;
 
-  InstructionUniformity getInstructionUniformity(const Value *V) const;
-
   /// Determine if an instruction with some operands uniform can be proven
   /// uniform. This is used for custom uniformity analysis where the target
   /// can define complex rules that depend on which specific operands are
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index c0d0cef3aa50e..aa27d3e212ee4 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -1151,13 +1151,6 @@ class TargetTransformInfoImplBase {
 
   virtual bool allowVectorElementIndexingUsingGEP() const { return true; }
 
-  // New API for uniformity classification
-  // Targets should override this to provide target-specific uniformity analysis
-  // The default implementation returns Default (conservative behavior)
-  virtual InstructionUniformity getInstructionUniformity(const Value *V) const {
-    return InstructionUniformity::Default;
-  }
-
   // Custom uniformity check for instructions marked as Custom
   // Override this to provide complex uniformity rules based on which operands
   // are uniform
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 6d9e38901b982..8d033852d7750 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1499,11 +1499,6 @@ bool TargetTransformInfo::allowVectorElementIndexingUsingGEP() const {
   return TTIImpl->allowVectorElementIndexingUsingGEP();
 }
 
-InstructionUniformity
-TargetTransformInfo::getInstructionUniformity(const Value *V) const {
-  return TTIImpl->getInstructionUniformity(V);
-}
-
 bool TargetTransformInfo::isUniform(const Instruction *I,
                                     const SmallBitVector &UniformArgs) const {
   return TTIImpl->isUniform(I, UniformArgs);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 7592d0ccb1f73..c7f6cf6eb53c0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -1699,6 +1699,5 @@ bool GCNTTIImpl::isUniform(const Instruction *I,
       break;
     }
   }
-
   return false;
 }