[llvm] [AMDGPU][TTI] Add target hook for the custom instruction uniformity (PR #137639)
Pankaj Dwivedi via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 4 00:52:08 PST 2025
https://github.com/PankajDwivedi-25 updated https://github.com/llvm/llvm-project/pull/137639
>From 860b4859812c20b03e47e7eb25a9999188558c30 Mon Sep 17 00:00:00 2001
From: Pankaj kumar divedi <Pankajkumar.divedi at amd.com>
Date: Tue, 23 Sep 2025 17:25:29 +0530
Subject: [PATCH 1/3] [NFC] move isDivergentUse so later dependent function in
pushUsers can safely use it
---
llvm/lib/Analysis/UniformityAnalysis.cpp | 26 ++++++++++++------------
1 file changed, 13 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Analysis/UniformityAnalysis.cpp b/llvm/lib/Analysis/UniformityAnalysis.cpp
index 2101fdfacfc8f..a4a68bb2a0753 100644
--- a/llvm/lib/Analysis/UniformityAnalysis.cpp
+++ b/llvm/lib/Analysis/UniformityAnalysis.cpp
@@ -29,6 +29,19 @@ bool llvm::GenericUniformityAnalysisImpl<SSAContext>::markDefsDivergent(
return markDivergent(cast<Value>(&Instr));
}
+template <>
+bool llvm::GenericUniformityAnalysisImpl<SSAContext>::isDivergentUse(
+ const Use &U) const {
+ const auto *V = U.get();
+ if (isDivergent(V))
+ return true;
+ if (const auto *DefInstr = dyn_cast<Instruction>(V)) {
+ const auto *UseInstr = cast<Instruction>(U.getUser());
+ return isTemporalDivergent(*UseInstr->getParent(), *DefInstr);
+ }
+ return false;
+}
+
template <> void llvm::GenericUniformityAnalysisImpl<SSAContext>::initialize() {
for (auto &I : instructions(F)) {
if (TTI->isSourceOfDivergence(&I))
@@ -88,19 +101,6 @@ void llvm::GenericUniformityAnalysisImpl<
}
}
-template <>
-bool llvm::GenericUniformityAnalysisImpl<SSAContext>::isDivergentUse(
- const Use &U) const {
- const auto *V = U.get();
- if (isDivergent(V))
- return true;
- if (const auto *DefInstr = dyn_cast<Instruction>(V)) {
- const auto *UseInstr = cast<Instruction>(U.getUser());
- return isTemporalDivergent(*UseInstr->getParent(), *DefInstr);
- }
- return false;
-}
-
// This ensures explicit instantiation of
// GenericUniformityAnalysisImpl::ImplDeleter::operator()
template class llvm::GenericUniformityInfo<SSAContext>;
>From afec697bced424dd3332896311931f4a0891f3e4 Mon Sep 17 00:00:00 2001
From: Pankaj kumar divedi <Pankajkumar.divedi at amd.com>
Date: Sat, 1 Nov 2025 02:02:14 +0530
Subject: [PATCH 2/3] add target hook to capture special operand uniformity and
update UA to use it
---
llvm/include/llvm/ADT/GenericUniformityImpl.h | 22 +++++++
llvm/include/llvm/ADT/Uniformity.h | 5 +-
.../llvm/Analysis/TargetTransformInfo.h | 3 +
.../llvm/Analysis/TargetTransformInfoImpl.h | 5 ++
llvm/lib/Analysis/TargetTransformInfo.cpp | 5 ++
llvm/lib/Analysis/UniformityAnalysis.cpp | 42 +++++++++----
.../lib/CodeGen/MachineUniformityAnalysis.cpp | 11 ++++
.../Target/AMDGPU/AMDGPUSearchableTables.td | 2 -
.../AMDGPU/AMDGPUTargetTransformInfo.cpp | 14 +++++
.../Target/AMDGPU/AMDGPUTargetTransformInfo.h | 2 +
.../UniformityAnalysis/AMDGPU/intrinsics.ll | 4 +-
.../AMDGPU/uniform_intrinsic.ll | 59 +++++++++++++++++++
12 files changed, 156 insertions(+), 18 deletions(-)
create mode 100644 llvm/test/Analysis/UniformityAnalysis/AMDGPU/uniform_intrinsic.ll
diff --git a/llvm/include/llvm/ADT/GenericUniformityImpl.h b/llvm/include/llvm/ADT/GenericUniformityImpl.h
index d10355fff1bea..72dd32cab2b81 100644
--- a/llvm/include/llvm/ADT/GenericUniformityImpl.h
+++ b/llvm/include/llvm/ADT/GenericUniformityImpl.h
@@ -51,6 +51,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Uniformity.h"
#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "uniformity"
@@ -407,6 +408,11 @@ template <typename ContextT> class GenericUniformityAnalysisImpl {
void recordTemporalDivergence(ConstValueRefT, const InstructionT *,
const CycleT *);
+ bool isOperandUniform(const InstructionT &I, InstructionUniformity IU) const;
+
+ /// \brief Keep track of target instructions that can be proven uniform.
+ void addUniformInstruction(const InstructionT *I, InstructionUniformity IU);
+
protected:
/// \brief Value/block pair representing a single phi input.
struct PhiInput {
@@ -429,6 +435,11 @@ template <typename ContextT> class GenericUniformityAnalysisImpl {
// Internal worklist for divergence propagation.
std::vector<const InstructionT *> Worklist;
+ // Map of tracked instructions that can be proven uniform based on the
+ // uniformity of their operands.
+ llvm::DenseMap<const InstructionT *, InstructionUniformity>
+ UniformInstruction;
+
/// \brief Mark \p Term as divergent and push all Instructions that become
/// divergent as a result on the worklist.
void analyzeControlDivergence(const InstructionT &Term);
@@ -793,6 +804,11 @@ void GenericUniformityAnalysisImpl<ContextT>::markDivergent(
const InstructionT &I) {
if (isAlwaysUniform(I))
return;
+ auto It = UniformInstruction.find(&I);
+ if (It != UniformInstruction.end() && isOperandUniform(I, It->second)) {
+ addUniformOverride(I);
+ return;
+ }
bool Marked = false;
if (I.isTerminator()) {
Marked = DivergentTermBlocks.insert(I.getParent()).second;
@@ -824,6 +840,12 @@ void GenericUniformityAnalysisImpl<ContextT>::addUniformOverride(
UniformOverrides.insert(&Instr);
}
+template <typename ContextT>
+void GenericUniformityAnalysisImpl<ContextT>::addUniformInstruction(
+ const InstructionT *I, InstructionUniformity IU) {
+ UniformInstruction[I] = IU;
+}
+
// Mark as divergent all external uses of values defined in \p DefCycle.
//
// A value V defined by a block B inside \p DefCycle may be used outside the
diff --git a/llvm/include/llvm/ADT/Uniformity.h b/llvm/include/llvm/ADT/Uniformity.h
index 21ca106b80be3..9571d43b8a9b9 100644
--- a/llvm/include/llvm/ADT/Uniformity.h
+++ b/llvm/include/llvm/ADT/Uniformity.h
@@ -23,7 +23,10 @@ enum class InstructionUniformity {
AlwaysUniform,
/// The result values can never be assumed to be uniform.
- NeverUniform
+ NeverUniform,
+
+ /// Result value can be uniform if either of first two operand are uniform.
+ EitherOfFirstTwoOp
};
} // namespace llvm
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 022530dc846ea..252c333c6ddfe 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -23,6 +23,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Uniformity.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/IR/FMF.h"
#include "llvm/IR/InstrTypes.h"
@@ -1916,6 +1917,8 @@ class TargetTransformInfo {
const Function &F,
SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const;
+ InstructionUniformity getInstructionUniformity(const Instruction &I) const;
+
private:
std::unique_ptr<const TargetTransformInfoImplBase> TTIImpl;
};
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 990252b1e5743..8387392d7307c 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -1147,6 +1147,11 @@ class TargetTransformInfoImplBase {
const Function &F,
SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const {}
+ virtual InstructionUniformity
+ getInstructionUniformity(const Instruction &I) const {
+ return InstructionUniformity::Default;
+ }
+
protected:
// Obtain the minimum required size to hold the value (without the sign)
// In case of a vector it returns the min required size for one element.
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 8548afea72964..862b2fce0c0aa 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1476,6 +1476,11 @@ void TargetTransformInfo::collectKernelLaunchBounds(
return TTIImpl->collectKernelLaunchBounds(F, LB);
}
+InstructionUniformity
+TargetTransformInfo::getInstructionUniformity(const Instruction &I) const {
+ return TTIImpl->getInstructionUniformity(I);
+}
+
TargetTransformInfoImplBase::~TargetTransformInfoImplBase() = default;
TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
diff --git a/llvm/lib/Analysis/UniformityAnalysis.cpp b/llvm/lib/Analysis/UniformityAnalysis.cpp
index a4a68bb2a0753..07efba9087edd 100644
--- a/llvm/lib/Analysis/UniformityAnalysis.cpp
+++ b/llvm/lib/Analysis/UniformityAnalysis.cpp
@@ -8,6 +8,7 @@
#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/ADT/GenericUniformityImpl.h"
+#include "llvm/ADT/Uniformity.h"
#include "llvm/Analysis/CycleAnalysis.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
@@ -29,25 +30,15 @@ bool llvm::GenericUniformityAnalysisImpl<SSAContext>::markDefsDivergent(
return markDivergent(cast<Value>(&Instr));
}
-template <>
-bool llvm::GenericUniformityAnalysisImpl<SSAContext>::isDivergentUse(
- const Use &U) const {
- const auto *V = U.get();
- if (isDivergent(V))
- return true;
- if (const auto *DefInstr = dyn_cast<Instruction>(V)) {
- const auto *UseInstr = cast<Instruction>(U.getUser());
- return isTemporalDivergent(*UseInstr->getParent(), *DefInstr);
- }
- return false;
-}
-
template <> void llvm::GenericUniformityAnalysisImpl<SSAContext>::initialize() {
for (auto &I : instructions(F)) {
if (TTI->isSourceOfDivergence(&I))
markDivergent(I);
else if (TTI->isAlwaysUniform(&I))
addUniformOverride(I);
+ InstructionUniformity IU = TTI->getInstructionUniformity(I);
+ if (IU != InstructionUniformity::Default)
+ addUniformInstruction(&I, IU);
}
for (auto &Arg : F.args()) {
if (TTI->isSourceOfDivergence(&Arg)) {
@@ -101,6 +92,31 @@ void llvm::GenericUniformityAnalysisImpl<
}
}
+template <>
+bool llvm::GenericUniformityAnalysisImpl<SSAContext>::isDivergentUse(
+ const Use &U) const {
+ const auto *V = U.get();
+ if (isDivergent(V))
+ return true;
+ if (const auto *DefInstr = dyn_cast<Instruction>(V)) {
+ const auto *UseInstr = cast<Instruction>(U.getUser());
+ return isTemporalDivergent(*UseInstr->getParent(), *DefInstr);
+ }
+ return false;
+}
+
+template <>
+bool GenericUniformityAnalysisImpl<SSAContext>::isOperandUniform(
+ const Instruction &I, InstructionUniformity IU) const {
+ switch (IU) {
+ case InstructionUniformity::EitherOfFirstTwoOp:
+ return !isDivergentUse(I.getOperandUse(0)) ||
+ !isDivergentUse(I.getOperandUse(1));
+ default:
+ return false;
+ }
+}
+
// This ensures explicit instantiation of
// GenericUniformityAnalysisImpl::ImplDeleter::operator()
template class llvm::GenericUniformityInfo<SSAContext>;
diff --git a/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp b/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
index 8c95dc71d4e21..091a6e5130998 100644
--- a/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
+++ b/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
@@ -147,6 +147,17 @@ bool llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::isDivergentUse(
return isTemporalDivergent(*UseInstr->getParent(), *DefInstr);
}
+template <>
+bool GenericUniformityAnalysisImpl<MachineSSAContext>::isOperandUniform(
+ const MachineInstr &I, InstructionUniformity IU) const {
+ switch (IU) {
+ case InstructionUniformity::EitherOfFirstTwoOp:
+ return !isDivergentUse(I.getOperand(0)) || !isDivergentUse(I.getOperand(1));
+ default:
+ return false;
+ }
+}
+
// This ensures explicit instantiation of
// GenericUniformityAnalysisImpl::ImplDeleter::operator()
template class llvm::GenericUniformityInfo<MachineSSAContext>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
index 3b62dcf3c92cd..da3776761ab34 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
@@ -317,8 +317,6 @@ def : SourceOfDivergence<int_amdgcn_live_mask>;
def : SourceOfDivergence<int_amdgcn_ds_swizzle>;
def : SourceOfDivergence<int_amdgcn_ds_ordered_add>;
def : SourceOfDivergence<int_amdgcn_ds_ordered_swap>;
-def : SourceOfDivergence<int_amdgcn_permlane16>;
-def : SourceOfDivergence<int_amdgcn_permlanex16>;
def : SourceOfDivergence<int_amdgcn_permlane16_var>;
def : SourceOfDivergence<int_amdgcn_permlanex16_var>;
def : SourceOfDivergence<int_amdgcn_mov_dpp>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 204d3df546bbf..704e7946fa28c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -1422,3 +1422,17 @@ void GCNTTIImpl::collectKernelLaunchBounds(
LB.push_back({"amdgpu-waves-per-eu[0]", WavesPerEU.first});
LB.push_back({"amdgpu-waves-per-eu[1]", WavesPerEU.second});
}
+
+InstructionUniformity
+GCNTTIImpl::getInstructionUniformity(const Instruction &I) const {
+ if (const auto *II = dyn_cast<IntrinsicInst>(&I)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::amdgcn_permlane16:
+ case Intrinsic::amdgcn_permlanex16:
+ return InstructionUniformity::EitherOfFirstTwoOp;
+ default:
+ break;
+ }
+ }
+ return InstructionUniformity::Default;
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index f6f7bd4bfcf5b..9ed4e3e888d81 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -290,6 +290,8 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
void collectKernelLaunchBounds(
const Function &F,
SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const override;
+ InstructionUniformity
+ getInstructionUniformity(const Instruction &I) const override;
};
} // end namespace llvm
diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll
index bb840023daf5d..f209c996c7692 100644
--- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll
@@ -7,14 +7,14 @@ define amdgpu_kernel void @ds_swizzle(ptr addrspace(1) %out, i32 %src) #0 {
ret void
}
-; CHECK: DIVERGENT: %v = call i32 @llvm.amdgcn.permlane16.i32(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
+; CHECK: ALL VALUES UNIFORM
define amdgpu_kernel void @v_permlane16_b32(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
%v = call i32 @llvm.amdgcn.permlane16.i32(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
store i32 %v, ptr addrspace(1) %out
ret void
}
-; CHECK: DIVERGENT: %v = call i32 @llvm.amdgcn.permlanex16.i32(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
+; CHECK: ALL VALUES UNIFORM
define amdgpu_kernel void @v_permlanex16_b32(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
%v = call i32 @llvm.amdgcn.permlanex16.i32(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
store i32 %v, ptr addrspace(1) %out
diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/uniform_intrinsic.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/uniform_intrinsic.ll
new file mode 100644
index 0000000000000..37be465a7796b
--- /dev/null
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/uniform_intrinsic.ll
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -mtriple amdgcn-- -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s
+
+; CHECK: ALL VALUES UNIFORM
+define amdgpu_kernel void @v_permlane16_b32(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
+ %v = call i32 @llvm.amdgcn.permlane16.i32(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
+ store i32 %v, ptr addrspace(1) %out
+ ret void
+}
+
+; CHECK: ALL VALUES UNIFORM
+define amdgpu_kernel void @v_permlanex16_b32(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
+ %v = call i32 @llvm.amdgcn.permlanex16.i32(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
+ store i32 %v, ptr addrspace(1) %out
+ ret void
+}
+
+; CHECK: DIVERGENT: %v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false)
+; CHECK: %v1 = call i32 @llvm.amdgcn.permlanex16.i32(i32 %v, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false)
+; CHECK: store i32 %v1, ptr addrspace(1) %out, align 4
+define amdgpu_kernel void @div_permlane16_var_uni_usr_x16(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
+ %v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
+ %v1 = call i32 @llvm.amdgcn.permlanex16.i32(i32 %v, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
+ store i32 %v1, ptr addrspace(1) %out
+ ret void
+}
+
+; CHECK: DIVERGENT: %v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false)
+; CHECK: %v1 = call i32 @llvm.amdgcn.permlanex16.i32(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false)
+; CHECK: store i32 %v1, ptr addrspace(1) %out, align 4
+define amdgpu_kernel void @div_permlane16_var_uni_x16(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
+ %v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
+ %v1 = call i32 @llvm.amdgcn.permlanex16.i32(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
+ store i32 %v1, ptr addrspace(1) %out
+ ret void
+}
+
+; CHECK: DIVERGENT: %v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false)
+; CHECK: %v1 = call i32 @llvm.amdgcn.permlane16.i32(i32 %v, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false)
+; CHECK: store i32 %v1, ptr addrspace(1) %out, align 4
+define amdgpu_kernel void @div_permlane16_var_uni_usr_16(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
+ %v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
+ %v1 = call i32 @llvm.amdgcn.permlane16.i32(i32 %v, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
+ store i32 %v1, ptr addrspace(1) %out
+ ret void
+}
+
+; CHECK: DIVERGENT: %v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false)
+; CHECK: %v1 = call i32 @llvm.amdgcn.permlane16.i32(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false)
+; CHECK: store i32 %v1, ptr addrspace(1) %out, align 4
+define amdgpu_kernel void @div_permlane16_var_uni_16(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
+ %v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
+ %v1 = call i32 @llvm.amdgcn.permlane16.i32(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
+ store i32 %v1, ptr addrspace(1) %out
+ ret void
+}
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
>From 885f77d5e5b309963f3f62f0634cec987a368200 Mon Sep 17 00:00:00 2001
From: Pankaj kumar divedi <Pankajkumar.divedi at amd.com>
Date: Tue, 4 Nov 2025 14:21:00 +0530
Subject: [PATCH 3/3] update enum name for more clarity
---
llvm/include/llvm/ADT/Uniformity.h | 5 +++--
llvm/lib/Analysis/UniformityAnalysis.cpp | 2 +-
llvm/lib/CodeGen/MachineUniformityAnalysis.cpp | 4 ++--
llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp | 2 +-
4 files changed, 7 insertions(+), 6 deletions(-)
diff --git a/llvm/include/llvm/ADT/Uniformity.h b/llvm/include/llvm/ADT/Uniformity.h
index 9571d43b8a9b9..ed558b004d322 100644
--- a/llvm/include/llvm/ADT/Uniformity.h
+++ b/llvm/include/llvm/ADT/Uniformity.h
@@ -25,8 +25,9 @@ enum class InstructionUniformity {
/// The result values can never be assumed to be uniform.
NeverUniform,
- /// Result value can be uniform if either of first two operand are uniform.
- EitherOfFirstTwoOp
+ /// Result value can be uniform if any of the first two use operands is
+ /// uniform.
+ AnyOfFirstTwoUseOp
};
} // namespace llvm
diff --git a/llvm/lib/Analysis/UniformityAnalysis.cpp b/llvm/lib/Analysis/UniformityAnalysis.cpp
index 07efba9087edd..61b01b3a28469 100644
--- a/llvm/lib/Analysis/UniformityAnalysis.cpp
+++ b/llvm/lib/Analysis/UniformityAnalysis.cpp
@@ -109,7 +109,7 @@ template <>
bool GenericUniformityAnalysisImpl<SSAContext>::isOperandUniform(
const Instruction &I, InstructionUniformity IU) const {
switch (IU) {
- case InstructionUniformity::EitherOfFirstTwoOp:
+ case InstructionUniformity::AnyOfFirstTwoUseOp:
return !isDivergentUse(I.getOperandUse(0)) ||
!isDivergentUse(I.getOperandUse(1));
default:
diff --git a/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp b/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
index 091a6e5130998..e1a4037584030 100644
--- a/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
+++ b/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
@@ -151,8 +151,8 @@ template <>
bool GenericUniformityAnalysisImpl<MachineSSAContext>::isOperandUniform(
const MachineInstr &I, InstructionUniformity IU) const {
switch (IU) {
- case InstructionUniformity::EitherOfFirstTwoOp:
- return !isDivergentUse(I.getOperand(0)) || !isDivergentUse(I.getOperand(1));
+ case InstructionUniformity::AnyOfFirstTwoUseOp:
+ return !isDivergentUse(I.getOperand(1)) || !isDivergentUse(I.getOperand(2));
default:
return false;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 704e7946fa28c..18be9bd60ff22 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -1429,7 +1429,7 @@ GCNTTIImpl::getInstructionUniformity(const Instruction &I) const {
switch (II->getIntrinsicID()) {
case Intrinsic::amdgcn_permlane16:
case Intrinsic::amdgcn_permlanex16:
- return InstructionUniformity::EitherOfFirstTwoOp;
+ return InstructionUniformity::AnyOfFirstTwoUseOp;
default:
break;
}
More information about the llvm-commits
mailing list