[llvm] r356143 - AMDGPU: Don't add unnecessary convergent attributes
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 14 06:46:09 PDT 2019
Author: arsenm
Date: Thu Mar 14 06:46:09 2019
New Revision: 356143
URL: http://llvm.org/viewvc/llvm-project?rev=356143&view=rev
Log:
AMDGPU: Don't add unnecessary convergent attributes
These are redundant with the intrinsic declaration: the convergent attribute is already declared on the intrinsics themselves, so call sites created through B.CreateIntrinsic inherit it and do not need it set again.
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp?rev=356143&r1=356142&r2=356143&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp Thu Mar 14 06:46:09 2019
@@ -58,8 +58,6 @@ private:
void optimizeAtomic(Instruction &I, Instruction::BinaryOps Op,
unsigned ValIdx, bool ValDivergent) const;
- void setConvergent(CallInst *const CI) const;
-
public:
static char ID;
@@ -253,7 +251,6 @@ void AMDGPUAtomicOptimizer::optimizeAtom
CallInst *const Ballot =
B.CreateIntrinsic(Intrinsic::amdgcn_icmp, {B.getInt32Ty()},
{B.getInt32(1), B.getInt32(0), B.getInt32(33)});
- setConvergent(Ballot);
// We need to know how many lanes are active within the wavefront that are
// below us. If we counted each lane linearly starting from 0, a lane is
@@ -281,13 +278,11 @@ void AMDGPUAtomicOptimizer::optimizeAtom
// correctly contribute to the final result.
CallInst *const SetInactive =
B.CreateIntrinsic(Intrinsic::amdgcn_set_inactive, Ty, {V, Identity});
- setConvergent(SetInactive);
CallInst *const FirstDPP =
B.CreateIntrinsic(Intrinsic::amdgcn_update_dpp, Ty,
{Identity, SetInactive, B.getInt32(DPP_WF_SR1),
B.getInt32(0xf), B.getInt32(0xf), B.getFalse()});
- setConvergent(FirstDPP);
NewV = FirstDPP;
const unsigned Iters = 7;
@@ -305,7 +300,6 @@ void AMDGPUAtomicOptimizer::optimizeAtom
Intrinsic::amdgcn_update_dpp, Ty,
{Identity, UpdateValue, B.getInt32(DPPCtrl[Idx]),
B.getInt32(RowMask[Idx]), B.getInt32(BankMask[Idx]), B.getFalse()});
- setConvergent(DPP);
NewV = B.CreateBinOp(Op, NewV, DPP);
}
@@ -322,10 +316,8 @@ void AMDGPUAtomicOptimizer::optimizeAtom
B.CreateTrunc(B.CreateLShr(NewV, B.getInt64(32)), B.getInt32Ty());
CallInst *const ReadLaneLo = B.CreateIntrinsic(
Intrinsic::amdgcn_readlane, {}, {ExtractLo, B.getInt32(63)});
- setConvergent(ReadLaneLo);
CallInst *const ReadLaneHi = B.CreateIntrinsic(
Intrinsic::amdgcn_readlane, {}, {ExtractHi, B.getInt32(63)});
- setConvergent(ReadLaneHi);
Value *const PartialInsert = B.CreateInsertElement(
UndefValue::get(VecTy), ReadLaneLo, B.getInt32(0));
Value *const Insert =
@@ -334,7 +326,6 @@ void AMDGPUAtomicOptimizer::optimizeAtom
} else if (TyBitWidth == 32) {
CallInst *const ReadLane = B.CreateIntrinsic(Intrinsic::amdgcn_readlane,
{}, {NewV, B.getInt32(63)});
- setConvergent(ReadLane);
NewV = ReadLane;
} else {
llvm_unreachable("Unhandled atomic bit width");
@@ -398,20 +389,16 @@ void AMDGPUAtomicOptimizer::optimizeAtom
B.CreateTrunc(B.CreateLShr(PHI, B.getInt64(32)), B.getInt32Ty());
CallInst *const ReadFirstLaneLo =
B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractLo);
- setConvergent(ReadFirstLaneLo);
CallInst *const ReadFirstLaneHi =
B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractHi);
- setConvergent(ReadFirstLaneHi);
Value *const PartialInsert = B.CreateInsertElement(
UndefValue::get(VecTy), ReadFirstLaneLo, B.getInt32(0));
Value *const Insert =
B.CreateInsertElement(PartialInsert, ReadFirstLaneHi, B.getInt32(1));
BroadcastI = B.CreateBitCast(Insert, Ty);
} else if (TyBitWidth == 32) {
- CallInst *const ReadFirstLane =
- B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, PHI);
- setConvergent(ReadFirstLane);
- BroadcastI = ReadFirstLane;
+
+ BroadcastI = B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, PHI);
} else {
llvm_unreachable("Unhandled atomic bit width");
}
@@ -439,10 +426,6 @@ void AMDGPUAtomicOptimizer::optimizeAtom
I.eraseFromParent();
}
-void AMDGPUAtomicOptimizer::setConvergent(CallInst *const CI) const {
- CI->addAttribute(AttributeList::FunctionIndex, Attribute::Convergent);
-}
-
INITIALIZE_PASS_BEGIN(AMDGPUAtomicOptimizer, DEBUG_TYPE,
"AMDGPU atomic optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
More information about the llvm-commits
mailing list