[llvm] [AMDGPU] Build lane intrinsics in a mangling-agnostic way. NFC. (PR #91583)
via llvm-commits
llvm-commits at lists.llvm.org
Thu May 9 04:06:36 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Jay Foad (jayfoad)
<details>
<summary>Changes</summary>
Use the form of CreateIntrinsic that takes an explicit return type and
works out the mangling based on that and the types of the arguments. The
advantage is that this still works if intrinsics are changed to have
type mangling, e.g. if readlane/readfirstlane/writelane are changed to
work on any type.
---
Full diff: https://github.com/llvm/llvm-project/pull/91583.diff
1 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp (+11-11)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
index ad98f4f743ae..1d645002b1fe 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
@@ -493,8 +493,8 @@ Value *AMDGPUAtomicOptimizerImpl::buildScan(IRBuilder<> &B,
if (!ST->isWave32()) {
// Combine lane 31 into lanes 32..63.
V = B.CreateBitCast(V, IntNTy);
- Value *const Lane31 = B.CreateIntrinsic(Intrinsic::amdgcn_readlane, {},
- {V, B.getInt32(31)});
+ Value *const Lane31 = B.CreateIntrinsic(
+ V->getType(), Intrinsic::amdgcn_readlane, {V, B.getInt32(31)});
Value *UpdateDPPCall = B.CreateCall(
UpdateDPP, {Identity, Lane31, B.getInt32(DPP::QUAD_PERM_ID),
@@ -598,8 +598,8 @@ std::pair<Value *, Value *> AMDGPUAtomicOptimizerImpl::buildScanIteratively(
// Get the value required for atomic operation
V = B.CreateBitCast(V, IntNTy);
- Value *LaneValue =
- B.CreateIntrinsic(Intrinsic::amdgcn_readlane, {}, {V, LaneIdxInt});
+ Value *LaneValue = B.CreateIntrinsic(V->getType(), Intrinsic::amdgcn_readlane,
+ {V, LaneIdxInt});
LaneValue = B.CreateBitCast(LaneValue, Ty);
// Perform writelane if intermediate scan results are required later in the
@@ -607,7 +607,7 @@ std::pair<Value *, Value *> AMDGPUAtomicOptimizerImpl::buildScanIteratively(
Value *OldValue = nullptr;
if (NeedResult) {
OldValue =
- B.CreateIntrinsic(Intrinsic::amdgcn_writelane, {},
+ B.CreateIntrinsic(IntNTy, Intrinsic::amdgcn_writelane,
{B.CreateBitCast(Accumulator, IntNTy), LaneIdxInt,
B.CreateBitCast(OldValuePhi, IntNTy)});
OldValue = B.CreateBitCast(OldValue, Ty);
@@ -789,7 +789,7 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
Value *const LastLaneIdx = B.getInt32(ST->getWavefrontSize() - 1);
assert(TyBitWidth == 32);
NewV = B.CreateBitCast(NewV, IntNTy);
- NewV = B.CreateIntrinsic(Intrinsic::amdgcn_readlane, {},
+ NewV = B.CreateIntrinsic(IntNTy, Intrinsic::amdgcn_readlane,
{NewV, LastLaneIdx});
NewV = B.CreateBitCast(NewV, Ty);
}
@@ -936,10 +936,10 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
Value *const ExtractLo = B.CreateTrunc(CastedPhi, Int32Ty);
Value *const ExtractHi =
B.CreateTrunc(B.CreateLShr(CastedPhi, 32), Int32Ty);
- CallInst *const ReadFirstLaneLo =
- B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractLo);
- CallInst *const ReadFirstLaneHi =
- B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractHi);
+ CallInst *const ReadFirstLaneLo = B.CreateIntrinsic(
+ Int32Ty, Intrinsic::amdgcn_readfirstlane, ExtractLo);
+ CallInst *const ReadFirstLaneHi = B.CreateIntrinsic(
+ Int32Ty, Intrinsic::amdgcn_readfirstlane, ExtractHi);
Value *const PartialInsert = B.CreateInsertElement(
PoisonValue::get(VecTy), ReadFirstLaneLo, B.getInt32(0));
Value *const Insert =
@@ -948,7 +948,7 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
} else if (TyBitWidth == 32) {
Value *CastedPhi = B.CreateBitCast(PHI, IntNTy);
BroadcastI =
- B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, CastedPhi);
+ B.CreateIntrinsic(IntNTy, Intrinsic::amdgcn_readfirstlane, CastedPhi);
BroadcastI = B.CreateBitCast(BroadcastI, Ty);
} else {
``````````
</details>
https://github.com/llvm/llvm-project/pull/91583
More information about the llvm-commits
mailing list