[llvm] e2d17a0 - [AMDGPU] Build lane intrinsics in a mangling-agnostic way. NFC. (#91583)

via llvm-commits (llvm-commits at lists.llvm.org)
Thu May 9 05:42:44 PDT 2024


Author: Jay Foad
Date: 2024-05-09T13:42:40+01:00
New Revision: e2d17a053edff68c9761aceb8ff8303e3e37172c

URL: https://github.com/llvm/llvm-project/commit/e2d17a053edff68c9761aceb8ff8303e3e37172c
DIFF: https://github.com/llvm/llvm-project/commit/e2d17a053edff68c9761aceb8ff8303e3e37172c.diff

LOG: [AMDGPU] Build lane intrinsics in a mangling-agnostic way. NFC. (#91583)

Use the form of CreateIntrinsic that takes an explicit return type and
works out the mangling from that type and the types of the arguments.
The advantage is that this form keeps working if the intrinsics are
later changed to have type mangling, e.g. if readlane/readfirstlane/
writelane are changed to work on any type.
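
As an illustration, here is a minimal sketch of the two IRBuilder
overloads involved. The helper function and variable names are
hypothetical; the CreateIntrinsic calls mirror the first hunk of the
diff below:

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/IntrinsicsAMDGPU.h"
    using namespace llvm;

    // Reads the value of V from lane 31 of the wave. Assumes B is an
    // IRBuilder positioned inside a function and V is an i32 value.
    static Value *readLane31(IRBuilder<> &B, Value *V) {
      // Old form: overloaded types are listed explicitly; the list is
      // empty because amdgcn_readlane currently has no type mangling.
      //   B.CreateIntrinsic(Intrinsic::amdgcn_readlane, {},
      //                     {V, B.getInt32(31)});

      // New form: pass the desired return type and let the builder
      // work out the mangling from it and from the argument types.
      return B.CreateIntrinsic(V->getType(), Intrinsic::amdgcn_readlane,
                               {V, B.getInt32(31)});
    }

Today both forms build the same call; only the second keeps compiling
unchanged if the intrinsic later becomes overloaded.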

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
index ad98f4f743ae..1d645002b1fe 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
@@ -493,8 +493,8 @@ Value *AMDGPUAtomicOptimizerImpl::buildScan(IRBuilder<> &B,
     if (!ST->isWave32()) {
       // Combine lane 31 into lanes 32..63.
       V = B.CreateBitCast(V, IntNTy);
-      Value *const Lane31 = B.CreateIntrinsic(Intrinsic::amdgcn_readlane, {},
-                                              {V, B.getInt32(31)});
+      Value *const Lane31 = B.CreateIntrinsic(
+          V->getType(), Intrinsic::amdgcn_readlane, {V, B.getInt32(31)});
 
       Value *UpdateDPPCall = B.CreateCall(
           UpdateDPP, {Identity, Lane31, B.getInt32(DPP::QUAD_PERM_ID),
@@ -598,8 +598,8 @@ std::pair<Value *, Value *> AMDGPUAtomicOptimizerImpl::buildScanIteratively(
 
   // Get the value required for atomic operation
   V = B.CreateBitCast(V, IntNTy);
-  Value *LaneValue =
-      B.CreateIntrinsic(Intrinsic::amdgcn_readlane, {}, {V, LaneIdxInt});
+  Value *LaneValue = B.CreateIntrinsic(V->getType(), Intrinsic::amdgcn_readlane,
+                                       {V, LaneIdxInt});
   LaneValue = B.CreateBitCast(LaneValue, Ty);
 
   // Perform writelane if intermediate scan results are required later in the
@@ -607,7 +607,7 @@ std::pair<Value *, Value *> AMDGPUAtomicOptimizerImpl::buildScanIteratively(
   Value *OldValue = nullptr;
   if (NeedResult) {
     OldValue =
-        B.CreateIntrinsic(Intrinsic::amdgcn_writelane, {},
+        B.CreateIntrinsic(IntNTy, Intrinsic::amdgcn_writelane,
                           {B.CreateBitCast(Accumulator, IntNTy), LaneIdxInt,
                            B.CreateBitCast(OldValuePhi, IntNTy)});
     OldValue = B.CreateBitCast(OldValue, Ty);
@@ -789,7 +789,7 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
         Value *const LastLaneIdx = B.getInt32(ST->getWavefrontSize() - 1);
         assert(TyBitWidth == 32);
         NewV = B.CreateBitCast(NewV, IntNTy);
-        NewV = B.CreateIntrinsic(Intrinsic::amdgcn_readlane, {},
+        NewV = B.CreateIntrinsic(IntNTy, Intrinsic::amdgcn_readlane,
                                  {NewV, LastLaneIdx});
         NewV = B.CreateBitCast(NewV, Ty);
       }
@@ -936,10 +936,10 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
       Value *const ExtractLo = B.CreateTrunc(CastedPhi, Int32Ty);
       Value *const ExtractHi =
           B.CreateTrunc(B.CreateLShr(CastedPhi, 32), Int32Ty);
-      CallInst *const ReadFirstLaneLo =
-          B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractLo);
-      CallInst *const ReadFirstLaneHi =
-          B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractHi);
+      CallInst *const ReadFirstLaneLo = B.CreateIntrinsic(
+          Int32Ty, Intrinsic::amdgcn_readfirstlane, ExtractLo);
+      CallInst *const ReadFirstLaneHi = B.CreateIntrinsic(
+          Int32Ty, Intrinsic::amdgcn_readfirstlane, ExtractHi);
       Value *const PartialInsert = B.CreateInsertElement(
           PoisonValue::get(VecTy), ReadFirstLaneLo, B.getInt32(0));
       Value *const Insert =
@@ -948,7 +948,7 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
     } else if (TyBitWidth == 32) {
       Value *CastedPhi = B.CreateBitCast(PHI, IntNTy);
       BroadcastI =
-          B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, CastedPhi);
+          B.CreateIntrinsic(IntNTy, Intrinsic::amdgcn_readfirstlane, CastedPhi);
       BroadcastI = B.CreateBitCast(BroadcastI, Ty);
 
     } else {
