[llvm] [AMDGPU] Make AMDGPULowerKernelArguments a module pass (PR #112790)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 17 15:39:04 PDT 2024
github-actions[bot] wrote:
:warning: The C/C++ code formatter, clang-format, found issues in your code. :warning:
<details>
<summary>
You can test this locally with the following command:
</summary>
``````````bash
git-clang-format --diff 4512bbe7467c1c0f884304e5654d1070df58d6f8 a8cb03ff24446a85ea82963d3585204b0874a55a --extensions cpp,h -- llvm/lib/Target/AMDGPU/AMDGPU.h llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
``````````
</details>
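If the PR branch is checked out locally, the same fixes can be applied in place rather than just diffed: dropping `--diff` and passing only the base commit makes `git-clang-format` rewrite the changed lines in the working tree (a sketch, assuming `git-clang-format` is on your PATH and you run it from the repository root):
``````````bash
# Sketch: reformat the lines this PR touches, relative to the base commit
# reported above. Without --diff, git-clang-format edits the files in
# place instead of printing a patch.
git-clang-format --extensions cpp,h 4512bbe7467c1c0f884304e5654d1070df58d6f8 -- \
  llvm/lib/Target/AMDGPU/AMDGPU.h \
  llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp \
  llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
``````````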
<details>
<summary>
View the diff from clang-format here.
</summary>
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
index 7b986b4385..02ca044c4b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
@@ -264,8 +264,8 @@ public:
for (BasicBlock::iterator E = BB.end(); InsPt != E; ++InsPt) {
AllocaInst *AI = dyn_cast<AllocaInst>(&*InsPt);
- // If this is a dynamic alloca, the value may depend on the loaded kernargs,
- // so loads will need to be inserted before it.
+ // If this is a dynamic alloca, the value may depend on the loaded
+ // kernargs, so loads will need to be inserted before it.
if (!AI || !AI->isStaticAlloca())
break;
}
@@ -314,7 +314,8 @@ public:
uint64_t Size = DL.getTypeSizeInBits(ArgTy);
uint64_t AllocSize = DL.getTypeAllocSize(ArgTy);
- uint64_t EltOffset = alignTo(ExplicitArgOffset, ABITypeAlign) + BaseOffset;
+ uint64_t EltOffset =
+ alignTo(ExplicitArgOffset, ABITypeAlign) + BaseOffset;
uint64_t LastExplicitArgOffset = ExplicitArgOffset;
ExplicitArgOffset = alignTo(ExplicitArgOffset, ABITypeAlign) + AllocSize;
@@ -328,7 +329,7 @@ public:
if (Arg.hasInRegAttr() && InPreloadSequence && ST.hasKernargPreload() &&
!Arg.getType()->isAggregateType())
if (PreloadInfo.tryAllocPreloadSGPRs(AllocSize, EltOffset,
- LastExplicitArgOffset))
+ LastExplicitArgOffset))
continue;
InPreloadSequence = false;
@@ -336,8 +337,8 @@ public:
if (Arg.use_empty())
continue;
- // If this is byval, the loads are already explicit in the function. We just
- // need to rewrite the pointer values.
+ // If this is byval, the loads are already explicit in the function. We
+ // just need to rewrite the pointer values.
if (IsByRef) {
Value *ArgOffsetPtr = Builder.CreateConstInBoundsGEP1_64(
Builder.getInt8Ty(), KernArgSegment, EltOffset,
@@ -351,11 +352,11 @@ public:
if (PointerType *PT = dyn_cast<PointerType>(ArgTy)) {
// FIXME: Hack. We rely on AssertZext to be able to fold DS addressing
- // modes on SI to know the high bits are 0 so pointer adds don't wrap. We
- // can't represent this with range metadata because it's only allowed for
- // integer types.
+ // modes on SI to know the high bits are 0 so pointer adds don't wrap.
+ // We can't represent this with range metadata because it's only allowed
+ // for integer types.
if ((PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
- PT->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) &&
+ PT->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) &&
!ST.hasUsableDSOffset())
continue;
@@ -380,12 +381,12 @@ public:
Type *AdjustedArgTy;
if (DoShiftOpt) { // FIXME: Handle aggregate types
// Since we don't have sub-dword scalar loads, avoid doing an extload by
- // loading earlier than the argument address, and extracting the relevant
- // bits.
+ // loading earlier than the argument address, and extracting the
+ // relevant bits.
// TODO: Update this for GFX12 which does have scalar sub-dword loads.
//
- // Additionally widen any sub-dword load to i32 even if suitably aligned,
- // so that CSE between different argument loads works easily.
+ // Additionally widen any sub-dword load to i32 even if suitably
+ // aligned, so that CSE between different argument loads works easily.
ArgPtr = Builder.CreateConstInBoundsGEP1_64(
Builder.getInt8Ty(), KernArgSegment, AlignDownOffset,
Arg.getName() + ".kernarg.offset.align.down");
@@ -416,39 +417,38 @@ public:
uint64_t DerefBytes = Arg.getDereferenceableBytes();
if (DerefBytes != 0) {
Load->setMetadata(
- LLVMContext::MD_dereferenceable,
- MDNode::get(Ctx,
- MDB.createConstant(
- ConstantInt::get(Builder.getInt64Ty(), DerefBytes))));
+ LLVMContext::MD_dereferenceable,
+ MDNode::get(Ctx, MDB.createConstant(ConstantInt::get(
+ Builder.getInt64Ty(), DerefBytes))));
}
uint64_t DerefOrNullBytes = Arg.getDereferenceableOrNullBytes();
if (DerefOrNullBytes != 0) {
Load->setMetadata(
- LLVMContext::MD_dereferenceable_or_null,
- MDNode::get(Ctx,
- MDB.createConstant(ConstantInt::get(Builder.getInt64Ty(),
- DerefOrNullBytes))));
+ LLVMContext::MD_dereferenceable_or_null,
+ MDNode::get(Ctx, MDB.createConstant(ConstantInt::get(
+ Builder.getInt64Ty(), DerefOrNullBytes))));
}
if (MaybeAlign ParamAlign = Arg.getParamAlign()) {
Load->setMetadata(
LLVMContext::MD_align,
- MDNode::get(Ctx, MDB.createConstant(ConstantInt::get(
- Builder.getInt64Ty(), ParamAlign->value()))));
+ MDNode::get(Ctx,
+ MDB.createConstant(ConstantInt::get(
+ Builder.getInt64Ty(), ParamAlign->value()))));
}
}
// TODO: Convert noalias arg to !noalias
if (DoShiftOpt) {
- Value *ExtractBits = OffsetDiff == 0 ?
- Load : Builder.CreateLShr(Load, OffsetDiff * 8);
+ Value *ExtractBits =
+ OffsetDiff == 0 ? Load : Builder.CreateLShr(Load, OffsetDiff * 8);
IntegerType *ArgIntTy = Builder.getIntNTy(Size);
Value *Trunc = Builder.CreateTrunc(ExtractBits, ArgIntTy);
- Value *NewVal = Builder.CreateBitCast(Trunc, ArgTy,
- Arg.getName() + ".load");
+ Value *NewVal =
+ Builder.CreateBitCast(Trunc, ArgTy, Arg.getName() + ".load");
Arg.replaceAllUsesWith(NewVal);
} else if (IsV3) {
Value *Shuf = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 2},
``````````
</details>
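Alternatively, the diff above can be applied directly as a patch (a sketch, assuming it is saved with its original whitespace intact, which a mail archive may not preserve):
``````````bash
# Sketch: save the clang-format diff shown above as format.patch, then
# apply it from the repository root.
git apply format.patch
``````````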
https://github.com/llvm/llvm-project/pull/112790
More information about the llvm-commits mailing list