[llvm] 24b28db - [amdgpu] Increase alignment of all LDS variables

Jon Chesterfield via llvm-commits llvm-commits at lists.llvm.org
Sun Dec 12 11:30:55 PST 2021


Author: Jon Chesterfield
Date: 2021-12-12T19:30:32Z
New Revision: 24b28db8ccd6afabb99e770d555dadfc3cced9b7

URL: https://github.com/llvm/llvm-project/commit/24b28db8ccd6afabb99e770d555dadfc3cced9b7
DIFF: https://github.com/llvm/llvm-project/commit/24b28db8ccd6afabb99e770d555dadfc3cced9b7.diff

LOG: [amdgpu] Increase alignment of all LDS variables

Currently the superalign option only increases the alignment of
variables that are moved into the module.lds block. Change that to all LDS
variables. Also only increase the alignment once, instead of once per function.

Reviewed By: rampitec

Differential Revision: https://reviews.llvm.org/D115488

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
    llvm/test/CodeGen/AMDGPU/lower-kernel-lds-super-align.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index f3753b7595e04..6e2b5dc471bc7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -164,8 +164,8 @@ class AMDGPULowerModuleLDS : public ModulePass {
 
   bool runOnModule(Module &M) override {
     UsedList = getUsedList(M);
-
-    bool Changed = processUsedLDS(M);
+    bool Changed = superAlignLDSGlobals(M);
+    Changed |= processUsedLDS(M);
 
     for (Function &F : M.functions()) {
       if (F.isDeclaration())
@@ -182,6 +182,50 @@ class AMDGPULowerModuleLDS : public ModulePass {
   }
 
 private:
+  // Increase the alignment of LDS globals if necessary to maximise the chance
+  // that we can use aligned LDS instructions to access them.
+  static bool superAlignLDSGlobals(Module &M) {
+    const DataLayout &DL = M.getDataLayout();
+    bool Changed = false;
+    if (!SuperAlignLDSGlobals) {
+      return Changed;
+    }
+
+    for (auto &GV : M.globals()) {
+      if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) {
+        // Only changing alignment of LDS variables
+        continue;
+      }
+      if (!GV.hasInitializer()) {
+        // cuda/hip extern __shared__ variable, leave alignment alone
+        continue;
+      }
+
+      Align Alignment = AMDGPU::getAlign(DL, &GV);
+      TypeSize GVSize = DL.getTypeAllocSize(GV.getValueType());
+
+      if (GVSize > 8) {
+        // We might want to use a b96 or b128 load/store
+        Alignment = std::max(Alignment, Align(16));
+      } else if (GVSize > 4) {
+        // We might want to use a b64 load/store
+        Alignment = std::max(Alignment, Align(8));
+      } else if (GVSize > 2) {
+        // We might want to use a b32 load/store
+        Alignment = std::max(Alignment, Align(4));
+      } else if (GVSize > 1) {
+        // We might want to use a b16 load/store
+        Alignment = std::max(Alignment, Align(2));
+      }
+
+      if (Alignment != AMDGPU::getAlign(DL, &GV)) {
+        Changed = true;
+        GV.setAlignment(Alignment);
+      }
+    }
+    return Changed;
+  }
+
   bool processUsedLDS(Module &M, Function *F = nullptr) {
     LLVMContext &Ctx = M.getContext();
     const DataLayout &DL = M.getDataLayout();
@@ -195,31 +239,6 @@ class AMDGPULowerModuleLDS : public ModulePass {
       return false;
     }
 
-    // Increase the alignment of LDS globals if necessary to maximise the chance
-    // that we can use aligned LDS instructions to access them.
-    if (SuperAlignLDSGlobals) {
-      for (auto *GV : FoundLocalVars) {
-        Align Alignment = AMDGPU::getAlign(DL, GV);
-        TypeSize GVSize = DL.getTypeAllocSize(GV->getValueType());
-
-        if (GVSize > 8) {
-          // We might want to use a b96 or b128 load/store
-          Alignment = std::max(Alignment, Align(16));
-        } else if (GVSize > 4) {
-          // We might want to use a b64 load/store
-          Alignment = std::max(Alignment, Align(8));
-        } else if (GVSize > 2) {
-          // We might want to use a b32 load/store
-          Alignment = std::max(Alignment, Align(4));
-        } else if (GVSize > 1) {
-          // We might want to use a b16 load/store
-          Alignment = std::max(Alignment, Align(2));
-        }
-
-        GV->setAlignment(Alignment);
-      }
-    }
-
     SmallVector<OptimizedStructLayoutField, 8> LayoutFields;
     LayoutFields.reserve(FoundLocalVars.size());
     for (GlobalVariable *GV : FoundLocalVars) {

diff  --git a/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-super-align.ll b/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-super-align.ll
index 5ce7dcb36ba6e..a6225bc7c2ed9 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-super-align.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-super-align.ll
@@ -1,5 +1,5 @@
-; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s | FileCheck --check-prefixes=CHECK,SUPER-ALIGN_ON %s
-; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s | FileCheck --check-prefixes=CHECK,SUPER-ALIGN_ON %s
+; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds --amdgpu-super-align-lds-globals=true < %s | FileCheck --check-prefixes=CHECK,SUPER-ALIGN_ON %s
+; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-super-align-lds-globals=true < %s | FileCheck --check-prefixes=CHECK,SUPER-ALIGN_ON %s
 ; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds --amdgpu-super-align-lds-globals=false < %s | FileCheck --check-prefixes=CHECK,SUPER-ALIGN_OFF %s
 ; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-super-align-lds-globals=false < %s | FileCheck --check-prefixes=CHECK,SUPER-ALIGN_OFF %s
 
@@ -8,6 +8,10 @@
 ; CHECK: %llvm.amdgcn.kernel.k3.lds.t = type { [32 x i64], [32 x i32] }
 ; CHECK: %llvm.amdgcn.kernel.k4.lds.t = type { [2 x i32 addrspace(3)*] }
 
+; SUPER-ALIGN_ON: @lds.unused = addrspace(3) global i32 undef, align 4
+; SUPER-ALIGN_OFF: @lds.unused = addrspace(3) global i32 undef, align 2
+ at lds.unused = addrspace(3) global i32 undef, align 2
+
 ; CHECK-NOT: @lds.1
 @lds.1 = internal unnamed_addr addrspace(3) global [32 x i8] undef, align 1
 


        


More information about the llvm-commits mailing list