[llvm] 713ca2f - [AMDGPU] Introduce command line switch to control super aligning of LDS.

Mon Jun 7 15:29:51 PDT 2021

Author: hsmahesha
Date: 2021-06-08T03:58:13+05:30
New Revision: 713ca2f3604fdaf0edd5a4e2869c5685e7ec4216

URL: https://github.com/llvm/llvm-project/commit/713ca2f3604fdaf0edd5a4e2869c5685e7ec4216
DIFF: https://github.com/llvm/llvm-project/commit/713ca2f3604fdaf0edd5a4e2869c5685e7ec4216.diff

LOG: [AMDGPU] Introduce command line switch to control super aligning of LDS.

Reviewed By: rampitec

Differential Revision: https://reviews.llvm.org/D103817

Added: 
    llvm/test/CodeGen/AMDGPU/lower-kernel-lds-super-align.ll

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index 8e3895ee14166..8e3ce775b5819 100644

--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -37,6 +37,7 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
 #include <algorithm>
@@ -46,6 +47,11 @@
 
 using namespace llvm;
 
+static cl::opt<bool> SuperAlignLDSGlobals(
+    "amdgpu-super-align-lds-globals",
+    cl::desc("Increase alignment of LDS if it is not on align boundary"),
+    cl::init(true), cl::Hidden);
+
 namespace {
 
 class AMDGPULowerModuleLDS : public ModulePass {
@@ -174,31 +180,27 @@ class AMDGPULowerModuleLDS : public ModulePass {
 
     // Increase the alignment of LDS globals if necessary to maximise the chance
     // that we can use aligned LDS instructions to access them.
-    for (auto *GV : FoundLocalVars) {
-      unsigned AlignValue = GV->getAlignment();
-      if (AlignValue == 0) {
-        GV->setAlignment(DL.getABITypeAlign(GV->getValueType()));
-        continue;
-      }
+    if (SuperAlignLDSGlobals) {
+      for (auto *GV : FoundLocalVars) {
+        Align Alignment = AMDGPU::getAlign(DL, GV);
+        TypeSize GVSize = DL.getTypeAllocSize(GV->getValueType());
+
+        if (GVSize > 8) {
+          // We might want to use a b96 or b128 load/store
+          Alignment = std::max(Alignment, Align(16));
+        } else if (GVSize > 4) {
+          // We might want to use a b64 load/store
+          Alignment = std::max(Alignment, Align(8));
+        } else if (GVSize > 2) {
+          // We might want to use a b32 load/store
+          Alignment = std::max(Alignment, Align(4));
+        } else if (GVSize > 1) {
+          // We might want to use a b16 load/store
+          Alignment = std::max(Alignment, Align(2));
+        }
 
-      Align Alignment(AlignValue);
-      TypeSize GVSize = DL.getTypeAllocSize(GV->getValueType());
-
-      if (GVSize > 8) {
-        // We might want to use a b96 or b128 load/store
-        Alignment = std::max(Alignment, Align(16));
-      } else if (GVSize > 4) {
-        // We might want to use a b64 load/store
-        Alignment = std::max(Alignment, Align(8));
-      } else if (GVSize > 2) {
-        // We might want to use a b32 load/store
-        Alignment = std::max(Alignment, Align(4));
-      } else if (GVSize > 1) {
-        // We might want to use a b16 load/store
-        Alignment = std::max(Alignment, Align(2));
+        GV->setAlignment(Alignment);
       }
-
-      GV->setAlignment(Alignment);
     }
 
     // Sort by alignment, descending, to minimise padding.

diff  --git a/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-super-align.ll b/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-super-align.ll
new file mode 100644
index 0000000000000..206f51f5be3ea
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-super-align.ll
@@ -0,0 +1,23 @@
+; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s | FileCheck --check-prefix=SUPER-ALIGN_ON %s
+; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s | FileCheck --check-prefix=SUPER-ALIGN_ON %s
+; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds --amdgpu-super-align-lds-globals=false < %s | FileCheck --check-prefix=SUPER-ALIGN_OFF %s
+; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-super-align-lds-globals=false < %s | FileCheck --check-prefix=SUPER-ALIGN_OFF %s
+
+; CHECK: %llvm.amdgcn.kernel.k4.lds.t = type { [32 x i8] }
+
+; CHECK-NOT: @lds.1
+@lds.1 = internal unnamed_addr addrspace(3) global [32 x i8] undef, align 1
+
+; SUPER-ALIGN_ON: @llvm.amdgcn.kernel.k4.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k4.lds.t undef, align 16
+; SUPER-ALIGN_OFF: @llvm.amdgcn.kernel.k4.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k4.lds.t undef, align 1
+
+; CHECK-LABEL: @k4
+; CHECK:   %ptr = getelementptr inbounds i8, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k4.lds.t, %llvm.amdgcn.kernel.k4.lds.t addrspace(3)* @llvm.amdgcn.kernel.k4.lds, i32 0, i32
+; CHECK: 0, i32 0) to i8*), i64 %x
+; CHECK:   store i8 1, i8* %ptr, align 1
+; CHECK:   ret void
+define amdgpu_kernel void @k4(i64 %x) {
+  %ptr = getelementptr inbounds i8, i8* addrspacecast ([32 x i8] addrspace(3)* @lds.1 to i8*), i64 %x
+  store i8 1, i8 addrspace(0)* %ptr, align 1
+  ret void
+}