[llvm] Enable .ptr .global .align attributes for kernel attributes for CUDA (PR #79646)

via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 26 13:05:15 PST 2024


https://github.com/Vandana2896 created https://github.com/llvm/llvm-project/pull/79646

The current issue is that loads and stores that could be vectorized are not vectorized in the generated PTX.

We noticed that vectorization was missing for sin, cos, and power operations compiled through LLVM, resulting in a smaller speedup. The reason is that we currently do not generate any .ptr or .align attributes for kernel parameters in CUDA, so the required alignment information is missing and vectorization opportunities are lost.
This change emits the .align attribute for alignment information and the .ptr attribute for pointer kernel parameters, under the assumption that all kernel parameter pointers point to the global memory space. This enables vectorization and improves the speedup by ~2x.
.align is enabled only when the pointer has an explicit alignment specifier.

PTX ISA doc - https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#kernel-parameter-attribute-ptr

>From d5bd0215f22440e10e1a4af2b4391973831795be Mon Sep 17 00:00:00 2001
From: Vandana <vandanak at nvidia.com>
Date: Fri, 26 Jan 2024 13:03:27 -0800
Subject: [PATCH] Enable .ptr .global .align attributes for kernel attributes
 for CUDA

---
 llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp     |  4 +++
 llvm/test/CodeGen/NVPTX/kernel-param-align.ll | 34 +++++++++++++++++++
 2 files changed, 38 insertions(+)
 create mode 100644 llvm/test/CodeGen/NVPTX/kernel-param-align.ll

diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 6c4879ba183c0a5..0a0fbff2ad6c11a 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -1610,6 +1610,10 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
             }
             Align ParamAlign = I->getParamAlign().valueOrOne();
             O << ".align " << ParamAlign.value() << " ";
+          } else if (I->getParamAlign().valueOrOne() != 1) {
+            O << ".ptr .global ";
+            Align ParamAlign = I->getParamAlign().value();
+            O << ".align " << ParamAlign.value() << " ";
           }
           O << TLI->getParamName(F, paramIndex);
           continue;
diff --git a/llvm/test/CodeGen/NVPTX/kernel-param-align.ll b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
new file mode 100644
index 000000000000000..eda45928ea3059d
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_72 2>&1 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_72 | %ptxas-verify %}
+
+%struct.Large = type { [16 x double] }
+
+; CHECK: .param .u64 .ptr .global .align 16 func_align_param_0,
+; CHECK: .param .u64 func_align_param_1,
+; CHECK: .param .u32 func_align_param_2
+define void @func_align(ptr nocapture readonly align 16 %input, ptr nocapture %out, i32 %n) {
+entry:
+  %0 = addrspacecast ptr %out to ptr addrspace(1)
+  %1 = addrspacecast ptr %input to ptr addrspace(1)
+  %getElem = getelementptr inbounds %struct.Large, ptr addrspace(1) %1, i64 0, i32 0, i64 5
+  %tmp2 = load i32, ptr addrspace(1) %getElem, align 8
+  store i32 %tmp2, ptr addrspace(1) %0, align 4
+  ret void
+}
+
+; CHECK: .param .u64 func_param_0,
+; CHECK: .param .u64 func_param_1,
+; CHECK: .param .u32 func_param_2
+define void @func(ptr nocapture readonly %input, ptr nocapture %out, i32 %n) {
+entry:
+  %0 = addrspacecast ptr %out to ptr addrspace(1)
+  %1 = addrspacecast ptr %input to ptr addrspace(1)
+  %getElem = getelementptr inbounds %struct.Large, ptr addrspace(1) %1, i64 0, i32 0, i64 5
+  %tmp2 = load i32, ptr addrspace(1) %getElem, align 8
+  store i32 %tmp2, ptr addrspace(1) %0, align 4
+  ret void
+}
+
+!nvvm.annotations = !{!0, !1}
+!0 = !{ptr @func_align, !"kernel", i32 1}
+!1 = !{ptr @func, !"kernel", i32 1}
\ No newline at end of file



More information about the llvm-commits mailing list