[llvm] Reland "[NVPTX] Use .common linkage for common globals" (PR #86824)

Alex MacLean via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 27 09:15:51 PDT 2024


https://github.com/AlexMaclean created https://github.com/llvm/llvm-project/pull/86824

Switch from `.weak` to `.common` linkage for common global variables where possible. The `.common` linkage is described in [PTX ISA 11.6.4. Linking Directives: .common](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#linking-directives-common)
> Declares identifier to be globally visible but “common”.
>
>Common symbols are similar to globally visible symbols. However multiple object files may declare the same common symbol and they may have different types and sizes and references to a symbol get resolved against a common symbol with the largest size.
>
>Only one object file can initialize a common symbol and that must have the largest size among all other definitions of that common symbol from different object files.
>
>.common linking directive can be used only on variables with .global storage. It cannot be used on function symbols or on symbols with opaque type.

I've updated the logic and tests to only use `.common` for PTX 5.0 or greater and verified that the new tests now pass with `ptxas`.

>From dea1a5af554cc74aa0fab98cd2e5fe323e20bd34 Mon Sep 17 00:00:00 2001
From: Alex MacLean <amaclean at nvidia.com>
Date: Mon, 12 Feb 2024 18:17:06 +0000
Subject: [PATCH 1/2] [NVPTX] Use .common linkage for common globals

---
 llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 16 ++++++++------
 llvm/test/CodeGen/NVPTX/common-linkage.ll | 26 +++++++++++++++++++++++
 llvm/test/CodeGen/NVPTX/weak-global.ll    |  2 +-
 3 files changed, 36 insertions(+), 8 deletions(-)
 create mode 100644 llvm/test/CodeGen/NVPTX/common-linkage.ll

diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 2783b1e0ec73eb..2173f80865ead0 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -1022,7 +1022,6 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
   const DataLayout &DL = getDataLayout();
 
   // GlobalVariables are always constant pointers themselves.
-  PointerType *PTy = GVar->getType();
   Type *ETy = GVar->getValueType();
 
   if (GVar->hasExternalLinkage()) {
@@ -1030,6 +1029,9 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
       O << ".visible ";
     else
       O << ".extern ";
+  } else if (GVar->hasCommonLinkage() &&
+             GVar->getAddressSpace() == ADDRESS_SPACE_GLOBAL) {
+    O << ".common ";
   } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
              GVar->hasAvailableExternallyLinkage() ||
              GVar->hasCommonLinkage()) {
@@ -1141,7 +1143,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
   }
 
   O << ".";
-  emitPTXAddressSpace(PTy->getAddressSpace(), O);
+  emitPTXAddressSpace(GVar->getAddressSpace(), O);
 
   if (isManaged(*GVar)) {
     if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30) {
@@ -1170,8 +1172,8 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
     // Ptx allows variable initilization only for constant and global state
     // spaces.
     if (GVar->hasInitializer()) {
-      if ((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
-          (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) {
+      if ((GVar->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
+          (GVar->getAddressSpace() == ADDRESS_SPACE_CONST)) {
         const Constant *Initializer = GVar->getInitializer();
         // 'undef' is treated as there is no value specified.
         if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
@@ -1186,7 +1188,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
             !isa<UndefValue>(GVar->getInitializer())) {
           report_fatal_error("initial value of '" + GVar->getName() +
                              "' is not allowed in addrspace(" +
-                             Twine(PTy->getAddressSpace()) + ")");
+                             Twine(GVar->getAddressSpace()) + ")");
         }
       }
     }
@@ -1205,8 +1207,8 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
       ElementSize = DL.getTypeStoreSize(ETy);
       // Ptx allows variable initilization only for constant and
       // global state spaces.
-      if (((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
-           (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) &&
+      if (((GVar->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
+           (GVar->getAddressSpace() == ADDRESS_SPACE_CONST)) &&
           GVar->hasInitializer()) {
         const Constant *Initializer = GVar->getInitializer();
         if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
diff --git a/llvm/test/CodeGen/NVPTX/common-linkage.ll b/llvm/test/CodeGen/NVPTX/common-linkage.ll
new file mode 100644
index 00000000000000..ac16d9a6d7c763
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/common-linkage.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
+
+; CHECK: .common .global .align 4 .u32 g
+ at g = common addrspace(1) global i32 0, align 4
+
+; CHECK: .weak .const .align 4 .u32 c
+ at c = common addrspace(4) global i32 0, align 4
+
+; CHECK: .weak .shared .align 4 .u32 s
+ at s = common addrspace(3) global i32 0, align 4
+
+define i32 @f1() {
+  %1 = load i32, ptr addrspace(1) @g
+  ret i32 %1
+}
+
+define i32 @f4() {
+  %1 = load i32, ptr addrspace(4) @c
+  ret i32 %1
+}
+
+define i32 @f3() {
+  %1 = load i32, ptr addrspace(3) @s
+  ret i32 %1
+}
diff --git a/llvm/test/CodeGen/NVPTX/weak-global.ll b/llvm/test/CodeGen/NVPTX/weak-global.ll
index dd0160d1c0a654..781ecb9b149567 100644
--- a/llvm/test/CodeGen/NVPTX/weak-global.ll
+++ b/llvm/test/CodeGen/NVPTX/weak-global.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
 
-; CHECK: .weak .global .align 4 .u32 g
+; CHECK: .common .global .align 4 .u32 g
 @g = common addrspace(1) global i32 zeroinitializer
 
 define i32 @func0() {

>From df2cb801da77409ff9248fb1d3923700c6040a71 Mon Sep 17 00:00:00 2001
From: Alex MacLean <amaclean at nvidia.com>
Date: Wed, 27 Mar 2024 16:10:43 +0000
Subject: [PATCH 2/2] fixup for older ptx versions

---
 llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 2 +-
 llvm/test/CodeGen/NVPTX/common-linkage.ll | 9 ++++++---
 llvm/test/CodeGen/NVPTX/weak-global.ll    | 9 ++++++---
 3 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 2173f80865ead0..d005055627aca2 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -1029,7 +1029,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
       O << ".visible ";
     else
       O << ".extern ";
-  } else if (GVar->hasCommonLinkage() &&
+  } else if (STI.getPTXVersion() >= 50 && GVar->hasCommonLinkage() &&
              GVar->getAddressSpace() == ADDRESS_SPACE_GLOBAL) {
     O << ".common ";
   } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
diff --git a/llvm/test/CodeGen/NVPTX/common-linkage.ll b/llvm/test/CodeGen/NVPTX/common-linkage.ll
index ac16d9a6d7c763..976074e12ba667 100644
--- a/llvm/test/CodeGen/NVPTX/common-linkage.ll
+++ b/llvm/test/CodeGen/NVPTX/common-linkage.ll
@@ -1,7 +1,10 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 -mattr=+ptx43 | FileCheck %s --check-prefixes CHECK,PTX43
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 -mattr=+ptx50 | FileCheck %s --check-prefixes CHECK,PTX50
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -mattr=+ptx43 | %ptxas-verify %}
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -mattr=+ptx50 | %ptxas-verify %}
 
-; CHECK: .common .global .align 4 .u32 g
+; PTX43: .weak .global .align 4 .u32 g
+; PTX50: .common .global .align 4 .u32 g
 @g = common addrspace(1) global i32 0, align 4
 
 ; CHECK: .weak .const .align 4 .u32 c
diff --git a/llvm/test/CodeGen/NVPTX/weak-global.ll b/llvm/test/CodeGen/NVPTX/weak-global.ll
index 781ecb9b149567..c5467aad08a360 100644
--- a/llvm/test/CodeGen/NVPTX/weak-global.ll
+++ b/llvm/test/CodeGen/NVPTX/weak-global.ll
@@ -1,7 +1,10 @@
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -mattr=+ptx43 | FileCheck %s --check-prefix PTX43
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -mattr=+ptx50 | FileCheck %s --check-prefix PTX50
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -mattr=+ptx43 | %ptxas-verify %}
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -mattr=+ptx50 | %ptxas-verify %}
 
-; CHECK: .common .global .align 4 .u32 g
+; PTX43: .weak .global .align 4 .u32 g
+; PTX50: .common .global .align 4 .u32 g
 @g = common addrspace(1) global i32 zeroinitializer
 
 define i32 @func0() {



More information about the llvm-commits mailing list