[clang] [CUDA][HIP] Do not mark extern shared var (PR #65990)

Yaxun Liu via cfe-commits cfe-commits at lists.llvm.org
Mon Sep 11 11:13:45 PDT 2023


https://github.com/yxsamliu created https://github.com/llvm/llvm-project/pull/65990:

Fixes: https://github.com/llvm/llvm-project/issues/65806

Currently clang put extern shared var ODR-used by host device functions in global var __clang_gpu_used_external. This behavior was due to https://reviews.llvm.org/D123441. However, clang should not do that for extern shared vars since their addresses are per warp, therefore cannot be accessed by host code.

>From 4557e1e93db2c917048f276969808ac894744a71 Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu" <yaxun.liu at amd.com>
Date: Mon, 11 Sep 2023 13:13:50 -0400
Subject: [PATCH] [CUDA][HIP] Do not mark extern shared var

Fixes: https://github.com/llvm/llvm-project/issues/65806

Currently clang put extern shared var ODR-used by host
device functions in global var __clang_gpu_used_external.
This behavior was due to https://reviews.llvm.org/D123441.
However, clang should not do that for extern shared vars
since their addresses are per warp, therefore cannot be
accessed by host code.
---
 clang/include/clang/AST/ASTContext.h       |  3 +++
 clang/lib/Sema/SemaExpr.cpp                |  1 +
 clang/test/CodeGenCUDA/host-used-extern.cu | 20 +++++++++++++++++++-
 3 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h
index ec6b579870abbdc..4ee32c76a95d8e3 100644
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -1141,6 +1141,9 @@ class ASTContext : public RefCountedBase<ASTContext> {
   mutable TagDecl *MSGuidTagDecl = nullptr;
 
   /// Keep track of CUDA/HIP device-side variables ODR-used by host code.
+  /// This does not include extern shared variables used by device host
+  /// functions as addresses of shared variables are per warp, therefore
+  /// cannot be accessed by host code.
   llvm::DenseSet<const VarDecl *> CUDADeviceVarODRUsedByHost;
 
   /// Keep track of CUDA/HIP external kernels or device variables ODR-used by
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 3e9c3fad03918f0..92496b03ecabe54 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -19136,6 +19136,7 @@ MarkVarDeclODRUsed(ValueDecl *V, SourceLocation Loc, Sema &SemaRef,
                                : diag::note_cuda_host_var);
       }
     } else if (VarTarget == Sema::CVT_Device &&
+               !Var->hasAttr<CUDASharedAttr>() &&
                (UserTarget == Sema::CFT_Host ||
                 UserTarget == Sema::CFT_HostDevice)) {
       // Record a CUDA/HIP device side variable if it is ODR-used
diff --git a/clang/test/CodeGenCUDA/host-used-extern.cu b/clang/test/CodeGenCUDA/host-used-extern.cu
index c7edabf5c582b6f..e8f8e12aad47d1c 100644
--- a/clang/test/CodeGenCUDA/host-used-extern.cu
+++ b/clang/test/CodeGenCUDA/host-used-extern.cu
@@ -9,6 +9,11 @@
 // RUN:   -std=c++11 -emit-llvm -o - -target-cpu gfx906 \
 // RUN:   | FileCheck -check-prefixes=NEG,NORDC %s
 
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -x hip %s \
+// RUN:   -fgpu-rdc -std=c++11 -emit-llvm -o - \
+// RUN:   | FileCheck -check-prefix=HOST-NEG %s
+
+
 #include "Inputs/cuda.h"
 
 // CHECK-LABEL: @__clang_gpu_used_external = internal {{.*}}global
@@ -21,10 +26,13 @@
 // NEG-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel3v
 // NEG-NOT: @__clang_gpu_used_external = {{.*}} @var2
 // NEG-NOT: @__clang_gpu_used_external = {{.*}} @var3
+// NEG-NOT: @__clang_gpu_used_external = {{.*}} @ext_shvar
+// NEG-NOT: @__clang_gpu_used_external = {{.*}} @shvar
 // NORDC-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel1v
 // NORDC-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel4v
 // NORDC-NOT: @__clang_gpu_used_external = {{.*}} @var1
-
+// HOST-NEG-NOT: call void @__hipRegisterVar({{.*}}, ptr @ext_shvar
+// HOST-NEG-NOT: call void @__hipRegisterVar({{.*}}, ptr @shvar
 __global__ void kernel1();
 
 // kernel2 is not marked as used since it is a definition.
@@ -49,3 +57,13 @@ void test() {
   void *p = (void*)kernel4;
   use(&var1);
 }
+
+__global__ void test_lambda_using_extern_shared() {
+  extern __shared__ int ext_shvar[];
+  __shared__ int shvar[10];
+  auto lambda = [&]() {
+    ext_shvar[0] = 1;
+    shvar[0] = 2;
+  };
+  lambda();
+}



More information about the cfe-commits mailing list