[clang] [CUDA][HIP] Do not mark extern shared var (PR #65990)
Yaxun Liu via cfe-commits
cfe-commits at lists.llvm.org
Mon Sep 11 11:13:45 PDT 2023
https://github.com/yxsamliu created https://github.com/llvm/llvm-project/pull/65990:
Fixes: https://github.com/llvm/llvm-project/issues/65806
Currently clang put extern shared var ODR-used by host device functions in global var __clang_gpu_used_external. This behavior was due to https://reviews.llvm.org/D123441. However, clang should not do that for extern shared vars since their addresses are per warp, therefore cannot be accessed by host code.
>From 4557e1e93db2c917048f276969808ac894744a71 Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu" <yaxun.liu at amd.com>
Date: Mon, 11 Sep 2023 13:13:50 -0400
Subject: [PATCH] [CUDA][HIP] Do not mark extern shared var
Fixes: https://github.com/llvm/llvm-project/issues/65806
Currently clang put extern shared var ODR-used by host
device functions in global var __clang_gpu_used_external.
This behavior was due to https://reviews.llvm.org/D123441.
However, clang should not do that for extern shared vars
since their addresses are per warp, therefore cannot be
accessed by host code.
---
clang/include/clang/AST/ASTContext.h | 3 +++
clang/lib/Sema/SemaExpr.cpp | 1 +
clang/test/CodeGenCUDA/host-used-extern.cu | 20 +++++++++++++++++++-
3 files changed, 23 insertions(+), 1 deletion(-)
diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h
index ec6b579870abbdc..4ee32c76a95d8e3 100644
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -1141,6 +1141,9 @@ class ASTContext : public RefCountedBase<ASTContext> {
mutable TagDecl *MSGuidTagDecl = nullptr;
/// Keep track of CUDA/HIP device-side variables ODR-used by host code.
+ /// This does not include extern shared variables used by device host
+ /// functions as addresses of shared variables are per warp, therefore
+ /// cannot be accessed by host code.
llvm::DenseSet<const VarDecl *> CUDADeviceVarODRUsedByHost;
/// Keep track of CUDA/HIP external kernels or device variables ODR-used by
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 3e9c3fad03918f0..92496b03ecabe54 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -19136,6 +19136,7 @@ MarkVarDeclODRUsed(ValueDecl *V, SourceLocation Loc, Sema &SemaRef,
: diag::note_cuda_host_var);
}
} else if (VarTarget == Sema::CVT_Device &&
+ !Var->hasAttr<CUDASharedAttr>() &&
(UserTarget == Sema::CFT_Host ||
UserTarget == Sema::CFT_HostDevice)) {
// Record a CUDA/HIP device side variable if it is ODR-used
diff --git a/clang/test/CodeGenCUDA/host-used-extern.cu b/clang/test/CodeGenCUDA/host-used-extern.cu
index c7edabf5c582b6f..e8f8e12aad47d1c 100644
--- a/clang/test/CodeGenCUDA/host-used-extern.cu
+++ b/clang/test/CodeGenCUDA/host-used-extern.cu
@@ -9,6 +9,11 @@
// RUN: -std=c++11 -emit-llvm -o - -target-cpu gfx906 \
// RUN: | FileCheck -check-prefixes=NEG,NORDC %s
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -x hip %s \
+// RUN: -fgpu-rdc -std=c++11 -emit-llvm -o - \
+// RUN: | FileCheck -check-prefix=HOST-NEG %s
+
+
#include "Inputs/cuda.h"
// CHECK-LABEL: @__clang_gpu_used_external = internal {{.*}}global
@@ -21,10 +26,13 @@
// NEG-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel3v
// NEG-NOT: @__clang_gpu_used_external = {{.*}} @var2
// NEG-NOT: @__clang_gpu_used_external = {{.*}} @var3
+// NEG-NOT: @__clang_gpu_used_external = {{.*}} @ext_shvar
+// NEG-NOT: @__clang_gpu_used_external = {{.*}} @shvar
// NORDC-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel1v
// NORDC-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel4v
// NORDC-NOT: @__clang_gpu_used_external = {{.*}} @var1
-
+// HOST-NEG-NOT: call void @__hipRegisterVar({{.*}}, ptr @ext_shvar
+// HOST-NEG-NOT: call void @__hipRegisterVar({{.*}}, ptr @shvar
__global__ void kernel1();
// kernel2 is not marked as used since it is a definition.
@@ -49,3 +57,13 @@ void test() {
void *p = (void*)kernel4;
use(&var1);
}
+
+__global__ void test_lambda_using_extern_shared() {
+ extern __shared__ int ext_shvar[];
+ __shared__ int shvar[10];
+ auto lambda = [&]() {
+ ext_shvar[0] = 1;
+ shvar[0] = 2;
+ };
+ lambda();
+}
More information about the cfe-commits
mailing list