[clang] [Clang] Add `-fdefault-generic-addrspace` flag for targeting GPUs (PR #115777)

Joseph Huber via cfe-commits cfe-commits at lists.llvm.org
Mon Nov 11 14:19:34 PST 2024


https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/115777

>From 34eb41f58518c79c627b5c1cc522c4e142c2d420 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Mon, 11 Nov 2024 16:10:31 -0600
Subject: [PATCH] [Clang] Add `-fdefault-generic-addrspace` flag for targeting
 GPUs

Summary:
GPU targets support several different address spaces which have
differing semantics. When targeting C/C++ we have a very pessimistic
view that these address spaces are completely incompatible. This has a
lot of unfortable effects that limit using address spaces in C++ as well
as making it more difficult to work with. Flat addressing is supported
by the major GPU targets, so it's highly desierable to use.

The C/C++ standard says nothing about address spaces, so we cannot make
any assumptions. However, OpenCL has an option that causes all pointers
to be seen as 'generic'. This patch adds support for making every
address space as `__generic` by default, similar to the CL extensions.
This allows us to use this behavior outside of OpenCL mode. I have
re-used the language option as it seemed easier than creating a second
one.

This works in most cases, however it does cause some problems for cases
like this, as the default pointer type is now `__generic T` so it fails
to bind to `T`. But since this is an opt-in thing it seems fine to force
the user to add an extra template, or remove the qualifiers.
```c
template<typename T> void foo(T *, T);
```
---
 clang/include/clang/Driver/Options.td     |   3 +
 clang/lib/Driver/ToolChains/Clang.cpp     |   3 +
 clang/lib/Frontend/CompilerInvocation.cpp |   4 +
 clang/lib/Sema/Sema.cpp                   |   2 +-
 clang/lib/Sema/SemaDeclCXX.cpp            |   3 +-
 clang/lib/Sema/SemaType.cpp               |  11 +-
 clang/test/CodeGen/generic-addrspace.cpp  | 180 ++++++++++++++++++++++
 7 files changed, 200 insertions(+), 6 deletions(-)
 create mode 100644 clang/test/CodeGen/generic-addrspace.cpp

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 1304ef3c5a228b..0d6f2c3410e9a0 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -3710,6 +3710,9 @@ def fopenmp_assume_no_nested_parallelism : Flag<["-"], "fopenmp-assume-no-nested
 } // let Visibility = [ClangOption, CC1Option, FC1Option]
 } // let Flags = [NoArgumentUnused, HelpHidden]
 
+def fdefault_generic_addrspace : Flag<["-"], "fdefault-generic-addrspace">, Group<f_Group>,
+  Flags<[NoArgumentUnused]>, Visibility<[ClangOption, CC1Option]>,
+  HelpText<"Allow pointers to be implicitly casted to other address spaces.">;
 def fopenmp_offload_mandatory : Flag<["-"], "fopenmp-offload-mandatory">, Group<f_Group>,
   Flags<[NoArgumentUnused]>, Visibility<[ClangOption, CC1Option]>,
   HelpText<"Do not create a host fallback if offloading to the device fails.">,
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 0952262c360185..d997a1d232e83d 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -7067,6 +7067,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   if (Args.hasArg(options::OPT_nogpulib))
     CmdArgs.push_back("-nogpulib");
 
+  if (Args.hasArg(options::OPT_fdefault_generic_addrspace))
+    CmdArgs.push_back("-fdefault-generic-addrspace");
+
   if (Arg *A = Args.getLastArg(options::OPT_fcf_protection_EQ)) {
     CmdArgs.push_back(
         Args.MakeArgString(Twine("-fcf-protection=") + A->getValue()));
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index b5fd35aaa1e841..b44f04d0f275e0 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -3662,6 +3662,9 @@ void CompilerInvocationBase::GenerateLangArgs(const LangOptions &Opts,
   if (Opts.Blocks && !(Opts.OpenCL && Opts.OpenCLVersion == 200))
     GenerateArg(Consumer, OPT_fblocks);
 
+  if (Opts.OpenCLGenericAddressSpace)
+    GenerateArg(Consumer, OPT_fdefault_generic_addrspace);
+
   if (Opts.ConvergentFunctions)
     GenerateArg(Consumer, OPT_fconvergent_functions);
   else
@@ -3939,6 +3942,7 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
   // These need to be parsed now. They are used to set OpenCL defaults.
   Opts.IncludeDefaultHeader = Args.hasArg(OPT_finclude_default_header);
   Opts.DeclareOpenCLBuiltins = Args.hasArg(OPT_fdeclare_opencl_builtins);
+  Opts.OpenCLGenericAddressSpace = Args.hasArg(OPT_fdefault_generic_addrspace);
 
   LangOptions::setLangDefaults(Opts, IK.getLanguage(), T, Includes, LangStd);
 
diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp
index 2b51765e80864a..2920220948d145 100644
--- a/clang/lib/Sema/Sema.cpp
+++ b/clang/lib/Sema/Sema.cpp
@@ -1579,7 +1579,7 @@ NamedDecl *Sema::getCurFunctionOrMethodDecl() const {
 }
 
 LangAS Sema::getDefaultCXXMethodAddrSpace() const {
-  if (getLangOpts().OpenCL)
+  if (getLangOpts().OpenCL || getLangOpts().OpenCLGenericAddressSpace)
     return getASTContext().getDefaultOpenCLPointeeAddrSpace();
   return LangAS::Default;
 }
diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index 8d76a35b2d2557..d855d9e3f49bc9 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -16178,7 +16178,8 @@ CheckOperatorNewDeleteTypes(Sema &SemaRef, const FunctionDecl *FnDecl,
       << FnDecl->getDeclName();
 
   QualType FirstParamType = FnDecl->getParamDecl(0)->getType();
-  if (SemaRef.getLangOpts().OpenCLCPlusPlus) {
+  if (SemaRef.getLangOpts().OpenCLCPlusPlus ||
+      SemaRef.getLangOpts().OpenCLGenericAddressSpace) {
     // The operator is valid on any address space for OpenCL.
     // Drop address space from actual and expected first parameter types.
     if (const auto *PtrTy =
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index eb7516b3ef1ece..7ea663eacd0452 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -1836,7 +1836,7 @@ QualType Sema::BuildPointerType(QualType T,
   if (getLangOpts().ObjCAutoRefCount)
     T = inferARCLifetimeForPointee(*this, T, Loc, /*reference*/ false);
 
-  if (getLangOpts().OpenCL)
+  if (getLangOpts().OpenCL || getLangOpts().OpenCLGenericAddressSpace)
     T = deduceOpenCLPointeeAddrSpace(*this, T);
 
   // In WebAssembly, pointers to reference types and pointers to tables are
@@ -1913,7 +1913,7 @@ QualType Sema::BuildReferenceType(QualType T, bool SpelledAsLValue,
   if (getLangOpts().ObjCAutoRefCount)
     T = inferARCLifetimeForPointee(*this, T, Loc, /*reference*/ true);
 
-  if (getLangOpts().OpenCL)
+  if (getLangOpts().OpenCL || getLangOpts().OpenCLGenericAddressSpace)
     T = deduceOpenCLPointeeAddrSpace(*this, T);
 
   // In WebAssembly, references to reference types and tables are illegal.
@@ -2741,7 +2741,7 @@ QualType Sema::BuildBlockPointerType(QualType T,
   if (checkQualifiedFunction(*this, T, Loc, QFK_BlockPointer))
     return QualType();
 
-  if (getLangOpts().OpenCL)
+  if (getLangOpts().OpenCL || getLangOpts().OpenCLGenericAddressSpace)
     T = deduceOpenCLPointeeAddrSpace(*this, T);
 
   return Context.getBlockPointerType(T);
@@ -5289,7 +5289,10 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
                      DeclaratorContext::LambdaExpr;
         };
 
-        if (state.getSema().getLangOpts().OpenCLCPlusPlus && IsClassMember()) {
+        if ((state.getSema().getLangOpts().OpenCLCPlusPlus ||
+             (!state.getSema().getLangOpts().OpenCL &&
+              state.getSema().getLangOpts().OpenCLGenericAddressSpace)) &&
+            IsClassMember()) {
           LangAS ASIdx = LangAS::Default;
           // Take address space attr if any and mark as invalid to avoid adding
           // them later while creating QualType.
diff --git a/clang/test/CodeGen/generic-addrspace.cpp b/clang/test/CodeGen/generic-addrspace.cpp
new file mode 100644
index 00000000000000..2fda27d0f0ba46
--- /dev/null
+++ b/clang/test/CodeGen/generic-addrspace.cpp
@@ -0,0 +1,180 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fdefault-generic-addrspace -emit-llvm \
+// RUN:   -fvisibility=hidden -o - %s | FileCheck %s --check-prefix=NVPTX
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fdefault-generic-addrspace -emit-llvm \
+// RUN:   -fvisibility=hidden -o - %s | FileCheck %s --check-prefix=AMDGPU
+
+// NVPTX-LABEL: define hidden void @_Z1fPv(
+// NVPTX-SAME: ptr noundef [[P:%.*]]) #[[ATTR0:[0-9]+]] {
+// NVPTX-NEXT:  [[ENTRY:.*:]]
+// NVPTX-NEXT:    [[P_ADDR:%.*]] = alloca ptr, align 8
+// NVPTX-NEXT:    store ptr [[P]], ptr [[P_ADDR]], align 8
+// NVPTX-NEXT:    ret void
+//
+// AMDGPU-LABEL: define hidden void @_Z1fPv(
+// AMDGPU-SAME: ptr noundef [[P:%.*]]) #[[ATTR0:[0-9]+]] {
+// AMDGPU-NEXT:  [[ENTRY:.*:]]
+// AMDGPU-NEXT:    [[P_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// AMDGPU-NEXT:    [[P_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_ADDR]] to ptr
+// AMDGPU-NEXT:    store ptr [[P]], ptr [[P_ADDR_ASCAST]], align 8
+// AMDGPU-NEXT:    ret void
+//
+void f(void *p) {}
+
+// NVPTX-LABEL: define hidden void @_Z2p1Pv(
+// NVPTX-SAME: ptr noundef [[P:%.*]]) #[[ATTR0]] {
+// NVPTX-NEXT:  [[ENTRY:.*:]]
+// NVPTX-NEXT:    [[P_ADDR:%.*]] = alloca ptr, align 8
+// NVPTX-NEXT:    store ptr [[P]], ptr [[P_ADDR]], align 8
+// NVPTX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[P_ADDR]], align 8
+// NVPTX-NEXT:    call void @_Z1fPv(ptr noundef [[TMP0]]) #[[ATTR1:[0-9]+]]
+// NVPTX-NEXT:    ret void
+//
+// AMDGPU-LABEL: define hidden void @_Z2p1Pv(
+// AMDGPU-SAME: ptr noundef [[P:%.*]]) #[[ATTR0]] {
+// AMDGPU-NEXT:  [[ENTRY:.*:]]
+// AMDGPU-NEXT:    [[P_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// AMDGPU-NEXT:    [[P_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_ADDR]] to ptr
+// AMDGPU-NEXT:    store ptr [[P]], ptr [[P_ADDR_ASCAST]], align 8
+// AMDGPU-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[P_ADDR_ASCAST]], align 8
+// AMDGPU-NEXT:    call void @_Z1fPv(ptr noundef [[TMP0]]) #[[ATTR1:[0-9]+]]
+// AMDGPU-NEXT:    ret void
+//
+void p1(void [[clang::opencl_generic]] * p) { f(p); }
+// NVPTX-LABEL: define hidden noundef ptr @_Z2p2PU3AS3v(
+// NVPTX-SAME: ptr addrspace(3) noundef [[P:%.*]]) #[[ATTR0]] {
+// NVPTX-NEXT:  [[ENTRY:.*:]]
+// NVPTX-NEXT:    [[P_ADDR:%.*]] = alloca ptr addrspace(3), align 8
+// NVPTX-NEXT:    store ptr addrspace(3) [[P]], ptr [[P_ADDR]], align 8
+// NVPTX-NEXT:    [[TMP0:%.*]] = load ptr addrspace(3), ptr [[P_ADDR]], align 8
+// NVPTX-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(3) [[TMP0]] to ptr
+// NVPTX-NEXT:    ret ptr [[TMP1]]
+//
+// AMDGPU-LABEL: define hidden noundef ptr @_Z2p2PU3AS3v(
+// AMDGPU-SAME: ptr addrspace(3) noundef [[P:%.*]]) #[[ATTR0]] {
+// AMDGPU-NEXT:  [[ENTRY:.*:]]
+// AMDGPU-NEXT:    [[RETVAL:%.*]] = alloca ptr, align 8, addrspace(5)
+// AMDGPU-NEXT:    [[P_ADDR:%.*]] = alloca ptr addrspace(3), align 4, addrspace(5)
+// AMDGPU-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// AMDGPU-NEXT:    [[P_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_ADDR]] to ptr
+// AMDGPU-NEXT:    store ptr addrspace(3) [[P]], ptr [[P_ADDR_ASCAST]], align 4
+// AMDGPU-NEXT:    [[TMP0:%.*]] = load ptr addrspace(3), ptr [[P_ADDR_ASCAST]], align 4
+// AMDGPU-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(3) [[TMP0]] to ptr
+// AMDGPU-NEXT:    ret ptr [[TMP1]]
+//
+void *p2(void [[clang::opencl_local]] * p) { return p; }
+// NVPTX-LABEL: define hidden noundef ptr @_Z2p3PU3AS3v(
+// NVPTX-SAME: ptr addrspace(3) noundef [[P:%.*]]) #[[ATTR0]] {
+// NVPTX-NEXT:  [[ENTRY:.*:]]
+// NVPTX-NEXT:    [[P_ADDR:%.*]] = alloca ptr addrspace(3), align 8
+// NVPTX-NEXT:    store ptr addrspace(3) [[P]], ptr [[P_ADDR]], align 8
+// NVPTX-NEXT:    [[TMP0:%.*]] = load ptr addrspace(3), ptr [[P_ADDR]], align 8
+// NVPTX-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(3) [[TMP0]] to ptr
+// NVPTX-NEXT:    ret ptr [[TMP1]]
+//
+// AMDGPU-LABEL: define hidden noundef ptr @_Z2p3PU3AS3v(
+// AMDGPU-SAME: ptr addrspace(3) noundef [[P:%.*]]) #[[ATTR0]] {
+// AMDGPU-NEXT:  [[ENTRY:.*:]]
+// AMDGPU-NEXT:    [[RETVAL:%.*]] = alloca ptr, align 8, addrspace(5)
+// AMDGPU-NEXT:    [[P_ADDR:%.*]] = alloca ptr addrspace(3), align 4, addrspace(5)
+// AMDGPU-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// AMDGPU-NEXT:    [[P_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_ADDR]] to ptr
+// AMDGPU-NEXT:    store ptr addrspace(3) [[P]], ptr [[P_ADDR_ASCAST]], align 4
+// AMDGPU-NEXT:    [[TMP0:%.*]] = load ptr addrspace(3), ptr [[P_ADDR_ASCAST]], align 4
+// AMDGPU-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(3) [[TMP0]] to ptr
+// AMDGPU-NEXT:    ret ptr [[TMP1]]
+//
+void *p3(void [[clang::address_space(3)]] * p) { return p; }
+
+struct S {
+  S() = default;
+  ~S() = default;
+  void foo() {}
+};
+
+S s1;
+S [[clang::opencl_global]] s2;
+S [[clang::opencl_local]] s3;
+
+template <typename Ty> void foo(Ty *) {}
+
+// NVPTX-LABEL: define hidden void @_Z2t1Pv(
+// NVPTX-SAME: ptr noundef [[P:%.*]]) #[[ATTR0]] {
+// NVPTX-NEXT:  [[ENTRY:.*:]]
+// NVPTX-NEXT:    [[P_ADDR:%.*]] = alloca ptr, align 8
+// NVPTX-NEXT:    store ptr [[P]], ptr [[P_ADDR]], align 8
+// NVPTX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[P_ADDR]], align 8
+// NVPTX-NEXT:    call void @_Z3fooIvEvPT_(ptr noundef [[TMP0]]) #[[ATTR1]]
+// NVPTX-NEXT:    ret void
+//
+// AMDGPU-LABEL: define hidden void @_Z2t1Pv(
+// AMDGPU-SAME: ptr noundef [[P:%.*]]) #[[ATTR0]] {
+// AMDGPU-NEXT:  [[ENTRY:.*:]]
+// AMDGPU-NEXT:    [[P_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// AMDGPU-NEXT:    [[P_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_ADDR]] to ptr
+// AMDGPU-NEXT:    store ptr [[P]], ptr [[P_ADDR_ASCAST]], align 8
+// AMDGPU-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[P_ADDR_ASCAST]], align 8
+// AMDGPU-NEXT:    call void @_Z3fooIvEvPT_(ptr noundef [[TMP0]]) #[[ATTR1]]
+// AMDGPU-NEXT:    ret void
+//
+void t1(void *p) { foo(p); }
+// NVPTX-LABEL: define hidden void @_Z2t2Pv(
+// NVPTX-SAME: ptr noundef [[P:%.*]]) #[[ATTR0]] {
+// NVPTX-NEXT:  [[ENTRY:.*:]]
+// NVPTX-NEXT:    [[P_ADDR:%.*]] = alloca ptr, align 8
+// NVPTX-NEXT:    store ptr [[P]], ptr [[P_ADDR]], align 8
+// NVPTX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[P_ADDR]], align 8
+// NVPTX-NEXT:    call void @_Z3fooIvEvPT_(ptr noundef [[TMP0]]) #[[ATTR1]]
+// NVPTX-NEXT:    ret void
+//
+// AMDGPU-LABEL: define hidden void @_Z2t2Pv(
+// AMDGPU-SAME: ptr noundef [[P:%.*]]) #[[ATTR0]] {
+// AMDGPU-NEXT:  [[ENTRY:.*:]]
+// AMDGPU-NEXT:    [[P_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// AMDGPU-NEXT:    [[P_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_ADDR]] to ptr
+// AMDGPU-NEXT:    store ptr [[P]], ptr [[P_ADDR_ASCAST]], align 8
+// AMDGPU-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[P_ADDR_ASCAST]], align 8
+// AMDGPU-NEXT:    call void @_Z3fooIvEvPT_(ptr noundef [[TMP0]]) #[[ATTR1]]
+// AMDGPU-NEXT:    ret void
+//
+void t2(void [[clang::opencl_generic]] *p) { foo(p); }
+// NVPTX-LABEL: define hidden void @_Z2t3PU3AS3v(
+// NVPTX-SAME: ptr addrspace(3) noundef [[P:%.*]]) #[[ATTR0]] {
+// NVPTX-NEXT:  [[ENTRY:.*:]]
+// NVPTX-NEXT:    [[P_ADDR:%.*]] = alloca ptr addrspace(3), align 8
+// NVPTX-NEXT:    store ptr addrspace(3) [[P]], ptr [[P_ADDR]], align 8
+// NVPTX-NEXT:    [[TMP0:%.*]] = load ptr addrspace(3), ptr [[P_ADDR]], align 8
+// NVPTX-NEXT:    call void @_Z3fooIU3AS3vEvPT_(ptr addrspace(3) noundef [[TMP0]]) #[[ATTR1]]
+// NVPTX-NEXT:    ret void
+//
+// AMDGPU-LABEL: define hidden void @_Z2t3PU3AS3v(
+// AMDGPU-SAME: ptr addrspace(3) noundef [[P:%.*]]) #[[ATTR0]] {
+// AMDGPU-NEXT:  [[ENTRY:.*:]]
+// AMDGPU-NEXT:    [[P_ADDR:%.*]] = alloca ptr addrspace(3), align 4, addrspace(5)
+// AMDGPU-NEXT:    [[P_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_ADDR]] to ptr
+// AMDGPU-NEXT:    store ptr addrspace(3) [[P]], ptr [[P_ADDR_ASCAST]], align 4
+// AMDGPU-NEXT:    [[TMP0:%.*]] = load ptr addrspace(3), ptr [[P_ADDR_ASCAST]], align 4
+// AMDGPU-NEXT:    call void @_Z3fooIU3AS3vEvPT_(ptr addrspace(3) noundef [[TMP0]]) #[[ATTR1]]
+// AMDGPU-NEXT:    ret void
+//
+void t3(void [[clang::opencl_local]] *p) { foo(p); }
+// NVPTX-LABEL: define hidden void @_Z2t4PU5AS999v(
+// NVPTX-SAME: ptr addrspace(999) noundef [[P:%.*]]) #[[ATTR0]] {
+// NVPTX-NEXT:  [[ENTRY:.*:]]
+// NVPTX-NEXT:    [[P_ADDR:%.*]] = alloca ptr addrspace(999), align 8
+// NVPTX-NEXT:    store ptr addrspace(999) [[P]], ptr [[P_ADDR]], align 8
+// NVPTX-NEXT:    [[TMP0:%.*]] = load ptr addrspace(999), ptr [[P_ADDR]], align 8
+// NVPTX-NEXT:    call void @_Z3fooIU5AS999vEvPT_(ptr addrspace(999) noundef [[TMP0]]) #[[ATTR1]]
+// NVPTX-NEXT:    ret void
+//
+// AMDGPU-LABEL: define hidden void @_Z2t4PU5AS999v(
+// AMDGPU-SAME: ptr addrspace(999) noundef [[P:%.*]]) #[[ATTR0]] {
+// AMDGPU-NEXT:  [[ENTRY:.*:]]
+// AMDGPU-NEXT:    [[P_ADDR:%.*]] = alloca ptr addrspace(999), align 8, addrspace(5)
+// AMDGPU-NEXT:    [[P_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_ADDR]] to ptr
+// AMDGPU-NEXT:    store ptr addrspace(999) [[P]], ptr [[P_ADDR_ASCAST]], align 8
+// AMDGPU-NEXT:    [[TMP0:%.*]] = load ptr addrspace(999), ptr [[P_ADDR_ASCAST]], align 8
+// AMDGPU-NEXT:    call void @_Z3fooIU5AS999vEvPT_(ptr addrspace(999) noundef [[TMP0]]) #[[ATTR1]]
+// AMDGPU-NEXT:    ret void
+//
+void t4(void [[clang::address_space(999)]] *p) { foo(p); }



More information about the cfe-commits mailing list