[clang] a3ce7f5 - [HIP] Fix managed variable linkage

Yaxun Liu via cfe-commits cfe-commits at lists.llvm.org
Tue Feb 23 19:36:23 PST 2021


Author: Yaxun (Sam) Liu
Date: 2021-02-23T22:34:45-05:00
New Revision: a3ce7f5cd2ae236bec7752e343f4b63ddda7ebe7

URL: https://github.com/llvm/llvm-project/commit/a3ce7f5cd2ae236bec7752e343f4b63ddda7ebe7
DIFF: https://github.com/llvm/llvm-project/commit/a3ce7f5cd2ae236bec7752e343f4b63ddda7ebe7.diff

LOG: [HIP] Fix managed variable linkage

Currently managed variables are emitted as undefined symbols, which
causes difficulty for diagnosing undefined symbols for non-managed
variables.

This patch transforms managed variables in device compilation so that
they can be emitted as normal variables.

Reviewed by: Artem Belevich

Differential Revision: https://reviews.llvm.org/D96195

Added: 
    

Modified: 
    clang/lib/CodeGen/CGCUDANV.cpp
    clang/lib/CodeGen/CGCUDARuntime.h
    clang/lib/CodeGen/CodeGenModule.cpp
    clang/test/CodeGenCUDA/device-var-linkage.cu
    clang/test/CodeGenCUDA/managed-var.cu
    llvm/lib/IR/ReplaceConstant.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index 4c3294ae2d65..dccbd6f74a98 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -144,20 +144,24 @@ class CGNVCUDARuntime : public CGCUDARuntime {
                            /*Managed*/ false, Normalized, Type}});
   }
 
+  /// Creates module constructor function
+  llvm::Function *makeModuleCtorFunction();
+  /// Creates module destructor function
+  llvm::Function *makeModuleDtorFunction();
+  /// Transform managed variables for device compilation.
+  void transformManagedVars();
+
 public:
   CGNVCUDARuntime(CodeGenModule &CGM);
 
   void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) override;
   void handleVarRegistration(const VarDecl *VD,
                              llvm::GlobalVariable &Var) override;
-
-  /// Creates module constructor function
-  llvm::Function *makeModuleCtorFunction() override;
-  /// Creates module destructor function
-  llvm::Function *makeModuleDtorFunction() override;
   void
   internalizeDeviceSideVar(const VarDecl *D,
                            llvm::GlobalValue::LinkageTypes &Linkage) override;
+
+  llvm::Function *finalizeModule() override;
 };
 
 }
@@ -534,6 +538,9 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
       addUnderscoredPrefixToName("RegisterTexture"));
   for (auto &&Info : DeviceVars) {
     llvm::GlobalVariable *Var = Info.Var;
+    assert((!Var->isDeclaration() || Info.Flags.isManaged()) &&
+           "External variables should not show up here, except HIP managed "
+           "variables");
     llvm::Constant *VarName = makeConstantString(getDeviceSideName(Info.D));
     switch (Info.Flags.getKind()) {
     case DeviceVarFlags::Variable: {
@@ -543,11 +550,16 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
         auto ManagedVar = new llvm::GlobalVariable(
             CGM.getModule(), Var->getType(),
             /*isConstant=*/false, Var->getLinkage(),
-            /*Init=*/llvm::ConstantPointerNull::get(Var->getType()),
-            Twine(Var->getName() + ".managed"), /*InsertBefore=*/nullptr,
+            /*Init=*/Var->isDeclaration()
+                ? nullptr
+                : llvm::ConstantPointerNull::get(Var->getType()),
+            /*Name=*/"", /*InsertBefore=*/nullptr,
             llvm::GlobalVariable::NotThreadLocal);
         ManagedVar->setDSOLocal(Var->isDSOLocal());
         ManagedVar->setVisibility(Var->getVisibility());
+        ManagedVar->setExternallyInitialized(true);
+        ManagedVar->takeName(Var);
+        Var->setName(Twine(ManagedVar->getName() + ".managed"));
         replaceManagedVar(Var, ManagedVar);
         llvm::Value *Args[] = {
             &GpuBinaryHandlePtr,
@@ -556,7 +568,8 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
             VarName,
             llvm::ConstantInt::get(VarSizeTy, VarSize),
             llvm::ConstantInt::get(IntTy, Var->getAlignment())};
-        Builder.CreateCall(RegisterManagedVar, Args);
+        if (!Var->isDeclaration())
+          Builder.CreateCall(RegisterManagedVar, Args);
       } else {
         llvm::Value *Args[] = {
             &GpuBinaryHandlePtr,
@@ -968,9 +981,13 @@ void CGNVCUDARuntime::handleVarRegistration(const VarDecl *D,
     // discarded and referencing a discarded local symbol from outside the
     // comdat (__cuda_register_globals) is disallowed by the ELF spec.
     // TODO: Reject __device__ constexpr and __device__ inline in Sema.
-    if (!D->hasExternalStorage() && !D->isInline())
+    // HIP managed variables need to be always recorded in device and host
+    // compilations for transformation.
+    if ((!D->hasExternalStorage() && !D->isInline()) ||
+        D->hasAttr<HIPManagedAttr>()) {
       registerDeviceVar(D, GV, !D->hasDefinition(),
                         D->hasAttr<CUDAConstantAttr>());
+    }
   } else if (D->getType()->isCUDADeviceBuiltinSurfaceType() ||
              D->getType()->isCUDADeviceBuiltinTextureType()) {
     // Builtin surfaces and textures and their template arguments are
@@ -998,3 +1015,47 @@ void CGNVCUDARuntime::handleVarRegistration(const VarDecl *D,
     }
   }
 }
+
+// Transform managed variables to pointers to managed variables in device code.
+// Each use of the original managed variable is replaced by a load from the
+// transformed managed variable. The transformed managed variable contains
+// the address of managed memory which will be allocated by the runtime.
+void CGNVCUDARuntime::transformManagedVars() {
+  for (auto &&Info : DeviceVars) {
+    llvm::GlobalVariable *Var = Info.Var;
+    if (Info.Flags.getKind() == DeviceVarFlags::Variable &&
+        Info.Flags.isManaged()) {
+      auto ManagedVar = new llvm::GlobalVariable(
+          CGM.getModule(), Var->getType(),
+          /*isConstant=*/false, Var->getLinkage(),
+          /*Init=*/Var->isDeclaration()
+              ? nullptr
+              : llvm::ConstantPointerNull::get(Var->getType()),
+          /*Name=*/"", /*InsertBefore=*/nullptr,
+          llvm::GlobalVariable::NotThreadLocal,
+          CGM.getContext().getTargetAddressSpace(LangAS::cuda_device));
+      ManagedVar->setDSOLocal(Var->isDSOLocal());
+      ManagedVar->setVisibility(Var->getVisibility());
+      ManagedVar->setExternallyInitialized(true);
+      replaceManagedVar(Var, ManagedVar);
+      ManagedVar->takeName(Var);
+      Var->setName(Twine(ManagedVar->getName()) + ".managed");
+      // Keep managed variables even if they are not used in device code since
+      // they need to be allocated by the runtime.
+      if (!Var->isDeclaration()) {
+        assert(!ManagedVar->isDeclaration());
+        CGM.addCompilerUsedGlobal(Var);
+        CGM.addCompilerUsedGlobal(ManagedVar);
+      }
+    }
+  }
+}
+
+// Returns module constructor to be added.
+llvm::Function *CGNVCUDARuntime::finalizeModule() {
+  if (CGM.getLangOpts().CUDAIsDevice) {
+    transformManagedVars();
+    return nullptr;
+  }
+  return makeModuleCtorFunction();
+}

diff  --git a/clang/lib/CodeGen/CGCUDARuntime.h b/clang/lib/CodeGen/CGCUDARuntime.h
index 59d550102407..2f4b7ab1dc6d 100644
--- a/clang/lib/CodeGen/CGCUDARuntime.h
+++ b/clang/lib/CodeGen/CGCUDARuntime.h
@@ -86,13 +86,9 @@ class CGCUDARuntime {
   virtual void handleVarRegistration(const VarDecl *VD,
                                      llvm::GlobalVariable &Var) = 0;
 
-  /// Constructs and returns a module initialization function or nullptr if it's
-  /// not needed. Must be called after all kernels have been emitted.
-  virtual llvm::Function *makeModuleCtorFunction() = 0;
-
-  /// Returns a module cleanup function or nullptr if it's not needed.
-  /// Must be called after ModuleCtorFunction
-  virtual llvm::Function *makeModuleDtorFunction() = 0;
+  /// Finalize generated LLVM module. Returns a module constructor function
+  /// to be added or a null pointer.
+  virtual llvm::Function *finalizeModule() = 0;
 
   /// Returns function or variable name on device side even if the current
   /// compilation is for host.

diff  --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index d568e3d7095c..7e45ae3acb3e 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -459,10 +459,8 @@ void CodeGenModule::Release() {
   if (ObjCRuntime)
     if (llvm::Function *ObjCInitFunction = ObjCRuntime->ModuleInitFunction())
       AddGlobalCtor(ObjCInitFunction);
-  if (Context.getLangOpts().CUDA && !Context.getLangOpts().CUDAIsDevice &&
-      CUDARuntime) {
-    if (llvm::Function *CudaCtorFunction =
-            CUDARuntime->makeModuleCtorFunction())
+  if (Context.getLangOpts().CUDA && CUDARuntime) {
+    if (llvm::Function *CudaCtorFunction = CUDARuntime->finalizeModule())
       AddGlobalCtor(CudaCtorFunction);
   }
   if (OpenMPRuntime) {
@@ -3833,8 +3831,14 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName,
     }
   }
 
-  if (GV->isDeclaration())
+  if (GV->isDeclaration()) {
     getTargetCodeGenInfo().setTargetAttributes(D, GV, *this);
+    // External HIP managed variables needed to be recorded for transformation
+    // in both device and host compilations.
+    if (getLangOpts().CUDA && D && D->hasAttr<HIPManagedAttr>() &&
+        D->hasExternalStorage())
+      getCUDARuntime().handleVarRegistration(D, *GV);
+  }
 
   LangAS ExpectedAS =
       D ? D->getType().getAddressSpace()
@@ -4142,12 +4146,8 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
   bool NeedsGlobalDtor =
       D->needsDestruction(getContext()) == QualType::DK_cxx_destructor;
 
-  bool IsHIPManagedVarOnDevice =
-      getLangOpts().CUDAIsDevice && D->hasAttr<HIPManagedAttr>();
-
   const VarDecl *InitDecl;
-  const Expr *InitExpr =
-      IsHIPManagedVarOnDevice ? nullptr : D->getAnyInitializer(InitDecl);
+  const Expr *InitExpr = D->getAnyInitializer(InitDecl);
 
   Optional<ConstantEmitter> emitter;
 
@@ -4158,15 +4158,15 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
       getLangOpts().CUDAIsDevice && D->hasAttr<CUDASharedAttr>();
   // Shadows of initialized device-side global variables are also left
   // undefined.
+  // Managed Variables should be initialized on both host side and device side.
   bool IsCUDAShadowVar =
       !getLangOpts().CUDAIsDevice && !D->hasAttr<HIPManagedAttr>() &&
       (D->hasAttr<CUDAConstantAttr>() || D->hasAttr<CUDADeviceAttr>() ||
        D->hasAttr<CUDASharedAttr>());
   bool IsCUDADeviceShadowVar =
-      getLangOpts().CUDAIsDevice &&
+      getLangOpts().CUDAIsDevice && !D->hasAttr<HIPManagedAttr>() &&
       (D->getType()->isCUDADeviceBuiltinSurfaceType() ||
-       D->getType()->isCUDADeviceBuiltinTextureType() ||
-       D->hasAttr<HIPManagedAttr>());
+       D->getType()->isCUDADeviceBuiltinTextureType());
   if (getLangOpts().CUDA &&
       (IsCUDASharedVar || IsCUDAShadowVar || IsCUDADeviceShadowVar))
     Init = llvm::UndefValue::get(getTypes().ConvertType(ASTTy));
@@ -4273,14 +4273,11 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
         GV->setExternallyInitialized(true);
     } else {
       getCUDARuntime().internalizeDeviceSideVar(D, Linkage);
-      getCUDARuntime().handleVarRegistration(D, *GV);
     }
+    getCUDARuntime().handleVarRegistration(D, *GV);
   }
 
-  // HIP managed variables need to be emitted as declarations in device
-  // compilation.
-  if (!IsHIPManagedVarOnDevice)
-    GV->setInitializer(Init);
+  GV->setInitializer(Init);
   if (emitter)
     emitter->finalize(GV);
 

diff  --git a/clang/test/CodeGenCUDA/device-var-linkage.cu b/clang/test/CodeGenCUDA/device-var-linkage.cu
index ec67c77be638..01fde03ea49d 100644
--- a/clang/test/CodeGenCUDA/device-var-linkage.cu
+++ b/clang/test/CodeGenCUDA/device-var-linkage.cu
@@ -21,9 +21,9 @@ __device__ int v1;
 // NORDC-H-DAG: @v2 = internal global i32 undef
 // RDC-H-DAG: @v2 = dso_local global i32 undef
 __constant__ int v2;
-// DEV-DAG: @v3 = external addrspace(1) externally_initialized global i32
-// NORDC-H-DAG: @v3 = internal global i32 0
-// RDC-H-DAG: @v3 = dso_local global i32 0
+// DEV-DAG: @v3 = dso_local addrspace(1) externally_initialized global i32 addrspace(1)* null
+// NORDC-H-DAG: @v3 = internal externally_initialized global i32* null
+// RDC-H-DAG: @v3 = dso_local externally_initialized global i32* null
 __managed__ int v3;
 
 // DEV-DAG: @ev1 = external addrspace(1) global i32
@@ -32,8 +32,8 @@ extern __device__ int ev1;
 // DEV-DAG: @ev2 = external addrspace(4) global i32
 // HOST-DAG: @ev2 = external global i32
 extern __constant__ int ev2;
-// DEV-DAG: @ev3 = external addrspace(1) global i32
-// HOST-DAG: @ev3 = external global i32
+// DEV-DAG: @ev3 = external addrspace(1) externally_initialized global i32 addrspace(1)*
+// HOST-DAG: @ev3 = external externally_initialized global i32*
 extern __managed__ int ev3;
 
 // NORDC-DAG: @_ZL3sv1 = dso_local addrspace(1) externally_initialized global i32 0
@@ -44,8 +44,8 @@ static __device__ int sv1;
 // RDC-DAG: @_ZL3sv2 = internal addrspace(4) global i32 0
 // HOST-DAG: @_ZL3sv2 = internal global i32 undef
 static __constant__ int sv2;
-// DEV-DAG: @_ZL3sv3 = external addrspace(1) externally_initialized global i32
-// HOST-DAG: @_ZL3sv3 = internal global i32 0
+// DEV-DAG: @_ZL3sv3 = dso_local addrspace(1) externally_initialized global i32 addrspace(1)* null
+// HOST-DAG: @_ZL3sv3 = internal externally_initialized global i32* null
 static __managed__ int sv3;
 
 __device__ __host__ int work(int *x);

diff  --git a/clang/test/CodeGenCUDA/managed-var.cu b/clang/test/CodeGenCUDA/managed-var.cu
index 2dceec03e91f..1657487bccee 100644
--- a/clang/test/CodeGenCUDA/managed-var.cu
+++ b/clang/test/CodeGenCUDA/managed-var.cu
@@ -2,47 +2,62 @@
 
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -std=c++11 \
 // RUN:   -emit-llvm -o - -x hip %s | FileCheck \
-// RUN:   -check-prefixes=DEV %s
+// RUN:   -check-prefixes=COMMON,DEV %s
 
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -std=c++11 \
 // RUN:   -emit-llvm -fgpu-rdc -o - -x hip %s | FileCheck \
-// RUN:   -check-prefixes=DEV %s
+// RUN:   -check-prefixes=COMMON,DEV %s
 
 // RUN: %clang_cc1 -triple x86_64-gnu-linux -std=c++11 \
 // RUN:   -emit-llvm -o - -x hip %s | FileCheck \
-// RUN:   -check-prefixes=HOST,NORDC %s
+// RUN:   -check-prefixes=COMMON,HOST,NORDC %s
 
 // RUN: %clang_cc1 -triple x86_64-gnu-linux -std=c++11 \
 // RUN:   -emit-llvm -fgpu-rdc -o - -x hip %s | FileCheck \
-// RUN:   -check-prefixes=HOST,RDC %s
+// RUN:   -check-prefixes=COMMON,HOST,RDC %s
 
 #include "Inputs/cuda.h"
 
-// DEV-DAG: @x = external addrspace(1) externally_initialized global i32
-// NORDC-DAG: @x = internal global i32 1
-// RDC-DAG: @x = dso_local global i32 1
-// NORDC-DAG: @x.managed = internal global i32* null
-// RDC-DAG: @x.managed = dso_local global i32* null
-// HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"x\00"
-
 struct vec {
   float x,y,z;
 };
 
+// DEV-DAG: @x.managed = dso_local addrspace(1) externally_initialized global i32 1, align 4
+// DEV-DAG: @x = dso_local addrspace(1) externally_initialized global i32 addrspace(1)* null
+// NORDC-DAG: @x.managed = internal global i32 1
+// RDC-DAG: @x.managed = dso_local global i32 1
+// NORDC-DAG: @x = internal externally_initialized global i32* null
+// RDC-DAG: @x = dso_local externally_initialized global i32* null
+// HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"x\00"
 __managed__ int x = 1;
+
+// DEV-DAG: @v.managed = dso_local addrspace(1) externally_initialized global [100 x %struct.vec] zeroinitializer, align 4
+// DEV-DAG: @v = dso_local addrspace(1) externally_initialized global [100 x %struct.vec] addrspace(1)* null
 __managed__ vec v[100];
+
+// DEV-DAG: @v2.managed = dso_local addrspace(1) externally_initialized global <{ %struct.vec, [99 x %struct.vec] }> <{ %struct.vec { float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 }, [99 x %struct.vec] zeroinitializer }>, align 4
+// DEV-DAG: @v2 = dso_local addrspace(1) externally_initialized global <{ %struct.vec, [99 x %struct.vec] }> addrspace(1)* null
 __managed__ vec v2[100] = {{1, 1, 1}};
 
-// DEV-DAG: @ex = external addrspace(1) global i32
-// HOST-DAG: @ex = external global i32
+// DEV-DAG: @ex.managed = external addrspace(1) global i32, align 4
+// DEV-DAG: @ex = external addrspace(1) externally_initialized global i32 addrspace(1)*
+// HOST-DAG: @ex.managed = external global i32
+// HOST-DAG: @ex = external externally_initialized global i32*
 extern __managed__ int ex;
 
-// DEV-DAG: @_ZL2sx = external addrspace(1) externally_initialized global i32
-// HOST-DAG: @_ZL2sx = internal global i32 1
-// HOST-DAG: @_ZL2sx.managed = internal global i32* null
+// DEV-DAG: @_ZL2sx.managed = dso_local addrspace(1) externally_initialized global i32 1, align 4
+// DEV-DAG: @_ZL2sx = dso_local addrspace(1) externally_initialized global i32 addrspace(1)* null
+// HOST-DAG: @_ZL2sx.managed = internal global i32 1
+// HOST-DAG: @_ZL2sx = internal externally_initialized global i32* null
 static __managed__ int sx = 1;
 
-// HOST-NOT: @ex.managed
+// DEV-DAG: @llvm.compiler.used
+// DEV-SAME-DAG: @x.managed
+// DEV-SAME-DAG: @x
+// DEV-SAME-DAG: @v.managed
+// DEV-SAME-DAG: @v
+// DEV-SAME-DAG: @_ZL2sx.managed
+// DEV-SAME-DAG: @_ZL2sx
 
 // Force ex and sx mitted in device compilation.
 __global__ void foo(int *z) {
@@ -55,42 +70,53 @@ int foo2() {
   return ex + sx;
 }
 
-// HOST-LABEL: define {{.*}}@_Z4loadv()
-// HOST:  %ld.managed = load i32*, i32** @x.managed, align 4
+// COMMON-LABEL: define {{.*}}@_Z4loadv()
+// DEV:  %ld.managed = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* @x, align 4
+// DEV:  %0 = addrspacecast i32 addrspace(1)* %ld.managed to i32*
+// DEV:  %1 = load i32, i32* %0, align 4
+// DEV:  ret i32 %1
+// HOST:  %ld.managed = load i32*, i32** @x, align 4
 // HOST:  %0 = load i32, i32* %ld.managed, align 4
 // HOST:  ret i32 %0
-int load() {
+__device__ __host__ int load() {
   return x;
 }
 
-// HOST-LABEL: define {{.*}}@_Z5storev()
-// HOST:  %ld.managed = load i32*, i32** @x.managed, align 4
+// COMMON-LABEL: define {{.*}}@_Z5storev()
+// DEV:  %ld.managed = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* @x, align 4
+// DEV:  %0 = addrspacecast i32 addrspace(1)* %ld.managed to i32*
+// DEV:  store i32 2, i32* %0, align 4
+// HOST:  %ld.managed = load i32*, i32** @x, align 4
 // HOST:  store i32 2, i32* %ld.managed, align 4
-void store() {
+__device__ __host__ void store() {
   x = 2;
 }
 
-// HOST-LABEL: define {{.*}}@_Z10addr_takenv()
-// HOST:  %ld.managed = load i32*, i32** @x.managed, align 4
+// COMMON-LABEL: define {{.*}}@_Z10addr_takenv()
+// DEV:  %0 = addrspacecast i32 addrspace(1)* %ld.managed to i32*
+// DEV:  store i32* %0, i32** %p.ascast, align 8
+// DEV:  %1 = load i32*, i32** %p.ascast, align 8
+// DEV:  store i32 3, i32* %1, align 4
+// HOST:  %ld.managed = load i32*, i32** @x, align 4
 // HOST:  store i32* %ld.managed, i32** %p, align 8
 // HOST:  %0 = load i32*, i32** %p, align 8
 // HOST:  store i32 3, i32* %0, align 4
-void addr_taken() {
+__device__ __host__ void addr_taken() {
   int *p = &x;
   *p = 3;
 }
 
 // HOST-LABEL: define {{.*}}@_Z5load2v()
-// HOST: %ld.managed = load [100 x %struct.vec]*, [100 x %struct.vec]** @v.managed, align 16
+// HOST: %ld.managed = load [100 x %struct.vec]*, [100 x %struct.vec]** @v, align 16
 // HOST:  %0 = getelementptr inbounds [100 x %struct.vec], [100 x %struct.vec]* %ld.managed, i64 0, i64 1, i32 0
 // HOST:  %1 = load float, float* %0, align 4
 // HOST:  ret float %1
-float load2() {
+__device__ __host__ float load2() {
   return v[1].x;
 }
 
 // HOST-LABEL: define {{.*}}@_Z5load3v()
-// HOST:  %ld.managed = load <{ %struct.vec, [99 x %struct.vec] }>*, <{ %struct.vec, [99 x %struct.vec] }>** @v2.managed, align 16
+// HOST:  %ld.managed = load <{ %struct.vec, [99 x %struct.vec] }>*, <{ %struct.vec, [99 x %struct.vec] }>** @v2, align 16
 // HOST:  %0 = bitcast <{ %struct.vec, [99 x %struct.vec] }>* %ld.managed to [100 x %struct.vec]*
 // HOST:  %1 = getelementptr inbounds [100 x %struct.vec], [100 x %struct.vec]* %0, i64 0, i64 1, i32 1
 // HOST:  %2 = load float, float* %1, align 4
@@ -100,10 +126,10 @@ float load3() {
 }
 
 // HOST-LABEL: define {{.*}}@_Z11addr_taken2v()
-// HOST:  %ld.managed = load [100 x %struct.vec]*, [100 x %struct.vec]** @v.managed, align 16
+// HOST:  %ld.managed = load [100 x %struct.vec]*, [100 x %struct.vec]** @v, align 16
 // HOST:  %0 = getelementptr inbounds [100 x %struct.vec], [100 x %struct.vec]* %ld.managed, i64 0, i64 1, i32 0
 // HOST:  %1 = ptrtoint float* %0 to i64
-// HOST:  %ld.managed1 = load <{ %struct.vec, [99 x %struct.vec] }>*, <{ %struct.vec, [99 x %struct.vec] }>** @v2.managed, align 16
+// HOST:  %ld.managed1 = load <{ %struct.vec, [99 x %struct.vec] }>*, <{ %struct.vec, [99 x %struct.vec] }>** @v2, align 16
 // HOST:  %2 = bitcast <{ %struct.vec, [99 x %struct.vec] }>* %ld.managed1 to [100 x %struct.vec]*
 // HOST:  %3 = getelementptr inbounds [100 x %struct.vec], [100 x %struct.vec]* %2, i64 0, i64 1, i32 1
 // HOST:  %4 = ptrtoint float* %3 to i64
@@ -115,7 +141,19 @@ float addr_taken2() {
   return (float)reinterpret_cast<long>(&(v2[1].y)-&(v[1].x));
 }
 
-// HOST-DAG: __hipRegisterManagedVar({{.*}}@x.managed {{.*}}@x {{.*}}@[[DEVNAMEX]]{{.*}}, i64 4, i32 4)
-// HOST-DAG: __hipRegisterManagedVar({{.*}}@_ZL2sx.managed {{.*}}@_ZL2sx
-// HOST-NOT: __hipRegisterManagedVar({{.*}}@ex.managed {{.*}}@ex
+// COMMON-LABEL: define {{.*}}@_Z5load4v()
+// DEV:  %ld.managed = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* @ex, align 4
+// DEV:  %0 = addrspacecast i32 addrspace(1)* %ld.managed to i32*
+// DEV:  %1 = load i32, i32* %0, align 4
+// DEV:  ret i32 %1
+// HOST:  %ld.managed = load i32*, i32** @ex, align 4
+// HOST:  %0 = load i32, i32* %ld.managed, align 4
+// HOST:  ret i32 %0
+__device__ __host__ int load4() {
+  return ex;
+}
+
+// HOST-DAG: __hipRegisterManagedVar({{.*}}@x {{.*}}@x.managed {{.*}}@[[DEVNAMEX]]{{.*}}, i64 4, i32 4)
+// HOST-DAG: __hipRegisterManagedVar({{.*}}@_ZL2sx {{.*}}@_ZL2sx.managed
+// HOST-NOT: __hipRegisterManagedVar({{.*}}@ex {{.*}}@ex.managed
 // HOST-DAG: declare void @__hipRegisterManagedVar(i8**, i8*, i8*, i8*, i64, i32)

diff  --git a/llvm/lib/IR/ReplaceConstant.cpp b/llvm/lib/IR/ReplaceConstant.cpp
index 7efa525d427e..2cc0650be8b1 100644
--- a/llvm/lib/IR/ReplaceConstant.cpp
+++ b/llvm/lib/IR/ReplaceConstant.cpp
@@ -60,6 +60,7 @@ Instruction *createReplacementInstr(ConstantExpr *CE, Instruction *Instr) {
   case Instruction::PtrToInt:
   case Instruction::IntToPtr:
   case Instruction::BitCast:
+  case Instruction::AddrSpaceCast:
     return dyn_cast<Instruction>(
         Builder.CreateCast((Instruction::CastOps)OpCode, CE->getOperand(0),
                            CE->getType(), CE->getName()));


        


More information about the cfe-commits mailing list