[llvm] 624f12d - [amdgpu] Drop lowering of LDS used by global variables

Jon Chesterfield via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 14 13:59:35 PST 2021


Author: Jon Chesterfield
Date: 2021-12-14T21:59:26Z
New Revision: 624f12d34f94a9e814753f26f0b499870697b68c

URL: https://github.com/llvm/llvm-project/commit/624f12d34f94a9e814753f26f0b499870697b68c
DIFF: https://github.com/llvm/llvm-project/commit/624f12d34f94a9e814753f26f0b499870697b68c.diff

LOG: [amdgpu] Drop lowering of LDS used by global variables

Approximately revert D103431.

LDS variables are allocated at kernel launch and deallocated at kernel exit.
Their address therefore depends on the kernel execution. Global variables are
initialized either by values written to .data, which can't be done for an LDS
variable as there is no kernel running, or by a global constructor. Initializing
the global to the address of some LDS allocated by a global constructor is
possible but indistinguishable from undef.

Assigning the address of an LDS variable to a global should be a sema error. It
isn't for OpenMP; other languages have not been checked. Failing that, it could
be set to undef, perhaps in this pass.

Reviewed By: rampitec

Differential Revision: https://reviews.llvm.org/D115413

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp
    llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h

Removed: 
    llvm/test/CodeGen/AMDGPU/lower-kernel-lds-global-uses.ll
    llvm/test/CodeGen/AMDGPU/lower-module-lds-global-alias.ll
    llvm/test/CodeGen/AMDGPU/lower-module-lds-global-uses.ll
    llvm/test/CodeGen/AMDGPU/lower-module-lds-indirect.ll


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp
index 4040dc5d8b2fd..a83ff6667956c 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp
@@ -62,29 +62,6 @@ void replaceConstantUsesInFunction(ConstantExpr *C, const Function *F) {
   }
 }
 
-bool hasUserInstruction(const GlobalValue *GV) {
-  SmallPtrSet<const User *, 8> Visited;
-  SmallVector<const User *, 16> Stack(GV->users());
-
-  while (!Stack.empty()) {
-    const User *U = Stack.pop_back_val();
-
-    if (!Visited.insert(U).second)
-      continue;
-
-    if (isa<Instruction>(U))
-      return true;
-
-    append_range(Stack, U->users());
-  }
-
-  return false;
-}
-
-/// \returns true if an LDS global requires lowering to a module LDS structure
-/// if \p F is not given. If \p F is given it must be a kernel and function
-/// \returns true if an LDS global is directly used from that kernel and it
-/// is safe to replace its uses with a kernel LDS structure member.
 static bool shouldLowerLDSToStruct(const GlobalVariable &GV,
                                    const Function *F) {
   // We are not interested in kernel LDS lowering for module LDS itself.
@@ -94,7 +71,6 @@ static bool shouldLowerLDSToStruct(const GlobalVariable &GV,
   bool Ret = false;
   SmallPtrSet<const User *, 8> Visited;
   SmallVector<const User *, 16> Stack(GV.users());
-  SmallPtrSet<const GlobalValue *, 8> GlobalUsers;
 
   assert(!F || isKernelCC(F));
 
@@ -102,15 +78,10 @@ static bool shouldLowerLDSToStruct(const GlobalVariable &GV,
     const User *V = Stack.pop_back_val();
     Visited.insert(V);
 
-    if (auto *G = dyn_cast<GlobalValue>(V)) {
-      StringRef GName = G->getName();
-      if (F && GName != "llvm.used" && GName != "llvm.compiler.used") {
-        // For kernel LDS lowering, if G is not a compiler.used list, then we
-        // cannot lower the lds GV since we cannot replace the use of GV within
-        // G.
-        return false;
-      }
-      GlobalUsers.insert(G);
+    if (isa<GlobalValue>(V)) {
+      // This use of the LDS variable is the initializer of a global variable.
+      // This is ill formed. The address of an LDS variable is kernel dependent
+      // and unknown until runtime. It can't be written to a global variable.
       continue;
     }
 
@@ -132,15 +103,6 @@ static bool shouldLowerLDSToStruct(const GlobalVariable &GV,
     append_range(Stack, V->users());
   }
 
-  if (!F && !Ret) {
-    // For module LDS lowering, we have not yet decided if we should lower GV or
-    // not. Explore all global users of GV, and check if atleast one of these
-    // global users appear as an use within an instruction (possibly nested use
-    // via constant expression), if so, then conservately lower LDS.
-    for (auto *G : GlobalUsers)
-      Ret |= hasUserInstruction(G);
-  }
-
   return Ret;
 }
 

diff  --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h
index ddcaeed6809c4..83ef68cc3f60e 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h
@@ -26,10 +26,6 @@ bool isKernelCC(const Function *Func);
 
 Align getAlign(DataLayout const &DL, const GlobalVariable *GV);
 
-/// \returns true if a given global variable \p GV (or its global users) appear
-/// as an use within some instruction (either from kernel or from non-kernel).
-bool hasUserInstruction(const GlobalValue *GV);
-
 std::vector<GlobalVariable *> findVariablesToLower(Module &M,
                                                    const Function *F = nullptr);
 

diff  --git a/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-global-uses.ll b/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-global-uses.ll
deleted file mode 100644
index 3ea52f9309f61..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-global-uses.ll
+++ /dev/null
@@ -1,55 +0,0 @@
-; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s | FileCheck %s
-; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s | FileCheck %s
-
-;.
-; Kernel LDS lowering.
-;.
-; @lds.1:  is part of @llvm.used list, and also it is used within kernel, hence it is lowered.
-; @lds.2:  is part of @llvm.compiler.used list, and also it is used within kernel, hence it is lowered.
-; @lds.3:  is used as initializer to @gptr.3, hence @lds.3 is not lowered, though it is used within kernel.
-; @lds.4:  is used as initializer to @gptr.4, hence @lds.4 is not lowered, though it is used within kernel,
-;          irrespective of the uses of @gptr.4 itself ( @gptr.4 is part of llvm.compiler.used list ).
-; @lds.5:  is part of @llvm.used list, but is not used within kernel, hence it is not lowered.
-; @lds.6:  is part of @llvm.compiler.used list, but is not used within kernel, hence it is not lowered.
-;.
-
-; CHECK: %llvm.amdgcn.kernel.k0.lds.t = type { i32, i16 }
-
-; CHECK-NOT: @lds.1
-; CHECK-NOT: @lds.2
-; CHECK: @lds.3 = addrspace(3) global i64 undef, align 8
-; CHECK: @lds.4 = addrspace(3) global float undef, align 4
-; CHECK: @lds.5 = addrspace(3) global i16 undef, align 2
-; CHECK: @lds.6 = addrspace(3) global i32 undef, align 4
-@lds.1 = addrspace(3) global i16 undef, align 2
-@lds.2 = addrspace(3) global i32 undef, align 4
-@lds.3 = addrspace(3) global i64 undef, align 8
-@lds.4 = addrspace(3) global float undef, align 4
-@lds.5 = addrspace(3) global i16 undef, align 2
-@lds.6 = addrspace(3) global i32 undef, align 4
-
-; CHECK: @gptr.3 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* @lds.3 to i64*), align 8
-; CHECK: @gptr.4 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast (float addrspace(3)* @lds.4 to i64 addrspace(3)*) to i64*), align 8
-@gptr.3 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* @lds.3 to i64*), align 8
-@gptr.4 = addrspace(1) global i64* addrspacecast (float addrspace(3)* @lds.4 to i64*), align 8
-
-; CHECK: @llvm.amdgcn.kernel.k0.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k0.lds.t undef, align 4
-
-; CHECK: @llvm.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(3)* bitcast (i16 addrspace(3)* @lds.5 to i8 addrspace(3)*) to i8*)], section "llvm.metadata"
-; CHECK: @llvm.compiler.used = appending global [2 x i8*] [i8* addrspacecast (i8 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.4 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(3)* bitcast (i32 addrspace(3)* @lds.6 to i8 addrspace(3)*) to i8*)], section "llvm.metadata"
-@llvm.used = appending global [2 x i8*] [i8* addrspacecast (i8 addrspace(3)* bitcast (i16 addrspace(3)* @lds.1 to i8 addrspace(3)*) to i8*), i8* addrspacecast (i8 addrspace(3)* bitcast (i16 addrspace(3)* @lds.5 to i8 addrspace(3)*) to i8*)], section "llvm.metadata"
-@llvm.compiler.used = appending global [3 x i8*] [i8* addrspacecast (i8 addrspace(3)* bitcast (i32 addrspace(3)* @lds.2 to i8 addrspace(3)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.4 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(3)* bitcast (i32 addrspace(3)* @lds.6 to i8 addrspace(3)*) to i8*)], section "llvm.metadata"
-
-; CHECK-LABEL: @k0()
-; CHECK:   %ld.lds.1 = load i16, i16 addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, %llvm.amdgcn.kernel.k0.lds.t addrspace(3)* @llvm.amdgcn.kernel.k0.lds, i32 0, i32 1), align 4
-; CHECK:   %ld.lds.2 = load i32, i32 addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, %llvm.amdgcn.kernel.k0.lds.t addrspace(3)* @llvm.amdgcn.kernel.k0.lds, i32 0, i32 0), align 4
-; CHECK:   %ld.lds.3 = load i64, i64 addrspace(3)* @lds.3, align 4
-; CHECK:   %ld.lds.4 = load float, float addrspace(3)* @lds.4, align 4
-; CHECK:   ret void
-define amdgpu_kernel void @k0() {
-  %ld.lds.1 = load i16, i16 addrspace(3)* @lds.1
-  %ld.lds.2 = load i32, i32 addrspace(3)* @lds.2
-  %ld.lds.3 = load i64, i64 addrspace(3)* @lds.3
-  %ld.lds.4 = load float, float addrspace(3)* @lds.4
-  ret void
-}

diff  --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-global-alias.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-global-alias.ll
deleted file mode 100644
index 104c87774a72e..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-global-alias.ll
+++ /dev/null
@@ -1,93 +0,0 @@
-; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s | FileCheck %s
-; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s | FileCheck %s
-
-;.
-; @lds.1:  is aliased with @alias.to.lds.1, and @alias.to.lds.1 is used within kernel @k0.
-;          Hence, @lds.1 is lowered.
-; @lds.2:  is aliased with @alias.to.lds.2, and @alias.to.lds.2 is used within non-kernel @f0,
-;          Hence, @lds.2 is lowered.
-; @lds.3:  is used as initializer to global @gptr.3, and @gptr.3 is aliased with @alias.to.gptr.3,
-;          and @alias.to.gptr.3 is used within kernel @k1. Hence, @lds.3 is lowered.
-; @lds.4:  is used as initializer to global @gptr.4, and @gptr.4 is aliased with @alias.to.gptr.4,
-;          and @alias.to.gptr.4 is used within non-kernel @f1. Hence, @lds.4 is lowered.
-; @lds.5:  is aliased with @alias.to.lds.5, but neither @lds.5 nor @alias.to.lds.5 is used anywhere.
-;          Hence, @lds.5 is not lowered.
-; @lds.6:  is used as initializer to global @gptr.6, and @gptr.6 is aliased with @alias.to.gptr.6.
-;          But none of them are used anywhere. Hence, @lds.6 is not lowered.
-;.
-
-; CHECK: %llvm.amdgcn.module.lds.t = type { [4 x i8], [3 x i8], [1 x i8], [2 x i8] }
-
-; CHECK-NOT: @lds.1
-; CHECK-NOT: @lds.2
-; CHECK-NOT: @lds.3
-; CHECK-NOT: @lds.4
-; CHECK: @lds.5 = internal unnamed_addr addrspace(3) global [5 x i8] undef, align 8
-; CHECK: @lds.6 = internal unnamed_addr addrspace(3) global [6 x i8] undef, align 8
-@lds.1 = internal unnamed_addr addrspace(3) global [1 x i8] undef, align 1
-@lds.2 = internal unnamed_addr addrspace(3) global [2 x i8] undef, align 2
-@lds.3 = internal unnamed_addr addrspace(3) global [3 x i8] undef, align 4
-@lds.4 = internal unnamed_addr addrspace(3) global [4 x i8] undef, align 4
-@lds.5 = internal unnamed_addr addrspace(3) global [5 x i8] undef, align 8
-@lds.6 = internal unnamed_addr addrspace(3) global [6 x i8] undef, align 8
-
-; CHECK: @gptr.3 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast ([3 x i8] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 1) to i64 addrspace(3)*) to i64*), align 8
-; CHECK: @gptr.4 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast (%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds to i64 addrspace(3)*) to i64*), align 8
-; CHECK: @gptr.6 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast ([6 x i8] addrspace(3)* @lds.6 to i64 addrspace(3)*) to i64*), align 8
-@gptr.3 = addrspace(1) global i64* addrspacecast ([3 x i8] addrspace(3)* @lds.3 to i64*), align 8
-@gptr.4 = addrspace(1) global i64* addrspacecast ([4 x i8] addrspace(3)* @lds.4 to i64*), align 8
-@gptr.6 = addrspace(1) global i64* addrspacecast ([6 x i8] addrspace(3)* @lds.6 to i64*), align 8
-
-; CHECK: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t undef, align 4
-; CHECK: @llvm.compiler.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 0, i32 0) to i8*)], section "llvm.metadata"
-
-; CHECK: @alias.to.lds.1 = alias [1 x i8], getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 2)
-; CHECK: @alias.to.lds.2 = alias [2 x i8], getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 3)
-; CHECK: @alias.to.gptr.3 = alias i64*, i64* addrspace(1)* @gptr.3
-; CHECK: @alias.to.gptr.4 = alias i64*, i64* addrspace(1)* @gptr.4
-; CHECK: @alias.to.lds.5 = alias [5 x i8], [5 x i8] addrspace(3)* @lds.5
-; CHECK: @alias.to.gptr.6 = alias i64*, i64* addrspace(1)* @gptr.6
-@alias.to.lds.1 = alias [1 x i8], [1 x i8] addrspace(3)* @lds.1
-@alias.to.lds.2 = alias [2 x i8], [2 x i8] addrspace(3)* @lds.2
-@alias.to.gptr.3 = alias i64*, i64* addrspace(1)* @gptr.3
-@alias.to.gptr.4 = alias i64*, i64* addrspace(1)* @gptr.4
-@alias.to.lds.5 = alias [5 x i8], [5 x i8] addrspace(3)* @lds.5
-@alias.to.gptr.6 = alias i64*, i64* addrspace(1)* @gptr.6
-
-; CHECK-LABEL: @f1
-; CHECK:   %ld = load i64*, i64* addrspace(1)* @alias.to.gptr.4, align 8
-; CHECK:   ret void
-define void @f1() {
-  %ld = load i64*, i64* addrspace(1)* @alias.to.gptr.4
-  ret void
-}
-
-; CHECK-LABEL: @f0
-; CHECK:   %bc = bitcast [2 x i8] addrspace(3)* @alias.to.lds.2 to i8 addrspace(3)*
-; CHECK:   store i8 1, i8 addrspace(3)* %bc, align 2
-; CHECK:   ret void
-define void @f0() {
-  %bc = bitcast [2 x i8] addrspace(3)* @alias.to.lds.2 to i8 addrspace(3)*
-  store i8 1, i8 addrspace(3)* %bc, align 2
-  ret void
-}
-
-; CHECK-LABEL: @k1
-; CHECK-LABEL:   call void @llvm.donothing() [ "ExplicitUse"(%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds) ]
-; CHECK-LABEL:   %ld = load i64*, i64* addrspace(1)* @alias.to.gptr.3, align 8
-; CHECK-LABEL:   ret void
-define amdgpu_kernel void @k1() {
-  %ld = load i64*, i64* addrspace(1)* @alias.to.gptr.3
-  ret void
-}
-
-; CHECK-LABEL: @k0
-; CHECK-LABEL:   call void @llvm.donothing() [ "ExplicitUse"(%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds) ]
-; CHECK-LABEL:   %bc = bitcast [1 x i8] addrspace(3)* @alias.to.lds.1 to i8 addrspace(3)*
-; CHECK-LABEL:   store i8 1, i8 addrspace(3)* %bc, align 1
-; CHECK-LABEL:   ret void
-define amdgpu_kernel void @k0() {
-  %bc = bitcast [1 x i8] addrspace(3)* @alias.to.lds.1 to i8 addrspace(3)*
-  store i8 1, i8 addrspace(3)* %bc, align 1
-  ret void
-}

diff  --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-global-uses.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-global-uses.ll
deleted file mode 100644
index 77fcefa7944d3..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-global-uses.ll
+++ /dev/null
@@ -1,88 +0,0 @@
-; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s | FileCheck %s
-; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s | FileCheck %s
-
-;.
-; @lds.1:  is part of @llvm.used list, and is no-where used. Hence it is not lowered.
-; @lds.2:  is part of @llvm.compiler.used list, and is no-where used. Hence it is not lowered.
-; @lds.3:  is used as initializer to @gptr.3, and is no-where used. @gptr.3 itself is also not
-;          used anywhere else, hence @lds.3 is not lowered.
-; @lds.4:  is used as initializer to @gptr.4, and is no-where used. @gptr.4 is part of
-;          @llvm.compiler.used list, but is no-where else used. hence @lds.4 is not lowered.
-;
-; @lds.5:  is used as initializer to @gptr.5, and is no-where used. @gptr.5 is part of
-;          @llvm.compiler.used list, but is also used within kernel @k0. Hence @lds.5 is lowered.
-; @lds.6:  is used as initializer to @gptr.6, and is no-where used. @gptr.6 is part of
-;          @llvm.compiler.used list, but is also used within non-kernel function @f0. Hence @lds.6 is lowered.
-; @lds.7:  is used as initializer to @gptr.7, and is no-where used. @gptr.7 is used as initializer to @gptr.8,
-;          and @gptr.8 is used within non-kernel function @f1. Hence @lds.7 is lowered.
-;.
-
-; CHECK: %llvm.amdgcn.module.lds.t = type { [3 x float], [1 x float], [2 x float] }
-
-; CHECK: @lds.1 = addrspace(3) global i16 undef, align 2
-; CHECK: @lds.2 = addrspace(3) global i32 undef, align 4
-; CHECK: @lds.3 = addrspace(3) global i64 undef, align 8
-; CHECK: @lds.4 = addrspace(3) global float undef, align 4
-; CHECK-NOT: @lds.5
-; CHECK-NOT: @lds.6
-; CHECK-NOT: @lds.7
-@lds.1 = addrspace(3) global i16 undef, align 2
-@lds.2 = addrspace(3) global i32 undef, align 4
-@lds.3 = addrspace(3) global i64 undef, align 8
-@lds.4 = addrspace(3) global float undef, align 4
-@lds.5 = addrspace(3) global [1 x float] undef, align 4
-@lds.6 = addrspace(3) global [2 x float] undef, align 8
-@lds.7 = addrspace(3) global [3 x float] undef, align 16
-
-; CHECK: @gptr.3 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* @lds.3 to i64*), align 8
-; CHECK: @gptr.4 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast (float addrspace(3)* @lds.4 to i64 addrspace(3)*) to i64*), align 8
-; CHECK: @gptr.5 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast ([1 x float] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 1) to i64 addrspace(3)*) to i64*), align 8
-; CHECK: @gptr.6 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast ([2 x float] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 2) to i64 addrspace(3)*) to i64*), align 8
-; CHECK: @gptr.7 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* bitcast (%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds to i64 addrspace(3)*) to i64*), align 8
-; CHECK: @gptr.8 = addrspace(1) global i64** addrspacecast (i64* addrspace(1)* @gptr.7 to i64**), align 8
-@gptr.3 = addrspace(1) global i64* addrspacecast (i64 addrspace(3)* @lds.3 to i64*), align 8
-@gptr.4 = addrspace(1) global i64* addrspacecast (float addrspace(3)* @lds.4 to i64*), align 8
-@gptr.5 = addrspace(1) global i64* addrspacecast ([1 x float] addrspace(3)* @lds.5 to i64*), align 8
-@gptr.6 = addrspace(1) global i64* addrspacecast ([2 x float] addrspace(3)* @lds.6 to i64*), align 8
-@gptr.7 = addrspace(1) global i64* addrspacecast ([3 x float] addrspace(3)* @lds.7 to i64*), align 8
-@gptr.8 = addrspace(1) global i64** addrspacecast (i64* addrspace(1)* @gptr.7 to i64**), align 8
-
-; CHECK: @llvm.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(3)* bitcast (i16 addrspace(3)* @lds.1 to i8 addrspace(3)*) to i8*)], section "llvm.metadata"
-; CHECK: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t undef, align 16
-; CHECK: @llvm.compiler.used = appending global [5 x i8*] [i8* addrspacecast (i8 addrspace(3)* bitcast (i32 addrspace(3)* @lds.2 to i8 addrspace(3)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.4 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.5 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.6 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(3)* bitcast (%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds to i8 addrspace(3)*) to i8*)], section "llvm.metadata"
-@llvm.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(3)* bitcast (i16 addrspace(3)* @lds.1 to i8 addrspace(3)*) to i8*)], section "llvm.metadata"
-@llvm.compiler.used = appending global [4 x i8*] [i8* addrspacecast (i8 addrspace(3)* bitcast (i32 addrspace(3)* @lds.2 to i8 addrspace(3)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.4 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.5 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.6 to i8 addrspace(1)*) to i8*)], section "llvm.metadata"
-
-; CHECK-LABEL: @f1()
-; CHECK:   %ld = load i64**, i64** addrspace(1)* @gptr.8, align 8
-; CHECK:   ret void
-define void @f1() {
-  %ld = load i64**, i64** addrspace(1)* @gptr.8
-  ret void
-}
-
-; CHECK-LABEL: @f0()
-; CHECK:   %ld = load i32, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.6 to i32 addrspace(1)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32
-; CHECK: addrspace(1)* bitcast (i64* addrspace(1)* @gptr.6 to i32 addrspace(1)*) to i32*) to i64)) to i32*), align 4
-; CHECK:   ret void
-define void @f0() {
-  %ld = load i32, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.6 to i32 addrspace(1)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.6 to i32 addrspace(1)*) to i32*) to i64)) to i32*), align 4
-  ret void
-}
-
-; CHECK-LABEL: @k0()
-; CHECK:   call void @llvm.donothing() [ "ExplicitUse"(%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds) ]
-; CHECK:   %ld = load i32, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.5 to i32 addrspace(1)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32
-; CHECK: addrspace(1)* bitcast (i64* addrspace(1)* @gptr.5 to i32 addrspace(1)*) to i32*) to i64)) to i32*), align 4
-; CHECK:   ret void
-define amdgpu_kernel void @k0() {
-  %ld = load i32, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.5 to i32 addrspace(1)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32 addrspace(1)* bitcast (i64* addrspace(1)* @gptr.5 to i32 addrspace(1)*) to i32*) to i64)) to i32*), align 4
-  ret void
-}
-
-; CHECK-LABEL: @k1()
-; CHECK:   call void @llvm.donothing() [ "ExplicitUse"(%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds) ]
-; CHECK:   ret void
-define amdgpu_kernel void @k1() {
-  ret void
-}

diff  --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-indirect.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-indirect.ll
deleted file mode 100644
index c3fd1c0f9e82e..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-indirect.ll
+++ /dev/null
@@ -1,39 +0,0 @@
-; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s | FileCheck %s
-; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s | FileCheck %s
-
-; CHECK: %llvm.amdgcn.module.lds.t = type { double, float }
-
-; CHECK: @function_indirect = addrspace(1) global float* addrspacecast (float addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 1) to float*), align 8
-
-; CHECK: @kernel_indirect = addrspace(1) global double* addrspacecast (double addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 0) to double*), align 8
-
-; CHECK: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t undef, align 8
-
-@function_target = addrspace(3) global float undef, align 4
-@function_indirect = addrspace(1) global float* addrspacecast (float addrspace(3)* @function_target to float*), align 8
-
-@kernel_target = addrspace(3) global double undef, align 8
-@kernel_indirect = addrspace(1) global double* addrspacecast (double addrspace(3)* @kernel_target to double*), align 8
-
-; CHECK-LABEL: @function(float %x)
-; CHECK: %0 = load float*, float* addrspace(1)* @function_indirect, align 8
-define void @function(float %x) local_unnamed_addr #5 {
-entry:
-  %0 = load float*, float* addrspace(1)* @function_indirect, align 8
-  store float %x, float* %0, align 4
-  ret void
-}
-
-; CHECK-LABEL: @kernel(double %x)
-; CHECK: call void @llvm.donothing() [ "ExplicitUse"(%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds) ]
-; CHECK: %0 = load double*, double* addrspace(1)* @kernel_indirect, align 8
-define amdgpu_kernel void @kernel(double %x) local_unnamed_addr #5 {
-entry:
-  %0 = load double*, double* addrspace(1)* @kernel_indirect, align 8
-  store double %x, double* %0, align 8
-  ret void
-}
-
-
-
-


        


More information about the llvm-commits mailing list