[llvm] d3dda42 - [amdgpu][nfc] Replace ad hoc LDS frame recalculation with absolute_symbol MD

Jon Chesterfield via llvm-commits llvm-commits at lists.llvm.org
Sun Mar 12 06:48:13 PDT 2023


Author: Jon Chesterfield
Date: 2023-03-12T13:47:48Z
New Revision: d3dda422bfd1dc281df944b4a07bcd6816e2ee94

URL: https://github.com/llvm/llvm-project/commit/d3dda422bfd1dc281df944b4a07bcd6816e2ee94
DIFF: https://github.com/llvm/llvm-project/commit/d3dda422bfd1dc281df944b4a07bcd6816e2ee94.diff

LOG: [amdgpu][nfc] Replace ad hoc LDS frame recalculation with absolute_symbol MD

After instruction selection (ISel), LDS variables have absolute addresses.
Representing them as absolute symbols (via !absolute_symbol metadata) is simpler
than the frame recalculation currently used to build assembler tables from their
addresses.

This is a precursor to lowering dynamic/external LDS accesses from non-kernel
functions.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D144221

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
    llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
    llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
    llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
    llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
    llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll
    llvm/test/CodeGen/AMDGPU/lower-kernel-lds.ll
    llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-memcpy.ll
    llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-merge.ll
    llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa.ll
    llvm/test/CodeGen/AMDGPU/lower-module-lds-single-var-unambiguous.ll
    llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll
    llvm/test/CodeGen/AMDGPU/lower-module-lds-via-table.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index f25dd8e003fd..6c9cbcd5f8d5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -1329,15 +1329,10 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
   GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
   const GlobalValue *GV = G->getGlobal();
 
-  if (G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
-    if (!MFI->isModuleEntryFunction()) {
-      if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) {
-        if (AMDGPUMachineFunction::isKnownAddressLDSGlobal(*GVar)) {
-          unsigned Offset =
-              AMDGPUMachineFunction::calculateKnownAddressOfLDSGlobal(*GVar);
-          return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType());
-        }
-      }
+  if (!MFI->isModuleEntryFunction()) {
+    if (std::optional<uint32_t> Address =
+            AMDGPUMachineFunction::getLDSAbsoluteAddress(*GV)) {
+      return DAG.getConstant(*Address, SDLoc(Op), Op.getValueType());
     }
   }
 

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index 0804fda79d60..d44e280640ee 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -608,6 +608,19 @@ class AMDGPULowerModuleLDS : public ModulePass {
     return MostUsed.GV;
   }
 
+  static void recordLDSAbsoluteAddress(Module *M, GlobalVariable *GV,
+                                       uint32_t Address) {
+    // Write the specified address into metadata where it can be retrieved by
+    // the assembler. Format is a half-open range, [Address, Address+1)
+    LLVMContext &Ctx = M->getContext();
+    auto *IntTy =
+        M->getDataLayout().getIntPtrType(Ctx, AMDGPUAS::LOCAL_ADDRESS);
+    auto *MinC = ConstantAsMetadata::get(ConstantInt::get(IntTy, Address));
+    auto *MaxC = ConstantAsMetadata::get(ConstantInt::get(IntTy, Address + 1));
+    GV->setMetadata(LLVMContext::MD_absolute_symbol,
+                    MDNode::get(Ctx, {MinC, MaxC}));
+  }
+
   bool runOnModule(Module &M) override {
     LLVMContext &Ctx = M.getContext();
     CallGraph CG = CallGraph(M);
@@ -708,17 +721,21 @@ class AMDGPULowerModuleLDS : public ModulePass {
         kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
                                                         TableLookupVariables);
 
+    GlobalVariable *MaybeModuleScopeStruct = nullptr;
     if (!ModuleScopeVariables.empty()) {
       LDSVariableReplacement ModuleScopeReplacement =
           createLDSVariableReplacement(M, "llvm.amdgcn.module.lds",
                                        ModuleScopeVariables);
-
+      MaybeModuleScopeStruct = ModuleScopeReplacement.SGV;
       appendToCompilerUsed(M,
                            {static_cast<GlobalValue *>(
                                ConstantExpr::getPointerBitCastOrAddrSpaceCast(
                                    cast<Constant>(ModuleScopeReplacement.SGV),
                                    Type::getInt8PtrTy(Ctx)))});
 
+      // module.lds will be allocated at zero in any kernel that allocates it
+      recordLDSAbsoluteAddress(&M, ModuleScopeReplacement.SGV, 0);
+
       // historic
       removeLocalVarsFromUsedLists(M, ModuleScopeVariables);
 
@@ -806,6 +823,33 @@ class AMDGPULowerModuleLDS : public ModulePass {
       auto Replacement =
           createLDSVariableReplacement(M, VarName, KernelUsedVariables);
 
+      // This struct is allocated at a predictable address that can be
+      // calculated now, recorded in metadata then used to lower references to
+      // it during codegen.
+      {
+        // frame layout, starting from 0
+        //{
+        //  module.lds
+        //  alignment padding
+        //  kernel instance
+        //}
+
+        if (!MaybeModuleScopeStruct ||
+            Func.hasFnAttribute("amdgpu-elide-module-lds")) {
+          // There's no module.lds for this kernel so this replacement struct
+          // goes first
+          recordLDSAbsoluteAddress(&M, Replacement.SGV, 0);
+        } else {
+          const DataLayout &DL = M.getDataLayout();
+          TypeSize ModuleSize =
+              DL.getTypeAllocSize(MaybeModuleScopeStruct->getValueType());
+          GlobalVariable *KernelStruct = Replacement.SGV;
+          Align KernelAlign = AMDGPU::getAlign(DL, KernelStruct);
+          recordLDSAbsoluteAddress(&M, Replacement.SGV,
+                                   alignTo(ModuleSize, KernelAlign));
+        }
+      }
+
       // remove preserves existing codegen
       removeLocalVarsFromUsedLists(M, KernelUsedVariables);
       KernelToReplacement[&Func] = Replacement;

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index d88a2cd961b2..da75f5759a8d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -13,6 +13,7 @@
 //
 
 #include "AMDGPUMCInstLower.h"
+#include "AMDGPU.h"
 #include "AMDGPUAsmPrinter.h"
 #include "AMDGPUMachineFunction.h"
 #include "AMDGPUTargetMachine.h"
@@ -168,12 +169,11 @@ bool AMDGPUAsmPrinter::lowerOperand(const MachineOperand &MO,
 const MCExpr *AMDGPUAsmPrinter::lowerConstant(const Constant *CV) {
 
   // Intercept LDS variables with known addresses
-  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(CV)) {
-    if (AMDGPUMachineFunction::isKnownAddressLDSGlobal(*GV)) {
-      unsigned offset =
-          AMDGPUMachineFunction::calculateKnownAddressOfLDSGlobal(*GV);
-      Constant *C = ConstantInt::get(CV->getContext(), APInt(32, offset));
-      return AsmPrinter::lowerConstant(C);
+  if (const GlobalVariable *GV = dyn_cast<const GlobalVariable>(CV)) {
+    if (std::optional<uint32_t> Address =
+            AMDGPUMachineFunction::getLDSAbsoluteAddress(*GV)) {
+      auto *IntTy = Type::getInt32Ty(CV->getContext());
+      return AsmPrinter::lowerConstant(ConstantInt::get(IntTy, *Address));
     }
   }
 

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
index a6a32b98f44c..e70afd72462e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -11,7 +11,9 @@
 #include "AMDGPUPerfHintAnalysis.h"
 #include "AMDGPUSubtarget.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/Metadata.h"
 #include "llvm/Target/TargetMachine.h"
 
 using namespace llvm;
@@ -89,24 +91,7 @@ unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
 
 static constexpr StringLiteral ModuleLDSName = "llvm.amdgcn.module.lds";
 
-bool AMDGPUMachineFunction::isKnownAddressLDSGlobal(const GlobalVariable &GV) {
-  auto name = GV.getName();
-  return (name == ModuleLDSName) ||
-         (name.startswith("llvm.amdgcn.kernel.") && name.endswith(".lds"));
-}
-
-const Function *AMDGPUMachineFunction::getKernelLDSFunctionFromGlobal(
-    const GlobalVariable &GV) {
-  const Module &M = *GV.getParent();
-  StringRef N(GV.getName());
-  if (N.consume_front("llvm.amdgcn.kernel.") && N.consume_back(".lds")) {
-    return M.getFunction(N);
-  }
-  return nullptr;
-}
-
-const GlobalVariable *
-AMDGPUMachineFunction::getKernelLDSGlobalFromFunction(const Function &F) {
+static const GlobalVariable *getKernelLDSGlobalFromFunction(const Function &F) {
   const Module *M = F.getParent();
   std::string KernelLDSName = "llvm.amdgcn.kernel.";
   KernelLDSName += F.getName();
@@ -119,40 +104,8 @@ static bool canElideModuleLDS(const Function &F) {
   return F.hasFnAttribute("amdgpu-elide-module-lds");
 }
 
-unsigned AMDGPUMachineFunction::calculateKnownAddressOfLDSGlobal(
-    const GlobalVariable &GV) {
-  // module.lds, then alignment padding, then kernel.lds, then other variables
-  // if any
-
-  assert(isKnownAddressLDSGlobal(GV));
-  unsigned Offset = 0;
-
-  if (GV.getName() == ModuleLDSName) {
-    return 0;
-  }
-
-  const Module *M = GV.getParent();
-  const DataLayout &DL = M->getDataLayout();
-
-  const GlobalVariable *GVM = M->getNamedGlobal(ModuleLDSName);
-  const Function *f = getKernelLDSFunctionFromGlobal(GV);
-
-  // Account for module.lds if allocated for this function
-  if (GVM && f && !canElideModuleLDS(*f)) {
-    // allocator aligns this to var align, but it's zero to begin with
-    Offset += DL.getTypeAllocSize(GVM->getValueType());
-  }
-
-  // No dynamic LDS alignment done by allocateModuleLDSGlobal
-  Offset = alignTo(
-      Offset, DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType()));
-
-  return Offset;
-}
-
 void AMDGPUMachineFunction::allocateKnownAddressLDSGlobal(const Function &F) {
   const Module *M = F.getParent();
-
   // This function is called before allocating any other LDS so that it can
   // reliably put values at known addresses. Consequently, dynamic LDS, if
   // present, will not yet have been allocated
@@ -180,40 +133,60 @@ void AMDGPUMachineFunction::allocateKnownAddressLDSGlobal(const Function &F) {
     const GlobalVariable *KV = getKernelLDSGlobalFromFunction(F);
 
     if (GV && !canElideModuleLDS(F)) {
-      assert(isKnownAddressLDSGlobal(*GV));
       unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *GV, Align());
-      (void)Offset;
-      assert(Offset == calculateKnownAddressOfLDSGlobal(*GV) &&
-             "Module LDS expected to be allocated before other LDS");
+      std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*GV);
+      if (!Expect || (Offset != Expect)) {
+        report_fatal_error("Inconsistent metadata on module LDS variable");
+      }
     }
 
     if (KV) {
       // The per-kernel offset is deterministic because it is allocated
       // before any other non-module LDS variables.
-      assert(isKnownAddressLDSGlobal(*KV));
       unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *KV, Align());
-      (void)Offset;
-      assert(Offset == calculateKnownAddressOfLDSGlobal(*KV) &&
-             "Kernel LDS expected to be immediately after module LDS");
+      std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*KV);
+      if (!Expect || (Offset != Expect)) {
+        report_fatal_error("Inconsistent metadata on kernel LDS variable");
+      }
     }
   }
 }
 
 std::optional<uint32_t>
 AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) {
-  auto MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");
+  // TODO: Would be more consistent with the abs symbols to use a range
+  MDNode *MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");
   if (MD && MD->getNumOperands() == 1) {
-    ConstantInt *KnownSize = mdconst::extract<ConstantInt>(MD->getOperand(0));
-    if (KnownSize) {
-      uint64_t V = KnownSize->getZExtValue();
-      if (V <= UINT32_MAX) {
-        return V;
+    if (ConstantInt *KnownSize =
+            mdconst::extract<ConstantInt>(MD->getOperand(0))) {
+      uint64_t ZExt = KnownSize->getZExtValue();
+      if (ZExt <= UINT32_MAX) {
+        return ZExt;
       }
     }
   }
   return {};
 }
 
+std::optional<uint32_t>
+AMDGPUMachineFunction::getLDSAbsoluteAddress(const GlobalValue &GV) {
+  if (GV.getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
+    return {};
+
+  std::optional<ConstantRange> AbsSymRange = GV.getAbsoluteSymbolRange();
+  if (!AbsSymRange)
+    return {};
+
+  if (const APInt *V = AbsSymRange->getSingleElement()) {
+    std::optional<uint64_t> ZExt = V->tryZExtValue();
+    if (ZExt && (*ZExt <= UINT32_MAX)) {
+      return *ZExt;
+    }
+  }
+
+  return {};
+}
+
 void AMDGPUMachineFunction::setDynLDSAlign(const DataLayout &DL,
                                            const GlobalVariable &GV) {
   assert(DL.getTypeAllocSize(GV.getValueType()).isZero());

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
index f27f8252a4d8..ba4c55a58c86 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -106,20 +106,8 @@ class AMDGPUMachineFunction : public MachineFunctionInfo {
 
   void allocateKnownAddressLDSGlobal(const Function &F);
 
-  // A kernel function may have an associated LDS allocation, and a kernel-scope
-  // LDS allocation must have an associated kernel function
-
-  // LDS allocation should have an associated kernel function
-  static const Function *
-  getKernelLDSFunctionFromGlobal(const GlobalVariable &GV);
-  static const GlobalVariable *
-  getKernelLDSGlobalFromFunction(const Function &F);
-
-  // Module or kernel scope LDS variable
-  static bool isKnownAddressLDSGlobal(const GlobalVariable &GV);
-  static unsigned calculateKnownAddressOfLDSGlobal(const GlobalVariable &GV);
-
   static std::optional<uint32_t> getLDSKernelIdMetadata(const Function &F);
+  static std::optional<uint32_t> getLDSAbsoluteAddress(const GlobalValue &GV);
 
   Align getDynLDSAlign() const { return DynLDSAlign; }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll b/llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll
index 5aa5fcac08a9..0c47b439706b 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll
@@ -14,19 +14,19 @@
 ; CHECK: %llvm.amdgcn.kernel.k3.lds.t = type { [4 x i8] }
 
 ;.
-; CHECK: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t undef, align 8
+; CHECK: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t undef, align 8, !absolute_symbol !0
 ; CHECK: @llvm.compiler.used = appending global [1 x ptr] [ptr addrspacecast (ptr addrspace(3) @llvm.amdgcn.module.lds to ptr)], section "llvm.metadata"
-; CHECK: @llvm.amdgcn.kernel.k0.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k0.lds.t undef, align 16
-; CHECK: @llvm.amdgcn.kernel.k1.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k1.lds.t undef, align 16
-; CHECK: @llvm.amdgcn.kernel.k2.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k2.lds.t undef, align 2
-; CHECK: @llvm.amdgcn.kernel.k3.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k3.lds.t undef, align 4
+; CHECK: @llvm.amdgcn.kernel.k0.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k0.lds.t undef, align 16, !absolute_symbol !0
+; CHECK: @llvm.amdgcn.kernel.k1.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k1.lds.t undef, align 16, !absolute_symbol !0
+; CHECK: @llvm.amdgcn.kernel.k2.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k2.lds.t undef, align 2, !absolute_symbol !0
+; CHECK: @llvm.amdgcn.kernel.k3.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k3.lds.t undef, align 4, !absolute_symbol !0
 ;.
 define amdgpu_kernel void @k0() #0 {
 ; CHECK-LABEL: @k0(
-; CHECK-NEXT: store i8 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 3), align 2, !alias.scope !0, !noalias !3
-; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 2), align 4, !alias.scope !7, !noalias !8
-; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 1), align 16, !alias.scope !9, !noalias !10
-; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, align 16, !alias.scope !11, !noalias !12
+; CHECK-NEXT: store i8 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 3), align 2, !alias.scope !1, !noalias !4
+; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 2), align 4, !alias.scope !8, !noalias !9
+; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 1), align 16, !alias.scope !10, !noalias !11
+; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, align 16, !alias.scope !12, !noalias !13
 ; CHECK-NEXT:    ret void
   store i8 1, ptr addrspace(3) @lds.size.1.align.1, align 1
 
@@ -41,9 +41,9 @@ define amdgpu_kernel void @k0() #0 {
 
 define amdgpu_kernel void @k1() #0 {
 ; CHECK-LABEL: @k1(
-; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, i32 2), align 4, !alias.scope !13, !noalias !16
-; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, i32 1), align 16, !alias.scope !19, !noalias !20
-; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, align 16, !alias.scope !21, !noalias !22
+; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, i32 2), align 4, !alias.scope !14, !noalias !17
+; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, i32 1), align 16, !alias.scope !20, !noalias !21
+; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, align 16, !alias.scope !22, !noalias !23
 ; CHECK-NEXT:    ret void
 ;
   store i8 2, ptr addrspace(3) @lds.size.2.align.2, align 2
@@ -83,8 +83,8 @@ define amdgpu_kernel void @calls_f0() {
 
 define void @f0() {
 ; CHECK-LABEL: define void @f0(
-; CHECK-NEXT: store i8 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.module.lds.t, ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 8, !noalias !23
-; CHECK-NEXT: store i8 8, ptr addrspace(3) @llvm.amdgcn.module.lds, align 8, !noalias !23
+; CHECK-NEXT: store i8 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.module.lds.t, ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 8, !noalias !24
+; CHECK-NEXT: store i8 8, ptr addrspace(3) @llvm.amdgcn.module.lds, align 8, !noalias !24
 ; CHECK-NEXT: ret void
   store i8 1, ptr addrspace(3) @lds.size.1.align.1, align 1
 
@@ -95,3 +95,5 @@ define void @f0() {
 
 attributes #0 = { "amdgpu-elide-module-lds" }
 ; CHECK: attributes #0 = { "amdgpu-elide-module-lds" }
+
+; CHECK: !0 = !{i64 0, i64 1}

diff  --git a/llvm/test/CodeGen/AMDGPU/lower-kernel-lds.ll b/llvm/test/CodeGen/AMDGPU/lower-kernel-lds.ll
index da1923e1c112..e9be71e8f483 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-kernel-lds.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-kernel-lds.ll
@@ -11,15 +11,15 @@
 
 ;.
 ; CHECK: @lds.k2 = addrspace(3) global [1 x i8] undef, align 1
-; CHECK: @llvm.amdgcn.kernel.k0.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k0.lds.t undef, align 16
-; CHECK: @llvm.amdgcn.kernel.k1.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k1.lds.t undef, align 16
+; CHECK: @llvm.amdgcn.kernel.k0.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k0.lds.t undef, align 16, !absolute_symbol !0
+; CHECK: @llvm.amdgcn.kernel.k1.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k1.lds.t undef, align 16, !absolute_symbol !0
 ;.
 define amdgpu_kernel void @k0() {
 ; CHECK-LABEL: @k0(
-; CHECK-NEXT: store i8 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 3), align 2, !alias.scope !0, !noalias !3
-; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 2), align 4, !alias.scope !7, !noalias !8
-; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 1), align 16, !alias.scope !9, !noalias !10
-; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, align 16, !alias.scope !11, !noalias !12
+; CHECK-NEXT: store i8 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 3), align 2, !alias.scope !1, !noalias !4
+; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 2), align 4, !alias.scope !8, !noalias !9
+; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 1), align 16, !alias.scope !10, !noalias !11
+; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, align 16, !alias.scope !12, !noalias !13
 ; CHECK-NEXT: ret void
   store i8 1, ptr addrspace(3) @lds.size.1.align.1, align 1
 
@@ -34,9 +34,9 @@ define amdgpu_kernel void @k0() {
 
 define amdgpu_kernel void @k1() {
 ; CHECK-LABEL: @k1(
-; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, i32 2), align 4, !alias.scope !13, !noalias !16
-; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, i32 1), align 16, !alias.scope !19, !noalias !20
-; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, align 16, !alias.scope !21, !noalias !22
+; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, i32 2), align 4, !alias.scope !14, !noalias !17
+; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, i32 1), align 16, !alias.scope !20, !noalias !21
+; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, align 16, !alias.scope !22, !noalias !23
 ; CHECK-NEXT: ret void
 ;
   store i8 2, ptr addrspace(3) @lds.size.2.align.2, align 2
@@ -61,3 +61,5 @@ define amdgpu_ps void @k2() {
 
   ret void
 }
+
+; CHECK: !0 = !{i64 0, i64 1}

diff  --git a/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-memcpy.ll b/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-memcpy.ll
index 35a4d46b3a5a..72ba840c15f1 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-memcpy.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-memcpy.ll
@@ -13,7 +13,7 @@ $_f2 = comdat any
 @_f2 = linkonce_odr hidden local_unnamed_addr addrspace(3) global %vec_type undef, comdat, align 1
 
 ;.
-; CHECK: @[[LLVM_AMDGCN_KERNEL_TEST_LDS:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [[LLVM_AMDGCN_KERNEL_TEST_LDS_T:%.*]] undef, align 4
+; CHECK: @[[LLVM_AMDGCN_KERNEL_TEST_LDS:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [[LLVM_AMDGCN_KERNEL_TEST_LDS_T:%.*]] undef, align 4, !absolute_symbol !0
 ;.
 define protected amdgpu_kernel void @test(ptr addrspace(1) nocapture %ptr.coerce) local_unnamed_addr #0 {
 ; GCN-LABEL: test:
@@ -33,13 +33,13 @@ define protected amdgpu_kernel void @test(ptr addrspace(1) nocapture %ptr.coerce
 ; GCN-NEXT:    s_endpgm
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    store i8 3, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, align 4, !alias.scope !0, !noalias !3
-; CHECK-NEXT:    tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef align 1 dereferenceable(3) getelementptr inbounds (%llvm.amdgcn.kernel.test.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), ptr addrspace(3) noundef align 1 dereferenceable(3) @llvm.amdgcn.kernel.test.lds, i64 3, i1 false), !alias.scope !5, !noalias !6
-; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.test.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), align 4, !alias.scope !3, !noalias !0
+; CHECK-NEXT:    store i8 3, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, align 4, !alias.scope !1, !noalias !4
+; CHECK-NEXT:    tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef align 1 dereferenceable(3) getelementptr inbounds (%llvm.amdgcn.kernel.test.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), ptr addrspace(3) noundef align 1 dereferenceable(3) @llvm.amdgcn.kernel.test.lds, i64 3, i1 false), !alias.scope !6, !noalias !7
+; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.test.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), align 4, !alias.scope !4, !noalias !1
 ; CHECK-NEXT:    [[CMP_I_I:%.*]] = icmp eq i8 [[TMP4]], 3
-; CHECK-NEXT:    store i8 2, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, align 4, !alias.scope !0, !noalias !3
-; CHECK-NEXT: tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef align 1 dereferenceable(3) getelementptr inbounds (%llvm.amdgcn.kernel.test.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), ptr addrspace(3) noundef align 1 dereferenceable(3) @llvm.amdgcn.kernel.test.lds, i64 3, i1 false), !alias.scope !5, !noalias !6
-; CHECK-NEXT:    [[TMP9:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.test.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), align 4, !alias.scope !3, !noalias !0
+; CHECK-NEXT:    store i8 2, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, align 4, !alias.scope !1, !noalias !4
+; CHECK-NEXT: tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef align 1 dereferenceable(3) getelementptr inbounds (%llvm.amdgcn.kernel.test.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), ptr addrspace(3) noundef align 1 dereferenceable(3) @llvm.amdgcn.kernel.test.lds, i64 3, i1 false), !alias.scope !6, !noalias !7
+; CHECK-NEXT:    [[TMP9:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.test.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), align 4, !alias.scope !4, !noalias !1
 ; CHECK-NEXT:    [[CMP_I_I19:%.*]] = icmp eq i8 [[TMP9]], 2
 ; CHECK-NEXT:    [[TMP10:%.*]] = and i1 [[CMP_I_I19]], [[CMP_I_I]]
 ; CHECK-NEXT:    [[FROMBOOL8:%.*]] = zext i1 [[TMP10]] to i8
@@ -66,11 +66,12 @@ declare void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noalias nocapture writeonly
 ;.
 ; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
 ;.
-; CHECK: [[META0:![0-9]+]] = !{!1}
-; CHECK: [[META1:![0-9]+]] = distinct !{!1, !2}
-; CHECK: [[META2:![0-9]+]] = distinct !{!2}
-; CHECK: [[META3:![0-9]+]] = !{!4}
-; CHECK: [[META4:![0-9]+]] = distinct !{!4, !2}
-; CHECK: [[META5:![0-9]+]] = !{!4, !1}
-; CHECK: [[META6:![0-9]+]] = !{}
+; CHECK: [[META0:![0-9]+]] = !{i64 0, i64 1}
+; CHECK: [[META1:![0-9]+]] = !{!2}
+; CHECK: [[META2:![0-9]+]] = distinct !{!2, !3}
+; CHECK: [[META3:![0-9]+]] = distinct !{!3}
+; CHECK: [[META4:![0-9]+]] = !{!5}
+; CHECK: [[META5:![0-9]+]] = distinct !{!5, !3}
+; CHECK: [[META6:![0-9]+]] = !{!5, !2}
+; CHECK: [[META7:![0-9]+]] = !{}
 ;.

diff  --git a/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-merge.ll b/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-merge.ll
index 8ed0fb7f6687..10064664aa99 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-merge.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-merge.ll
@@ -5,10 +5,10 @@
 @b = internal unnamed_addr addrspace(3) global [64 x i32] undef, align 4
 
 ; CHECK-LABEL: @no_clobber_ds_load_stores_x2_preexisting_aa
-; CHECK: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, align 16, !tbaa !0, !noalias !5
-; CHECK: %val.a = load i32, ptr addrspace(3) %gep.a, align 4, !tbaa !0, !noalias !5
-; CHECK: store i32 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, i32 0, i32 1), align 16, !tbaa !0, !noalias !5
-; CHECK: %val.b = load i32, ptr addrspace(3) %gep.b, align 4, !tbaa !0, !noalias !5
+; CHECK: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, align 16, !tbaa !1, !noalias !6
+; CHECK: %val.a = load i32, ptr addrspace(3) %gep.a, align 4, !tbaa !1, !noalias !6
+; CHECK: store i32 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, i32 0, i32 1), align 16, !tbaa !1, !noalias !6
+; CHECK: %val.b = load i32, ptr addrspace(3) %gep.b, align 4, !tbaa !1, !noalias !6
 
 define amdgpu_kernel void @no_clobber_ds_load_stores_x2_preexisting_aa(ptr addrspace(1) %arg, i32 %i) {
 bb:
@@ -34,9 +34,10 @@ bb:
 !8 = !{!"omnipotent char", !9, i64 0}
 !9 = !{!"Simple C++ TBAA"}
 
-; CHECK:!0 = !{!1, !2, i64 0}
-; CHECK:!1 = !{!"no_clobber_ds_load_stores_x2_preexisting_aa", !2, i64 0}
-; CHECK:!2 = !{!"int", !3, i64 0}
-; CHECK:!3 = !{!"omnipotent char", !4, i64 0}
-; CHECK:!4 = !{!"Simple C++ TBAA"}
-; CHECK:!5 = !{}
+; CHECK:!0 = !{i64 0, i64 1}
+; CHECK:!1 = !{!2, !3, i64 0}
+; CHECK:!2 = !{!"no_clobber_ds_load_stores_x2_preexisting_aa", !3, i64 0}
+; CHECK:!3 = !{!"int", !4, i64 0}
+; CHECK:!4 = !{!"omnipotent char", !5, i64 0}
+; CHECK:!5 = !{!"Simple C++ TBAA"}
+; CHECK:!6 = !{}

diff  --git a/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa.ll b/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa.ll
index 983516c3cabd..d7697c903721 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa.ll
@@ -17,10 +17,10 @@
 ; GCN: ds_read_b32
 
 ; CHECK-LABEL: @no_clobber_ds_load_stores_x2
-; CHECK: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds, align 16, !alias.scope !0, !noalias !3
-; CHECK: %val.a = load i32, ptr addrspace(3) %gep.a, align 4, !alias.scope !0, !noalias !3
-; CHECK: store i32 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds, i32 0, i32 1), align 16, !alias.scope !3, !noalias !0
-; CHECK: %val.b = load i32, ptr addrspace(3) %gep.b, align 4, !alias.scope !3, !noalias !0
+; CHECK: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds, align 16, !alias.scope !1, !noalias !4
+; CHECK: %val.a = load i32, ptr addrspace(3) %gep.a, align 4, !alias.scope !1, !noalias !4
+; CHECK: store i32 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds, i32 0, i32 1), align 16, !alias.scope !4, !noalias !1
+; CHECK: %val.b = load i32, ptr addrspace(3) %gep.b, align 4, !alias.scope !4, !noalias !1
 
 define amdgpu_kernel void @no_clobber_ds_load_stores_x2(ptr addrspace(1) %arg, i32 %i) {
 bb:
@@ -44,13 +44,13 @@ bb:
 ; GCN-DAG: ds_read_b32
 
 ; CHECK-LABEL: @no_clobber_ds_load_stores_x3
-; CHECK: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, align 16, !alias.scope !5, !noalias !8
+; CHECK: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, align 16, !alias.scope !6, !noalias !9
 ; CHECK: %gep.a = getelementptr inbounds [64 x i32], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 %i
-; CHECK: %val.a = load i32, ptr addrspace(3) %gep.a, align 4, !alias.scope !5, !noalias !8
-; CHECK: store i32 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 1), align 16, !alias.scope !11, !noalias !12
-; CHECK: %val.b = load i32, ptr addrspace(3) %gep.b, align 4, !alias.scope !11, !noalias !12
-; CHECK: store i32 3, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 2), align 16, !alias.scope !13, !noalias !14
-; CHECK: %val.c = load i32, ptr addrspace(3) %gep.c, align 4, !alias.scope !13, !noalias !14
+; CHECK: %val.a = load i32, ptr addrspace(3) %gep.a, align 4, !alias.scope !6, !noalias !9
+; CHECK: store i32 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 1), align 16, !alias.scope !12, !noalias !13
+; CHECK: %val.b = load i32, ptr addrspace(3) %gep.b, align 4, !alias.scope !12, !noalias !13
+; CHECK: store i32 3, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 2), align 16, !alias.scope !14, !noalias !15
+; CHECK: %val.c = load i32, ptr addrspace(3) %gep.c, align 4, !alias.scope !14, !noalias !15
 
 define amdgpu_kernel void @no_clobber_ds_load_stores_x3(ptr addrspace(1) %arg, i32 %i) {
 bb:
@@ -69,18 +69,19 @@ bb:
   ret void
 }
 
-; CHECK: !0 = !{!1}
-; CHECK: !1 = distinct !{!1, !2}
-; CHECK: !2 = distinct !{!2}
-; CHECK: !3 = !{!4}
-; CHECK: !4 = distinct !{!4, !2}
-; CHECK: !5 = !{!6}
-; CHECK: !6 = distinct !{!6, !7}
-; CHECK: !7 = distinct !{!7}
-; CHECK: !8 = !{!9, !10}
-; CHECK: !9 = distinct !{!9, !7}
-; CHECK: !10 = distinct !{!10, !7}
-; CHECK: !11 = !{!9}
-; CHECK: !12 = !{!6, !10}
-; CHECK: !13 = !{!10}
-; CHECK: !14 = !{!6, !9}
+; CHECK: !0 = !{i64 0, i64 1}
+; CHECK: !1 = !{!2}
+; CHECK: !2 = distinct !{!2, !3}
+; CHECK: !3 = distinct !{!3}
+; CHECK: !4 = !{!5}
+; CHECK: !5 = distinct !{!5, !3}
+; CHECK: !6 = !{!7}
+; CHECK: !7 = distinct !{!7, !8}
+; CHECK: !8 = distinct !{!8}
+; CHECK: !9 = !{!10, !11}
+; CHECK: !10 = distinct !{!10, !8}
+; CHECK: !11 = distinct !{!11, !8}
+; CHECK: !12 = !{!10}
+; CHECK: !13 = !{!7, !11}
+; CHECK: !14 = !{!11}
+; CHECK: !15 = !{!7, !10}

diff  --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-single-var-unambiguous.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-single-var-unambiguous.ll
index c380bb1c9b92..b3a0f94d4cde 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-single-var-unambiguous.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-single-var-unambiguous.ll
@@ -28,9 +28,9 @@ define amdgpu_kernel void @k0() {
 @f0.lds = addrspace(3) global i16 undef
 define void @f0() {
 ; MODULE-LABEL: @f0(
-; MODULE-NEXT:    [[LD:%.*]] = load i16, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !alias.scope !0, !noalias !3
+; MODULE-NEXT:    [[LD:%.*]] = load i16, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !alias.scope !1, !noalias !4
 ; MODULE-NEXT:    [[MUL:%.*]] = mul i16 [[LD]], 3
-; MODULE-NEXT:    store i16 [[MUL]], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !alias.scope !0, !noalias !3
+; MODULE-NEXT:    store i16 [[MUL]], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !alias.scope !1, !noalias !4
 ; MODULE-NEXT:    ret void
 ;
 ; TABLE-LABEL: @f0(
@@ -60,7 +60,7 @@ define void @f0() {
 
 define amdgpu_kernel void @k_f0() {
 ; MODULE-LABEL: @k_f0(
-; MODULE-NEXT:    call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ]
+; MODULE-NEXT:    call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ], !alias.scope !5, !noalias !1
 ; MODULE-NEXT:    call void @f0()
 ; MODULE-NEXT:    ret void
 ;
@@ -82,9 +82,9 @@ define amdgpu_kernel void @k_f0() {
 @both.lds = addrspace(3) global i32 undef
 define void @f_both() {
 ; MODULE-LABEL: @f_both(
-; MODULE-NEXT:    [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope !4, !noalias !3
+; MODULE-NEXT:    [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope !5, !noalias !4
 ; MODULE-NEXT:    [[MUL:%.*]] = mul i32 [[LD]], 4
-; MODULE-NEXT:    store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope !4, !noalias !3
+; MODULE-NEXT:    store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope !5, !noalias !4
 ; MODULE-NEXT:    ret void
 ;
 ; TABLE-LABEL: @f_both(
@@ -115,9 +115,9 @@ define void @f_both() {
 define amdgpu_kernel void @k0_both() {
 ; MODULE-LABEL: @k0_both(
 ; MODULE-NEXT:    call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ]
-; MODULE-NEXT:    [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope !4, !noalias !0
+; MODULE-NEXT:    [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope !5, !noalias !1
 ; MODULE-NEXT:    [[MUL:%.*]] = mul i32 [[LD]], 5
-; MODULE-NEXT:    store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope !4, !noalias !0
+; MODULE-NEXT:    store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope !5, !noalias !1
 ; MODULE-NEXT:    call void @f_both()
 ; MODULE-NEXT:    ret void
 ;

diff  --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll
index cdead7560b29..75211d595273 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll
@@ -11,12 +11,12 @@
 @v3 = addrspace(3) global i8 undef
 @unused = addrspace(3) global i16 undef
 
-; OPT: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t undef, align 16
+; OPT: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t undef, align 16, !absolute_symbol !0
 ; OPT: @llvm.compiler.used = appending global [1 x ptr] [ptr addrspacecast (ptr addrspace(3) @llvm.amdgcn.module.lds to ptr)], section "llvm.metadata"
-; OPT: @llvm.amdgcn.kernel.kernel_no_table.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kernel_no_table.lds.t undef, align 8
-; OPT: @llvm.amdgcn.kernel.k01.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k01.lds.t undef, align 4
-; OPT: @llvm.amdgcn.kernel.k23.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k23.lds.t undef, align 8
-; OPT: @llvm.amdgcn.kernel.k123.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k123.lds.t undef, align 8
+; OPT: @llvm.amdgcn.kernel.kernel_no_table.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kernel_no_table.lds.t undef, align 8, !absolute_symbol !0
+; OPT: @llvm.amdgcn.kernel.k01.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k01.lds.t undef, align 4, !absolute_symbol !1
+; OPT: @llvm.amdgcn.kernel.k23.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k23.lds.t undef, align 8, !absolute_symbol !0
+; OPT: @llvm.amdgcn.kernel.k123.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k123.lds.t undef, align 8, !absolute_symbol !2
 ; OPT{LITERAL}: @llvm.amdgcn.lds.offset.table = internal addrspace(4) constant [2 x [1 x i32]] [[1 x i32] [i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds to i32)], [1 x i32] [i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds to i32)]]
 
 ;.
@@ -234,9 +234,9 @@ define amdgpu_kernel void @k123() {
 ; OPT-NEXT:    call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds) ]
 ; OPT-NEXT:    call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ]
 ; OPT-NEXT:    call void @f1()
-; OPT-NEXT:    %ld = load i8, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k123.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 8, !alias.scope !2, !noalias !5
+; OPT-NEXT:    %ld = load i8, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k123.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 8, !alias.scope !5, !noalias !8
 ; OPT-NEXT:    %mul = mul i8 %ld, 8
-; OPT-NEXT:    store i8 %mul, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k123.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 8, !alias.scope !2, !noalias !5
+; OPT-NEXT:    store i8 %mul, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k123.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 8, !alias.scope !5, !noalias !8
 ; OPT-NEXT:    call void @f2()
 ; OPT-NEXT:    ret void
 ;
@@ -289,13 +289,16 @@ define amdgpu_kernel void @k123() {
 ; OPT: attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) }
 ; OPT: attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
 ;.
-; OPT: !0 = !{i32 1}
-; OPT: !1 = !{i32 0}
-; OPT: !2 = !{!3}
-; OPT: !3 = distinct !{!3, !4}
-; OPT: !4 = distinct !{!4}
+; OPT: !0 = !{i64 0, i64 1}
+; OPT: !1 = !{i64 4, i64 5}
+; OPT: !2 = !{i64 8, i64 9}
+; OPT: !3 = !{i32 1}
+; OPT: !4 = !{i32 0}
 ; OPT: !5 = !{!6}
-; OPT: !6 = distinct !{!6, !4}
+; OPT: !6 = distinct !{!6, !7}
+; OPT: !7 = distinct !{!7}
+; OPT: !8 = !{!9}
+; OPT: !9 = distinct !{!9, !7}
 ;.
 
 ; Table size length number-kernels * number-variables * sizeof(uint16_t)

diff  --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-table.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-table.ll
index 8b289f12fce4..5b7da296f2e9 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-table.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-table.ll
@@ -15,10 +15,10 @@
 ; OPT: %llvm.amdgcn.kernel.k23.lds.t = type { i64, i8 }
 ; OPT: %llvm.amdgcn.kernel.k123.lds.t = type { i16, i8, [5 x i8], i64 }
 
-; OPT: @llvm.amdgcn.kernel.kernel_no_table.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kernel_no_table.lds.t undef, align 8
-; OPT: @llvm.amdgcn.kernel.k01.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k01.lds.t undef, align 16
-; OPT: @llvm.amdgcn.kernel.k23.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k23.lds.t undef, align 8
-; OPT: @llvm.amdgcn.kernel.k123.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k123.lds.t undef, align 16
+; OPT: @llvm.amdgcn.kernel.kernel_no_table.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kernel_no_table.lds.t undef, align 8, !absolute_symbol !0
+; OPT: @llvm.amdgcn.kernel.k01.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k01.lds.t undef, align 16, !absolute_symbol !0
+; OPT: @llvm.amdgcn.kernel.k23.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k23.lds.t undef, align 8, !absolute_symbol !0
+; OPT: @llvm.amdgcn.kernel.k123.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k123.lds.t undef, align 16, !absolute_symbol !0
 
 ; Salient parts of the IR lookup table check:
 ; It has (top level) size 3 as there are 3 kernels that call functions which use lds
@@ -218,7 +218,7 @@ define amdgpu_kernel void @kernel_no_table() {
 
 ; Access two variables, will allocate those two
 define amdgpu_kernel void @k01() {
-; OPT-LABEL: @k01() !llvm.amdgcn.lds.kernel.id !0 {
+; OPT-LABEL: @k01() !llvm.amdgcn.lds.kernel.id !1 {
 ; OPT-NEXT:    call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k01.lds) ]
 ; OPT-NEXT:    call void @f0()
 ; OPT-NEXT:    call void @f1()
@@ -256,7 +256,7 @@ define amdgpu_kernel void @k01() {
 }
 
 define amdgpu_kernel void @k23() {
-; OPT-LABEL: @k23() !llvm.amdgcn.lds.kernel.id !1 {
+; OPT-LABEL: @k23() !llvm.amdgcn.lds.kernel.id !2 {
 ; OPT-NEXT:    call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds) ]
 ; OPT-NEXT:    call void @f2()
 ; OPT-NEXT:    call void @f3()
@@ -295,12 +295,12 @@ define amdgpu_kernel void @k23() {
 
 ; Access and allocate three variables
 define amdgpu_kernel void @k123() {
-; OPT-LABEL: @k123() !llvm.amdgcn.lds.kernel.id !2 {
+; OPT-LABEL: @k123() !llvm.amdgcn.lds.kernel.id !3 {
 ; OPT-NEXT:    call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds) ]
 ; OPT-NEXT:    call void @f1()
-; OPT-NEXT:    [[LD:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 2, !alias.scope !3, !noalias !6
+; OPT-NEXT:    [[LD:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 2, !alias.scope !4, !noalias !7
 ; OPT-NEXT:    [[MUL:%.*]] = mul i8 [[LD]], 8
-; OPT-NEXT:    store i8 [[MUL]], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 2, !alias.scope !3, !noalias !6
+; OPT-NEXT:    store i8 [[MUL]], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 2, !alias.scope !4, !noalias !7
 ; OPT-NEXT:    call void @f2()
 ; OPT-NEXT:    ret void
 ;
@@ -346,9 +346,10 @@ define amdgpu_kernel void @k123() {
 
 ; OPT: declare i32 @llvm.amdgcn.lds.kernel.id()
 
-!0 = !{i32 0}
-!1 = !{i32 2}
-!2 = !{i32 1}
+!0 = !{i64 0, i64 1}
+!1 = !{i32 0}
+!2 = !{i32 2}
+!3 = !{i32 1}
 
 
 ; Table size length number-kernels * number-variables * sizeof(uint16_t)


        


More information about the llvm-commits mailing list