[llvm] d3dda42 - [amdgpu][nfc] Replace ad hoc LDS frame recalculation with absolute_symbol MD
Jon Chesterfield via llvm-commits
llvm-commits at lists.llvm.org
Sun Mar 12 06:48:13 PDT 2023
Author: Jon Chesterfield
Date: 2023-03-12T13:47:48Z
New Revision: d3dda422bfd1dc281df944b4a07bcd6816e2ee94
URL: https://github.com/llvm/llvm-project/commit/d3dda422bfd1dc281df944b4a07bcd6816e2ee94
DIFF: https://github.com/llvm/llvm-project/commit/d3dda422bfd1dc281df944b4a07bcd6816e2ee94.diff
LOG: [amdgpu][nfc] Replace ad hoc LDS frame recalculation with absolute_symbol MD
Post-ISel, LDS variables have absolute addresses. Representing them as
such is simpler than the frame recalculation currently used to build the
assembler tables from their addresses.
This is a precursor to lowering dynamic/external LDS accesses from non-kernel
functions.
Reviewed By: arsenm
Differential Revision: https://reviews.llvm.org/D144221
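
For readers unfamiliar with !absolute_symbol, the pass now records each fixed LDS address on the replacement struct as a half-open range [Address, Address+1) whose single element is the struct's LDS offset. A minimal illustrative sketch, mirroring the form checked by the updated tests below:

    @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t undef, align 8, !absolute_symbol !0
    !0 = !{i64 0, i64 1} ; single-element range [0, 1): the struct is at LDS address 0

ISel and the MC constant lowering then read this range back (getLDSAbsoluteAddress calls getAbsoluteSymbolRange) instead of recomputing the frame layout.
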
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll
llvm/test/CodeGen/AMDGPU/lower-kernel-lds.ll
llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-memcpy.ll
llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-merge.ll
llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa.ll
llvm/test/CodeGen/AMDGPU/lower-module-lds-single-var-unambiguous.ll
llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll
llvm/test/CodeGen/AMDGPU/lower-module-lds-via-table.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index f25dd8e003fd..6c9cbcd5f8d5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -1329,15 +1329,10 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = G->getGlobal();
- if (G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
- if (!MFI->isModuleEntryFunction()) {
- if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) {
- if (AMDGPUMachineFunction::isKnownAddressLDSGlobal(*GVar)) {
- unsigned Offset =
- AMDGPUMachineFunction::calculateKnownAddressOfLDSGlobal(*GVar);
- return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType());
- }
- }
+ if (!MFI->isModuleEntryFunction()) {
+ if (std::optional<uint32_t> Address =
+ AMDGPUMachineFunction::getLDSAbsoluteAddress(*GV)) {
+ return DAG.getConstant(*Address, SDLoc(Op), Op.getValueType());
}
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index 0804fda79d60..d44e280640ee 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -608,6 +608,19 @@ class AMDGPULowerModuleLDS : public ModulePass {
return MostUsed.GV;
}
+ static void recordLDSAbsoluteAddress(Module *M, GlobalVariable *GV,
+ uint32_t Address) {
+ // Write the specified address into metadata where it can be retrieved by
+ // the assembler. Format is a half open range, [Address Address+1)
+ LLVMContext &Ctx = M->getContext();
+ auto *IntTy =
+ M->getDataLayout().getIntPtrType(Ctx, AMDGPUAS::LOCAL_ADDRESS);
+ auto *MinC = ConstantAsMetadata::get(ConstantInt::get(IntTy, Address));
+ auto *MaxC = ConstantAsMetadata::get(ConstantInt::get(IntTy, Address + 1));
+ GV->setMetadata(LLVMContext::MD_absolute_symbol,
+ MDNode::get(Ctx, {MinC, MaxC}));
+ }
+
bool runOnModule(Module &M) override {
LLVMContext &Ctx = M.getContext();
CallGraph CG = CallGraph(M);
@@ -708,17 +721,21 @@ class AMDGPULowerModuleLDS : public ModulePass {
kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
TableLookupVariables);
+ GlobalVariable *MaybeModuleScopeStruct = nullptr;
if (!ModuleScopeVariables.empty()) {
LDSVariableReplacement ModuleScopeReplacement =
createLDSVariableReplacement(M, "llvm.amdgcn.module.lds",
ModuleScopeVariables);
-
+ MaybeModuleScopeStruct = ModuleScopeReplacement.SGV;
appendToCompilerUsed(M,
{static_cast<GlobalValue *>(
ConstantExpr::getPointerBitCastOrAddrSpaceCast(
cast<Constant>(ModuleScopeReplacement.SGV),
Type::getInt8PtrTy(Ctx)))});
+ // module.lds will be allocated at zero in any kernel that allocates it
+ recordLDSAbsoluteAddress(&M, ModuleScopeReplacement.SGV, 0);
+
// historic
removeLocalVarsFromUsedLists(M, ModuleScopeVariables);
@@ -806,6 +823,33 @@ class AMDGPULowerModuleLDS : public ModulePass {
auto Replacement =
createLDSVariableReplacement(M, VarName, KernelUsedVariables);
+ // This struct is allocated at a predictable address that can be
+ // calculated now, recorded in metadata then used to lower references to
+ // it during codegen.
+ {
+ // frame layout, starting from 0
+ //{
+ // module.lds
+ // alignment padding
+ // kernel instance
+ //}
+
+ if (!MaybeModuleScopeStruct ||
+ Func.hasFnAttribute("amdgpu-elide-module-lds")) {
+ // There's no module.lds for this kernel so this replacement struct
+ // goes first
+ recordLDSAbsoluteAddress(&M, Replacement.SGV, 0);
+ } else {
+ const DataLayout &DL = M.getDataLayout();
+ TypeSize ModuleSize =
+ DL.getTypeAllocSize(MaybeModuleScopeStruct->getValueType());
+ GlobalVariable *KernelStruct = Replacement.SGV;
+ Align KernelAlign = AMDGPU::getAlign(DL, KernelStruct);
+ recordLDSAbsoluteAddress(&M, Replacement.SGV,
+ alignTo(ModuleSize, KernelAlign));
+ }
+ }
+
// remove preserves existing codegen
removeLocalVarsFromUsedLists(M, KernelUsedVariables);
KernelToReplacement[&Func] = Replacement;
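
To make the recorded kernel-struct address concrete, a worked example with hypothetical sizes (not taken from any test in this patch): if %llvm.amdgcn.module.lds.t has an alloc size of 12 bytes and the kernel's replacement struct requires 16-byte alignment, the struct is recorded at alignTo(12, 16) = 16, i.e. an !absolute_symbol range of [16, 17). A kernel that elides module.lds, or a module with no module.lds at all, records offset 0 instead.
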
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index d88a2cd961b2..da75f5759a8d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -13,6 +13,7 @@
//
#include "AMDGPUMCInstLower.h"
+#include "AMDGPU.h"
#include "AMDGPUAsmPrinter.h"
#include "AMDGPUMachineFunction.h"
#include "AMDGPUTargetMachine.h"
@@ -168,12 +169,11 @@ bool AMDGPUAsmPrinter::lowerOperand(const MachineOperand &MO,
const MCExpr *AMDGPUAsmPrinter::lowerConstant(const Constant *CV) {
// Intercept LDS variables with known addresses
- if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(CV)) {
- if (AMDGPUMachineFunction::isKnownAddressLDSGlobal(*GV)) {
- unsigned offset =
- AMDGPUMachineFunction::calculateKnownAddressOfLDSGlobal(*GV);
- Constant *C = ConstantInt::get(CV->getContext(), APInt(32, offset));
- return AsmPrinter::lowerConstant(C);
+ if (const GlobalVariable *GV = dyn_cast<const GlobalVariable>(CV)) {
+ if (std::optional<uint32_t> Address =
+ AMDGPUMachineFunction::getLDSAbsoluteAddress(*GV)) {
+ auto *IntTy = Type::getInt32Ty(CV->getContext());
+ return AsmPrinter::lowerConstant(ConstantInt::get(IntTy, *Address));
}
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
index a6a32b98f44c..e70afd72462e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -11,7 +11,9 @@
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -89,24 +91,7 @@ unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
static constexpr StringLiteral ModuleLDSName = "llvm.amdgcn.module.lds";
-bool AMDGPUMachineFunction::isKnownAddressLDSGlobal(const GlobalVariable &GV) {
- auto name = GV.getName();
- return (name == ModuleLDSName) ||
- (name.startswith("llvm.amdgcn.kernel.") && name.endswith(".lds"));
-}
-
-const Function *AMDGPUMachineFunction::getKernelLDSFunctionFromGlobal(
- const GlobalVariable &GV) {
- const Module &M = *GV.getParent();
- StringRef N(GV.getName());
- if (N.consume_front("llvm.amdgcn.kernel.") && N.consume_back(".lds")) {
- return M.getFunction(N);
- }
- return nullptr;
-}
-
-const GlobalVariable *
-AMDGPUMachineFunction::getKernelLDSGlobalFromFunction(const Function &F) {
+static const GlobalVariable *getKernelLDSGlobalFromFunction(const Function &F) {
const Module *M = F.getParent();
std::string KernelLDSName = "llvm.amdgcn.kernel.";
KernelLDSName += F.getName();
@@ -119,40 +104,8 @@ static bool canElideModuleLDS(const Function &F) {
return F.hasFnAttribute("amdgpu-elide-module-lds");
}
-unsigned AMDGPUMachineFunction::calculateKnownAddressOfLDSGlobal(
- const GlobalVariable &GV) {
- // module.lds, then alignment padding, then kernel.lds, then other variables
- // if any
-
- assert(isKnownAddressLDSGlobal(GV));
- unsigned Offset = 0;
-
- if (GV.getName() == ModuleLDSName) {
- return 0;
- }
-
- const Module *M = GV.getParent();
- const DataLayout &DL = M->getDataLayout();
-
- const GlobalVariable *GVM = M->getNamedGlobal(ModuleLDSName);
- const Function *f = getKernelLDSFunctionFromGlobal(GV);
-
- // Account for module.lds if allocated for this function
- if (GVM && f && !canElideModuleLDS(*f)) {
- // allocator aligns this to var align, but it's zero to begin with
- Offset += DL.getTypeAllocSize(GVM->getValueType());
- }
-
- // No dynamic LDS alignment done by allocateModuleLDSGlobal
- Offset = alignTo(
- Offset, DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType()));
-
- return Offset;
-}
-
void AMDGPUMachineFunction::allocateKnownAddressLDSGlobal(const Function &F) {
const Module *M = F.getParent();
-
// This function is called before allocating any other LDS so that it can
// reliably put values at known addresses. Consequently, dynamic LDS, if
// present, will not yet have been allocated
@@ -180,40 +133,60 @@ void AMDGPUMachineFunction::allocateKnownAddressLDSGlobal(const Function &F) {
const GlobalVariable *KV = getKernelLDSGlobalFromFunction(F);
if (GV && !canElideModuleLDS(F)) {
- assert(isKnownAddressLDSGlobal(*GV));
unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *GV, Align());
- (void)Offset;
- assert(Offset == calculateKnownAddressOfLDSGlobal(*GV) &&
- "Module LDS expected to be allocated before other LDS");
+ std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*GV);
+ if (!Expect || (Offset != Expect)) {
+ report_fatal_error("Inconsistent metadata on module LDS variable");
+ }
}
if (KV) {
// The per-kernel offset is deterministic because it is allocated
// before any other non-module LDS variables.
- assert(isKnownAddressLDSGlobal(*KV));
unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *KV, Align());
- (void)Offset;
- assert(Offset == calculateKnownAddressOfLDSGlobal(*KV) &&
- "Kernel LDS expected to be immediately after module LDS");
+ std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*KV);
+ if (!Expect || (Offset != Expect)) {
+ report_fatal_error("Inconsistent metadata on kernel LDS variable");
+ }
}
}
}
std::optional<uint32_t>
AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) {
- auto MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");
+ // TODO: Would be more consistent with the abs symbols to use a range
+ MDNode *MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");
if (MD && MD->getNumOperands() == 1) {
- ConstantInt *KnownSize = mdconst::extract<ConstantInt>(MD->getOperand(0));
- if (KnownSize) {
- uint64_t V = KnownSize->getZExtValue();
- if (V <= UINT32_MAX) {
- return V;
+ if (ConstantInt *KnownSize =
+ mdconst::extract<ConstantInt>(MD->getOperand(0))) {
+ uint64_t ZExt = KnownSize->getZExtValue();
+ if (ZExt <= UINT32_MAX) {
+ return ZExt;
}
}
}
return {};
}
+std::optional<uint32_t>
+AMDGPUMachineFunction::getLDSAbsoluteAddress(const GlobalValue &GV) {
+ if (GV.getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
+ return {};
+
+ std::optional<ConstantRange> AbsSymRange = GV.getAbsoluteSymbolRange();
+ if (!AbsSymRange)
+ return {};
+
+ if (const APInt *V = AbsSymRange->getSingleElement()) {
+ std::optional<uint64_t> ZExt = V->tryZExtValue();
+ if (ZExt && (*ZExt <= UINT32_MAX)) {
+ return *ZExt;
+ }
+ }
+
+ return {};
+}
+
void AMDGPUMachineFunction::setDynLDSAlign(const DataLayout &DL,
const GlobalVariable &GV) {
assert(DL.getTypeAllocSize(GV.getValueType()).isZero());
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
index f27f8252a4d8..ba4c55a58c86 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -106,20 +106,8 @@ class AMDGPUMachineFunction : public MachineFunctionInfo {
void allocateKnownAddressLDSGlobal(const Function &F);
- // A kernel function may have an associated LDS allocation, and a kernel-scope
- // LDS allocation must have an associated kernel function
-
- // LDS allocation should have an associated kernel function
- static const Function *
- getKernelLDSFunctionFromGlobal(const GlobalVariable &GV);
- static const GlobalVariable *
- getKernelLDSGlobalFromFunction(const Function &F);
-
- // Module or kernel scope LDS variable
- static bool isKnownAddressLDSGlobal(const GlobalVariable &GV);
- static unsigned calculateKnownAddressOfLDSGlobal(const GlobalVariable &GV);
-
static std::optional<uint32_t> getLDSKernelIdMetadata(const Function &F);
+ static std::optional<uint32_t> getLDSAbsoluteAddress(const GlobalValue &GV);
Align getDynLDSAlign() const { return DynLDSAlign; }
diff --git a/llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll b/llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll
index 5aa5fcac08a9..0c47b439706b 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll
@@ -14,19 +14,19 @@
; CHECK: %llvm.amdgcn.kernel.k3.lds.t = type { [4 x i8] }
;.
-; CHECK: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t undef, align 8
+; CHECK: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t undef, align 8, !absolute_symbol !0
; CHECK: @llvm.compiler.used = appending global [1 x ptr] [ptr addrspacecast (ptr addrspace(3) @llvm.amdgcn.module.lds to ptr)], section "llvm.metadata"
-; CHECK: @llvm.amdgcn.kernel.k0.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k0.lds.t undef, align 16
-; CHECK: @llvm.amdgcn.kernel.k1.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k1.lds.t undef, align 16
-; CHECK: @llvm.amdgcn.kernel.k2.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k2.lds.t undef, align 2
-; CHECK: @llvm.amdgcn.kernel.k3.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k3.lds.t undef, align 4
+; CHECK: @llvm.amdgcn.kernel.k0.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k0.lds.t undef, align 16, !absolute_symbol !0
+; CHECK: @llvm.amdgcn.kernel.k1.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k1.lds.t undef, align 16, !absolute_symbol !0
+; CHECK: @llvm.amdgcn.kernel.k2.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k2.lds.t undef, align 2, !absolute_symbol !0
+; CHECK: @llvm.amdgcn.kernel.k3.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k3.lds.t undef, align 4, !absolute_symbol !0
;.
define amdgpu_kernel void @k0() #0 {
; CHECK-LABEL: @k0(
-; CHECK-NEXT: store i8 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 3), align 2, !alias.scope !0, !noalias !3
-; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 2), align 4, !alias.scope !7, !noalias !8
-; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 1), align 16, !alias.scope !9, !noalias !10
-; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, align 16, !alias.scope !11, !noalias !12
+; CHECK-NEXT: store i8 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 3), align 2, !alias.scope !1, !noalias !4
+; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 2), align 4, !alias.scope !8, !noalias !9
+; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 1), align 16, !alias.scope !10, !noalias !11
+; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, align 16, !alias.scope !12, !noalias !13
; CHECK-NEXT: ret void
store i8 1, ptr addrspace(3) @lds.size.1.align.1, align 1
@@ -41,9 +41,9 @@ define amdgpu_kernel void @k0() #0 {
define amdgpu_kernel void @k1() #0 {
; CHECK-LABEL: @k1(
-; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, i32 2), align 4, !alias.scope !13, !noalias !16
-; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, i32 1), align 16, !alias.scope !19, !noalias !20
-; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, align 16, !alias.scope !21, !noalias !22
+; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, i32 2), align 4, !alias.scope !14, !noalias !17
+; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, i32 1), align 16, !alias.scope !20, !noalias !21
+; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, align 16, !alias.scope !22, !noalias !23
; CHECK-NEXT: ret void
;
store i8 2, ptr addrspace(3) @lds.size.2.align.2, align 2
@@ -83,8 +83,8 @@ define amdgpu_kernel void @calls_f0() {
define void @f0() {
; CHECK-LABEL: define void @f0(
-; CHECK-NEXT: store i8 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.module.lds.t, ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 8, !noalias !23
-; CHECK-NEXT: store i8 8, ptr addrspace(3) @llvm.amdgcn.module.lds, align 8, !noalias !23
+; CHECK-NEXT: store i8 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.module.lds.t, ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 8, !noalias !24
+; CHECK-NEXT: store i8 8, ptr addrspace(3) @llvm.amdgcn.module.lds, align 8, !noalias !24
; CHECK-NEXT: ret void
store i8 1, ptr addrspace(3) @lds.size.1.align.1, align 1
@@ -95,3 +95,5 @@ define void @f0() {
attributes #0 = { "amdgpu-elide-module-lds" }
; CHECK: attributes #0 = { "amdgpu-elide-module-lds" }
+
+; CHECK: !0 = !{i64 0, i64 1}
diff --git a/llvm/test/CodeGen/AMDGPU/lower-kernel-lds.ll b/llvm/test/CodeGen/AMDGPU/lower-kernel-lds.ll
index da1923e1c112..e9be71e8f483 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-kernel-lds.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-kernel-lds.ll
@@ -11,15 +11,15 @@
;.
; CHECK: @lds.k2 = addrspace(3) global [1 x i8] undef, align 1
-; CHECK: @llvm.amdgcn.kernel.k0.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k0.lds.t undef, align 16
-; CHECK: @llvm.amdgcn.kernel.k1.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k1.lds.t undef, align 16
+; CHECK: @llvm.amdgcn.kernel.k0.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k0.lds.t undef, align 16, !absolute_symbol !0
+; CHECK: @llvm.amdgcn.kernel.k1.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k1.lds.t undef, align 16, !absolute_symbol !0
;.
define amdgpu_kernel void @k0() {
; CHECK-LABEL: @k0(
-; CHECK-NEXT: store i8 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 3), align 2, !alias.scope !0, !noalias !3
-; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 2), align 4, !alias.scope !7, !noalias !8
-; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 1), align 16, !alias.scope !9, !noalias !10
-; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, align 16, !alias.scope !11, !noalias !12
+; CHECK-NEXT: store i8 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 3), align 2, !alias.scope !1, !noalias !4
+; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 2), align 4, !alias.scope !8, !noalias !9
+; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 1), align 16, !alias.scope !10, !noalias !11
+; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, align 16, !alias.scope !12, !noalias !13
; CHECK-NEXT: ret void
store i8 1, ptr addrspace(3) @lds.size.1.align.1, align 1
@@ -34,9 +34,9 @@ define amdgpu_kernel void @k0() {
define amdgpu_kernel void @k1() {
; CHECK-LABEL: @k1(
-; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, i32 2), align 4, !alias.scope !13, !noalias !16
-; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, i32 1), align 16, !alias.scope !19, !noalias !20
-; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, align 16, !alias.scope !21, !noalias !22
+; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, i32 2), align 4, !alias.scope !14, !noalias !17
+; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, i32 1), align 16, !alias.scope !20, !noalias !21
+; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, align 16, !alias.scope !22, !noalias !23
; CHECK-NEXT: ret void
;
store i8 2, ptr addrspace(3) @lds.size.2.align.2, align 2
@@ -61,3 +61,5 @@ define amdgpu_ps void @k2() {
ret void
}
+
+; CHECK: !0 = !{i64 0, i64 1}
diff --git a/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-memcpy.ll b/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-memcpy.ll
index 35a4d46b3a5a..72ba840c15f1 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-memcpy.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-memcpy.ll
@@ -13,7 +13,7 @@ $_f2 = comdat any
@_f2 = linkonce_odr hidden local_unnamed_addr addrspace(3) global %vec_type undef, comdat, align 1
;.
-; CHECK: @[[LLVM_AMDGCN_KERNEL_TEST_LDS:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [[LLVM_AMDGCN_KERNEL_TEST_LDS_T:%.*]] undef, align 4
+; CHECK: @[[LLVM_AMDGCN_KERNEL_TEST_LDS:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [[LLVM_AMDGCN_KERNEL_TEST_LDS_T:%.*]] undef, align 4, !absolute_symbol !0
;.
define protected amdgpu_kernel void @test(ptr addrspace(1) nocapture %ptr.coerce) local_unnamed_addr #0 {
; GCN-LABEL: test:
@@ -33,13 +33,13 @@ define protected amdgpu_kernel void @test(ptr addrspace(1) nocapture %ptr.coerce
; GCN-NEXT: s_endpgm
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
-; CHECK-NEXT: store i8 3, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, align 4, !alias.scope !0, !noalias !3
-; CHECK-NEXT: tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef align 1 dereferenceable(3) getelementptr inbounds (%llvm.amdgcn.kernel.test.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), ptr addrspace(3) noundef align 1 dereferenceable(3) @llvm.amdgcn.kernel.test.lds, i64 3, i1 false), !alias.scope !5, !noalias !6
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.test.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), align 4, !alias.scope !3, !noalias !0
+; CHECK-NEXT: store i8 3, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, align 4, !alias.scope !1, !noalias !4
+; CHECK-NEXT: tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef align 1 dereferenceable(3) getelementptr inbounds (%llvm.amdgcn.kernel.test.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), ptr addrspace(3) noundef align 1 dereferenceable(3) @llvm.amdgcn.kernel.test.lds, i64 3, i1 false), !alias.scope !6, !noalias !7
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.test.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), align 4, !alias.scope !4, !noalias !1
; CHECK-NEXT: [[CMP_I_I:%.*]] = icmp eq i8 [[TMP4]], 3
-; CHECK-NEXT: store i8 2, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, align 4, !alias.scope !0, !noalias !3
-; CHECK-NEXT: tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef align 1 dereferenceable(3) getelementptr inbounds (%llvm.amdgcn.kernel.test.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), ptr addrspace(3) noundef align 1 dereferenceable(3) @llvm.amdgcn.kernel.test.lds, i64 3, i1 false), !alias.scope !5, !noalias !6
-; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.test.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), align 4, !alias.scope !3, !noalias !0
+; CHECK-NEXT: store i8 2, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, align 4, !alias.scope !1, !noalias !4
+; CHECK-NEXT: tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef align 1 dereferenceable(3) getelementptr inbounds (%llvm.amdgcn.kernel.test.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), ptr addrspace(3) noundef align 1 dereferenceable(3) @llvm.amdgcn.kernel.test.lds, i64 3, i1 false), !alias.scope !6, !noalias !7
+; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.test.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), align 4, !alias.scope !4, !noalias !1
; CHECK-NEXT: [[CMP_I_I19:%.*]] = icmp eq i8 [[TMP9]], 2
; CHECK-NEXT: [[TMP10:%.*]] = and i1 [[CMP_I_I19]], [[CMP_I_I]]
; CHECK-NEXT: [[FROMBOOL8:%.*]] = zext i1 [[TMP10]] to i8
@@ -66,11 +66,12 @@ declare void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noalias nocapture writeonly
;.
; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
;.
-; CHECK: [[META0:![0-9]+]] = !{!1}
-; CHECK: [[META1:![0-9]+]] = distinct !{!1, !2}
-; CHECK: [[META2:![0-9]+]] = distinct !{!2}
-; CHECK: [[META3:![0-9]+]] = !{!4}
-; CHECK: [[META4:![0-9]+]] = distinct !{!4, !2}
-; CHECK: [[META5:![0-9]+]] = !{!4, !1}
-; CHECK: [[META6:![0-9]+]] = !{}
+; CHECK: [[META0:![0-9]+]] = !{i64 0, i64 1}
+; CHECK: [[META1:![0-9]+]] = !{!2}
+; CHECK: [[META2:![0-9]+]] = distinct !{!2, !3}
+; CHECK: [[META3:![0-9]+]] = distinct !{!3}
+; CHECK: [[META4:![0-9]+]] = !{!5}
+; CHECK: [[META5:![0-9]+]] = distinct !{!5, !3}
+; CHECK: [[META6:![0-9]+]] = !{!5, !2}
+; CHECK: [[META7:![0-9]+]] = !{}
;.
diff --git a/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-merge.ll b/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-merge.ll
index 8ed0fb7f6687..10064664aa99 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-merge.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-merge.ll
@@ -5,10 +5,10 @@
@b = internal unnamed_addr addrspace(3) global [64 x i32] undef, align 4
; CHECK-LABEL: @no_clobber_ds_load_stores_x2_preexisting_aa
-; CHECK: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, align 16, !tbaa !0, !noalias !5
-; CHECK: %val.a = load i32, ptr addrspace(3) %gep.a, align 4, !tbaa !0, !noalias !5
-; CHECK: store i32 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, i32 0, i32 1), align 16, !tbaa !0, !noalias !5
-; CHECK: %val.b = load i32, ptr addrspace(3) %gep.b, align 4, !tbaa !0, !noalias !5
+; CHECK: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, align 16, !tbaa !1, !noalias !6
+; CHECK: %val.a = load i32, ptr addrspace(3) %gep.a, align 4, !tbaa !1, !noalias !6
+; CHECK: store i32 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, i32 0, i32 1), align 16, !tbaa !1, !noalias !6
+; CHECK: %val.b = load i32, ptr addrspace(3) %gep.b, align 4, !tbaa !1, !noalias !6
define amdgpu_kernel void @no_clobber_ds_load_stores_x2_preexisting_aa(ptr addrspace(1) %arg, i32 %i) {
bb:
@@ -34,9 +34,10 @@ bb:
!8 = !{!"omnipotent char", !9, i64 0}
!9 = !{!"Simple C++ TBAA"}
-; CHECK:!0 = !{!1, !2, i64 0}
-; CHECK:!1 = !{!"no_clobber_ds_load_stores_x2_preexisting_aa", !2, i64 0}
-; CHECK:!2 = !{!"int", !3, i64 0}
-; CHECK:!3 = !{!"omnipotent char", !4, i64 0}
-; CHECK:!4 = !{!"Simple C++ TBAA"}
-; CHECK:!5 = !{}
+; CHECK:!0 = !{i64 0, i64 1}
+; CHECK:!1 = !{!2, !3, i64 0}
+; CHECK:!2 = !{!"no_clobber_ds_load_stores_x2_preexisting_aa", !3, i64 0}
+; CHECK:!3 = !{!"int", !4, i64 0}
+; CHECK:!4 = !{!"omnipotent char", !5, i64 0}
+; CHECK:!5 = !{!"Simple C++ TBAA"}
+; CHECK:!6 = !{}
diff --git a/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa.ll b/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa.ll
index 983516c3cabd..d7697c903721 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa.ll
@@ -17,10 +17,10 @@
; GCN: ds_read_b32
; CHECK-LABEL: @no_clobber_ds_load_stores_x2
-; CHECK: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds, align 16, !alias.scope !0, !noalias !3
-; CHECK: %val.a = load i32, ptr addrspace(3) %gep.a, align 4, !alias.scope !0, !noalias !3
-; CHECK: store i32 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds, i32 0, i32 1), align 16, !alias.scope !3, !noalias !0
-; CHECK: %val.b = load i32, ptr addrspace(3) %gep.b, align 4, !alias.scope !3, !noalias !0
+; CHECK: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds, align 16, !alias.scope !1, !noalias !4
+; CHECK: %val.a = load i32, ptr addrspace(3) %gep.a, align 4, !alias.scope !1, !noalias !4
+; CHECK: store i32 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds, i32 0, i32 1), align 16, !alias.scope !4, !noalias !1
+; CHECK: %val.b = load i32, ptr addrspace(3) %gep.b, align 4, !alias.scope !4, !noalias !1
define amdgpu_kernel void @no_clobber_ds_load_stores_x2(ptr addrspace(1) %arg, i32 %i) {
bb:
@@ -44,13 +44,13 @@ bb:
; GCN-DAG: ds_read_b32
; CHECK-LABEL: @no_clobber_ds_load_stores_x3
-; CHECK: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, align 16, !alias.scope !5, !noalias !8
+; CHECK: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, align 16, !alias.scope !6, !noalias !9
; CHECK: %gep.a = getelementptr inbounds [64 x i32], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 %i
-; CHECK: %val.a = load i32, ptr addrspace(3) %gep.a, align 4, !alias.scope !5, !noalias !8
-; CHECK: store i32 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 1), align 16, !alias.scope !11, !noalias !12
-; CHECK: %val.b = load i32, ptr addrspace(3) %gep.b, align 4, !alias.scope !11, !noalias !12
-; CHECK: store i32 3, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 2), align 16, !alias.scope !13, !noalias !14
-; CHECK: %val.c = load i32, ptr addrspace(3) %gep.c, align 4, !alias.scope !13, !noalias !14
+; CHECK: %val.a = load i32, ptr addrspace(3) %gep.a, align 4, !alias.scope !6, !noalias !9
+; CHECK: store i32 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 1), align 16, !alias.scope !12, !noalias !13
+; CHECK: %val.b = load i32, ptr addrspace(3) %gep.b, align 4, !alias.scope !12, !noalias !13
+; CHECK: store i32 3, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 2), align 16, !alias.scope !14, !noalias !15
+; CHECK: %val.c = load i32, ptr addrspace(3) %gep.c, align 4, !alias.scope !14, !noalias !15
define amdgpu_kernel void @no_clobber_ds_load_stores_x3(ptr addrspace(1) %arg, i32 %i) {
bb:
@@ -69,18 +69,19 @@ bb:
ret void
}
-; CHECK: !0 = !{!1}
-; CHECK: !1 = distinct !{!1, !2}
-; CHECK: !2 = distinct !{!2}
-; CHECK: !3 = !{!4}
-; CHECK: !4 = distinct !{!4, !2}
-; CHECK: !5 = !{!6}
-; CHECK: !6 = distinct !{!6, !7}
-; CHECK: !7 = distinct !{!7}
-; CHECK: !8 = !{!9, !10}
-; CHECK: !9 = distinct !{!9, !7}
-; CHECK: !10 = distinct !{!10, !7}
-; CHECK: !11 = !{!9}
-; CHECK: !12 = !{!6, !10}
-; CHECK: !13 = !{!10}
-; CHECK: !14 = !{!6, !9}
+; CHECK: !0 = !{i64 0, i64 1}
+; CHECK: !1 = !{!2}
+; CHECK: !2 = distinct !{!2, !3}
+; CHECK: !3 = distinct !{!3}
+; CHECK: !4 = !{!5}
+; CHECK: !5 = distinct !{!5, !3}
+; CHECK: !6 = !{!7}
+; CHECK: !7 = distinct !{!7, !8}
+; CHECK: !8 = distinct !{!8}
+; CHECK: !9 = !{!10, !11}
+; CHECK: !10 = distinct !{!10, !8}
+; CHECK: !11 = distinct !{!11, !8}
+; CHECK: !12 = !{!10}
+; CHECK: !13 = !{!7, !11}
+; CHECK: !14 = !{!11}
+; CHECK: !15 = !{!7, !10}
diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-single-var-unambiguous.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-single-var-unambiguous.ll
index c380bb1c9b92..b3a0f94d4cde 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-single-var-unambiguous.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-single-var-unambiguous.ll
@@ -28,9 +28,9 @@ define amdgpu_kernel void @k0() {
@f0.lds = addrspace(3) global i16 undef
define void @f0() {
; MODULE-LABEL: @f0(
-; MODULE-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !alias.scope !0, !noalias !3
+; MODULE-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !alias.scope !1, !noalias !4
; MODULE-NEXT: [[MUL:%.*]] = mul i16 [[LD]], 3
-; MODULE-NEXT: store i16 [[MUL]], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !alias.scope !0, !noalias !3
+; MODULE-NEXT: store i16 [[MUL]], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !alias.scope !1, !noalias !4
; MODULE-NEXT: ret void
;
; TABLE-LABEL: @f0(
@@ -60,7 +60,7 @@ define void @f0() {
define amdgpu_kernel void @k_f0() {
; MODULE-LABEL: @k_f0(
-; MODULE-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ]
+; MODULE-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ], !alias.scope !5, !noalias !1
; MODULE-NEXT: call void @f0()
; MODULE-NEXT: ret void
;
@@ -82,9 +82,9 @@ define amdgpu_kernel void @k_f0() {
@both.lds = addrspace(3) global i32 undef
define void @f_both() {
; MODULE-LABEL: @f_both(
-; MODULE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope !4, !noalias !3
+; MODULE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope !5, !noalias !4
; MODULE-NEXT: [[MUL:%.*]] = mul i32 [[LD]], 4
-; MODULE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope !4, !noalias !3
+; MODULE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope !5, !noalias !4
; MODULE-NEXT: ret void
;
; TABLE-LABEL: @f_both(
@@ -115,9 +115,9 @@ define void @f_both() {
define amdgpu_kernel void @k0_both() {
; MODULE-LABEL: @k0_both(
; MODULE-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ]
-; MODULE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope !4, !noalias !0
+; MODULE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope !5, !noalias !1
; MODULE-NEXT: [[MUL:%.*]] = mul i32 [[LD]], 5
-; MODULE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope !4, !noalias !0
+; MODULE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope !5, !noalias !1
; MODULE-NEXT: call void @f_both()
; MODULE-NEXT: ret void
;
diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll
index cdead7560b29..75211d595273 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll
@@ -11,12 +11,12 @@
@v3 = addrspace(3) global i8 undef
@unused = addrspace(3) global i16 undef
-; OPT: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t undef, align 16
+; OPT: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t undef, align 16, !absolute_symbol !0
; OPT: @llvm.compiler.used = appending global [1 x ptr] [ptr addrspacecast (ptr addrspace(3) @llvm.amdgcn.module.lds to ptr)], section "llvm.metadata"
-; OPT: @llvm.amdgcn.kernel.kernel_no_table.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kernel_no_table.lds.t undef, align 8
-; OPT: @llvm.amdgcn.kernel.k01.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k01.lds.t undef, align 4
-; OPT: @llvm.amdgcn.kernel.k23.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k23.lds.t undef, align 8
-; OPT: @llvm.amdgcn.kernel.k123.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k123.lds.t undef, align 8
+; OPT: @llvm.amdgcn.kernel.kernel_no_table.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kernel_no_table.lds.t undef, align 8, !absolute_symbol !0
+; OPT: @llvm.amdgcn.kernel.k01.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k01.lds.t undef, align 4, !absolute_symbol !1
+; OPT: @llvm.amdgcn.kernel.k23.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k23.lds.t undef, align 8, !absolute_symbol !0
+; OPT: @llvm.amdgcn.kernel.k123.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k123.lds.t undef, align 8, !absolute_symbol !2
; OPT{LITERAL}: @llvm.amdgcn.lds.offset.table = internal addrspace(4) constant [2 x [1 x i32]] [[1 x i32] [i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds to i32)], [1 x i32] [i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds to i32)]]
;.
@@ -234,9 +234,9 @@ define amdgpu_kernel void @k123() {
; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds) ]
; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ]
; OPT-NEXT: call void @f1()
-; OPT-NEXT: %ld = load i8, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k123.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 8, !alias.scope !2, !noalias !5
+; OPT-NEXT: %ld = load i8, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k123.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 8, !alias.scope !5, !noalias !8
; OPT-NEXT: %mul = mul i8 %ld, 8
-; OPT-NEXT: store i8 %mul, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k123.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 8, !alias.scope !2, !noalias !5
+; OPT-NEXT: store i8 %mul, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k123.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 8, !alias.scope !5, !noalias !8
; OPT-NEXT: call void @f2()
; OPT-NEXT: ret void
;
@@ -289,13 +289,16 @@ define amdgpu_kernel void @k123() {
; OPT: attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) }
; OPT: attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
;.
-; OPT: !0 = !{i32 1}
-; OPT: !1 = !{i32 0}
-; OPT: !2 = !{!3}
-; OPT: !3 = distinct !{!3, !4}
-; OPT: !4 = distinct !{!4}
+; OPT: !0 = !{i64 0, i64 1}
+; OPT: !1 = !{i64 4, i64 5}
+; OPT: !2 = !{i64 8, i64 9}
+; OPT: !3 = !{i32 1}
+; OPT: !4 = !{i32 0}
; OPT: !5 = !{!6}
-; OPT: !6 = distinct !{!6, !4}
+; OPT: !6 = distinct !{!6, !7}
+; OPT: !7 = distinct !{!7}
+; OPT: !8 = !{!9}
+; OPT: !9 = distinct !{!9, !7}
;.
; Table size length number-kernels * number-variables * sizeof(uint16_t)
diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-table.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-table.ll
index 8b289f12fce4..5b7da296f2e9 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-table.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-table.ll
@@ -15,10 +15,10 @@
; OPT: %llvm.amdgcn.kernel.k23.lds.t = type { i64, i8 }
; OPT: %llvm.amdgcn.kernel.k123.lds.t = type { i16, i8, [5 x i8], i64 }
-; OPT: @llvm.amdgcn.kernel.kernel_no_table.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kernel_no_table.lds.t undef, align 8
-; OPT: @llvm.amdgcn.kernel.k01.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k01.lds.t undef, align 16
-; OPT: @llvm.amdgcn.kernel.k23.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k23.lds.t undef, align 8
-; OPT: @llvm.amdgcn.kernel.k123.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k123.lds.t undef, align 16
+; OPT: @llvm.amdgcn.kernel.kernel_no_table.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kernel_no_table.lds.t undef, align 8, !absolute_symbol !0
+; OPT: @llvm.amdgcn.kernel.k01.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k01.lds.t undef, align 16, !absolute_symbol !0
+; OPT: @llvm.amdgcn.kernel.k23.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k23.lds.t undef, align 8, !absolute_symbol !0
+; OPT: @llvm.amdgcn.kernel.k123.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k123.lds.t undef, align 16, !absolute_symbol !0
; Salient parts of the IR lookup table check:
; It has (top level) size 3 as there are 3 kernels that call functions which use lds
@@ -218,7 +218,7 @@ define amdgpu_kernel void @kernel_no_table() {
; Access two variables, will allocate those two
define amdgpu_kernel void @k01() {
-; OPT-LABEL: @k01() !llvm.amdgcn.lds.kernel.id !0 {
+; OPT-LABEL: @k01() !llvm.amdgcn.lds.kernel.id !1 {
; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k01.lds) ]
; OPT-NEXT: call void @f0()
; OPT-NEXT: call void @f1()
@@ -256,7 +256,7 @@ define amdgpu_kernel void @k01() {
}
define amdgpu_kernel void @k23() {
-; OPT-LABEL: @k23() !llvm.amdgcn.lds.kernel.id !1 {
+; OPT-LABEL: @k23() !llvm.amdgcn.lds.kernel.id !2 {
; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds) ]
; OPT-NEXT: call void @f2()
; OPT-NEXT: call void @f3()
@@ -295,12 +295,12 @@ define amdgpu_kernel void @k23() {
; Access and allocate three variables
define amdgpu_kernel void @k123() {
-; OPT-LABEL: @k123() !llvm.amdgcn.lds.kernel.id !2 {
+; OPT-LABEL: @k123() !llvm.amdgcn.lds.kernel.id !3 {
; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds) ]
; OPT-NEXT: call void @f1()
-; OPT-NEXT: [[LD:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 2, !alias.scope !3, !noalias !6
+; OPT-NEXT: [[LD:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 2, !alias.scope !4, !noalias !7
; OPT-NEXT: [[MUL:%.*]] = mul i8 [[LD]], 8
-; OPT-NEXT: store i8 [[MUL]], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 2, !alias.scope !3, !noalias !6
+; OPT-NEXT: store i8 [[MUL]], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 2, !alias.scope !4, !noalias !7
; OPT-NEXT: call void @f2()
; OPT-NEXT: ret void
;
@@ -346,9 +346,10 @@ define amdgpu_kernel void @k123() {
; OPT: declare i32 @llvm.amdgcn.lds.kernel.id()
-!0 = !{i32 0}
-!1 = !{i32 2}
-!2 = !{i32 1}
+!0 = !{i64 0, i64 1}
+!1 = !{i32 0}
+!2 = !{i32 2}
+!3 = !{i32 1}
; Table size length number-kernels * number-variables * sizeof(uint16_t)