[llvm] r275268 - AMDGPU/SI: Add support for R_AMDGPU_GOTPCREL
Tom Stellard via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 13 07:23:34 PDT 2016
Author: tstellar
Date: Wed Jul 13 09:23:33 2016
New Revision: 275268
URL: http://llvm.org/viewvc/llvm-project?rev=275268&view=rev
Log:
AMDGPU/SI: Add support for R_AMDGPU_GOTPCREL
Reviewers: rafael, ruiu, tony-tye, arsenm, kzhuravl
Subscribers: arsenm, llvm-commits, kzhuravl
Differential Revision: http://reviews.llvm.org/D21484
Added:
llvm/trunk/test/CodeGen/AMDGPU/global-variable-relocs.ll
Removed:
llvm/trunk/test/CodeGen/AMDGPU/global-zero-initializer.ll
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
llvm/trunk/test/CodeGen/AMDGPU/hsa-func.ll
llvm/trunk/test/CodeGen/AMDGPU/hsa-note-no-func.ll
llvm/trunk/test/CodeGen/AMDGPU/hsa.ll
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp?rev=275268&r1=275267&r2=275268&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp Wed Jul 13 09:23:33 2016
@@ -106,7 +106,7 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFil
AMDGPUTargetStreamer *TS =
static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
- TS->EmitDirectiveHSACodeObjectVersion(2, 0);
+ TS->EmitDirectiveHSACodeObjectVersion(2, 1);
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI->getFeatureBits());
TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping,
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp?rev=275268&r1=275267&r2=275268&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp Wed Jul 13 09:23:33 2016
@@ -39,6 +39,13 @@ using namespace llvm;
AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx, const AMDGPUSubtarget &st):
Ctx(ctx), ST(st) { }
+static MCSymbolRefExpr::VariantKind getVariantKind(unsigned MOFlags) {
+ switch (MOFlags) {
+ default: return MCSymbolRefExpr::VK_None;
+ case SIInstrInfo::MO_GOTPCREL: return MCSymbolRefExpr::VK_GOTPCREL;
+ }
+}
+
void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
int MCOpcode = ST.getInstrInfo()->pseudoToMCOpcode(MI->getOpcode());
@@ -69,7 +76,8 @@ void AMDGPUMCInstLower::lower(const Mach
case MachineOperand::MO_GlobalAddress: {
const GlobalValue *GV = MO.getGlobal();
MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(GV->getName()));
- const MCExpr *SymExpr = MCSymbolRefExpr::create(Sym, Ctx);
+ const MCExpr *SymExpr =
+ MCSymbolRefExpr::create(Sym, getVariantKind(MO.getTargetFlags()),Ctx);
const MCExpr *Expr = MCBinaryExpr::createAdd(SymExpr,
MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
MCOp = MCOperand::createExpr(Expr);
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp?rev=275268&r1=275267&r2=275268&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp Wed Jul 13 09:23:33 2016
@@ -134,9 +134,9 @@ static StringRef getGPUOrDefault(const T
}
static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
- if (!RM.hasValue())
- return Reloc::PIC_;
- return *RM;
+ // The AMDGPU toolchain only supports generating shared objects, so we
+ // must always use PIC.
+ return Reloc::PIC_;
}
AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=275268&r1=275267&r2=275268&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Wed Jul 13 09:23:33 2016
@@ -510,12 +510,14 @@ bool SITargetLowering::isMemOpUniform(co
const Value *Ptr = MemNode->getMemOperand()->getValue();
// UndefValue means this is a load of a kernel input. These are uniform.
- // Sometimes LDS instructions have constant pointers
- if (isa<UndefValue>(Ptr) || isa<Argument>(Ptr) || isa<Constant>(Ptr) ||
- isa<GlobalValue>(Ptr))
+ // Sometimes LDS instructions have constant pointers.
+ // If Ptr is null, then that means this mem operand contains a
+ // PseudoSourceValue like GOT.
+ if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
+ isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
return true;
- const Instruction *I = dyn_cast_or_null<Instruction>(Ptr);
+ const Instruction *I = dyn_cast<Instruction>(Ptr);
return I && I->getMetadata("amdgpu.uniform");
}
@@ -1517,27 +1519,22 @@ SDValue SITargetLowering::lowerADDRSPACE
return DAG.getUNDEF(ASC->getValueType(0));
}
+static bool shouldEmitGOTReloc(const GlobalValue *GV,
+ const TargetMachine &TM) {
+ return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
+ !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
+}
+
bool
SITargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
- if (GA->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS)
- return false;
-
- return TargetLowering::isOffsetFoldingLegal(GA);
+ // We can fold offsets for anything that doesn't require a GOT relocation.
+ return GA->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
+ !shouldEmitGOTReloc(GA->getGlobal(), getTargetMachine());
}
-SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
- SDValue Op,
- SelectionDAG &DAG) const {
- GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
-
- if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
- GSD->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS)
- return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
-
- SDLoc DL(GSD);
- const GlobalValue *GV = GSD->getGlobal();
- EVT PtrVT = Op.getValueType();
-
+static SDValue buildPCRelGlobalAddress(SelectionDAG &DAG, const GlobalValue *GV,
+ SDLoc DL, unsigned Offset, EVT PtrVT,
+ unsigned GAFlags = SIInstrInfo::MO_NONE) {
// In order to support pc-relative addressing, the PC_ADD_REL_OFFSET SDNode is
// lowered to the following code sequence:
// s_getpc_b64 s[0:1]
@@ -1555,11 +1552,41 @@ SDValue SITargetLowering::LowerGlobalAdd
// of the s_add_u32 instruction, we end up with an offset that is 4 bytes too
// small. This requires us to add 4 to the global variable offset in order to
// compute the correct address.
- SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32,
- GSD->getOffset() + 4);
+ SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset + 4,
+ GAFlags);
return DAG.getNode(AMDGPUISD::PC_ADD_REL_OFFSET, DL, PtrVT, GA);
}
+SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
+ SDValue Op,
+ SelectionDAG &DAG) const {
+ GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
+
+ if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
+ GSD->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS)
+ return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
+
+ SDLoc DL(GSD);
+ const GlobalValue *GV = GSD->getGlobal();
+ EVT PtrVT = Op.getValueType();
+
+ if (!shouldEmitGOTReloc(GV, getTargetMachine()))
+ return buildPCRelGlobalAddress(DAG, GV, DL, GSD->getOffset(), PtrVT);
+
+ SDValue GOTAddr = buildPCRelGlobalAddress(DAG, GV, DL, 0, PtrVT,
+ SIInstrInfo::MO_GOTPCREL);
+
+ Type *Ty = PtrVT.getTypeForEVT(*DAG.getContext());
+ PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
+ const DataLayout &DataLayout = DAG.getDataLayout();
+ unsigned Align = DataLayout.getABITypeAlignment(PtrTy);
+ // FIXME: Use a PseudoSourceValue once those can be assigned an address space.
+ MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
+
+ return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), GOTAddr,
+ PtrInfo, false, false, true, Align);
+}
+
SDValue SITargetLowering::lowerTRAP(SDValue Op,
SelectionDAG &DAG) const {
const MachineFunction &MF = DAG.getMachineFunction();
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h?rev=275268&r1=275267&r2=275268&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h Wed Jul 13 09:23:33 2016
@@ -91,6 +91,12 @@ protected:
unsigned OpIdx1) const override;
public:
+
+ enum TargetOperandFlags {
+ MO_NONE = 0,
+ MO_GOTPCREL = 1
+ };
+
explicit SIInstrInfo(const SISubtarget &);
const SIRegisterInfo &getRegisterInfo() const {
Added: llvm/trunk/test/CodeGen/AMDGPU/global-variable-relocs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/global-variable-relocs.ll?rev=275268&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/global-variable-relocs.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/global-variable-relocs.ll Wed Jul 13 09:23:33 2016
@@ -0,0 +1,203 @@
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji < %s | FileCheck %s
+
+@private = private addrspace(1) global [256 x i32] zeroinitializer
+@internal = internal addrspace(1) global [256 x i32] zeroinitializer
+@available_externally = available_externally addrspace(1) global [256 x i32] zeroinitializer
+@linkonce = linkonce addrspace(1) global [256 x i32] zeroinitializer
+@weak= weak addrspace(1) global [256 x i32] zeroinitializer
+@common = common addrspace(1) global [256 x i32] zeroinitializer
+@extern_weak = extern_weak addrspace(1) global [256 x i32]
+@linkonce_odr = linkonce_odr addrspace(1) global [256 x i32] zeroinitializer
+@weak_odr = weak_odr addrspace(1) global [256 x i32] zeroinitializer
+@external = external addrspace(1) global [256 x i32]
+@external_w_init = addrspace(1) global [256 x i32] zeroinitializer
+
+; CHECK-LABEL: private_test:
+; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_add_u32 s[[ADDR_LO:[0-9]+]], s[[PC_LO]], private+8
+; CHECK: s_addc_u32 s[[ADDR_HI:[0-9]+]], s[[PC_HI]], 0
+; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[ADDR_LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[ADDR_HI]]
+; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @private_test(i32 addrspace(1)* %out) {
+ %ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @private, i32 0, i32 1
+ %val = load i32, i32 addrspace(1)* %ptr
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: internal_test:
+; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_add_u32 s[[ADDR_LO:[0-9]+]], s[[PC_LO]], internal+8
+; CHECK: s_addc_u32 s[[ADDR_HI:[0-9]+]], s[[PC_HI]], 0
+; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[ADDR_LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[ADDR_HI]]
+; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @internal_test(i32 addrspace(1)* %out) {
+ %ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @internal, i32 0, i32 1
+ %val = load i32, i32 addrspace(1)* %ptr
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: available_externally_test:
+; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], available_externally@GOTPCREL+4
+; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
+; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
+; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
+; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
+; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @available_externally_test(i32 addrspace(1)* %out) {
+ %ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @available_externally, i32 0, i32 1
+ %val = load i32, i32 addrspace(1)* %ptr
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: linkonce_test:
+; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], linkonce@GOTPCREL+4
+; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
+; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
+; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
+; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
+; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @linkonce_test(i32 addrspace(1)* %out) {
+ %ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @linkonce, i32 0, i32 1
+ %val = load i32, i32 addrspace(1)* %ptr
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: weak_test:
+; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], weak@GOTPCREL+4
+; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
+; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
+; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
+; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
+; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @weak_test(i32 addrspace(1)* %out) {
+ %ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @weak, i32 0, i32 1
+ %val = load i32, i32 addrspace(1)* %ptr
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: common_test:
+; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], common@GOTPCREL+4
+; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
+; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
+; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
+; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
+; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @common_test(i32 addrspace(1)* %out) {
+ %ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @common, i32 0, i32 1
+ %val = load i32, i32 addrspace(1)* %ptr
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: extern_weak_test:
+; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], extern_weak@GOTPCREL+4
+; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
+; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
+; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
+; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
+; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @extern_weak_test(i32 addrspace(1)* %out) {
+ %ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @extern_weak, i32 0, i32 1
+ %val = load i32, i32 addrspace(1)* %ptr
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: linkonce_odr_test:
+; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], linkonce_odr@GOTPCREL+4
+; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
+; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
+; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
+; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
+; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @linkonce_odr_test(i32 addrspace(1)* %out) {
+ %ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @linkonce_odr, i32 0, i32 1
+ %val = load i32, i32 addrspace(1)* %ptr
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: weak_odr_test:
+; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], weak_odr@GOTPCREL+4
+; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
+; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
+; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
+; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
+; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @weak_odr_test(i32 addrspace(1)* %out) {
+ %ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @weak_odr, i32 0, i32 1
+ %val = load i32, i32 addrspace(1)* %ptr
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: external_test:
+; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], external@GOTPCREL+4
+; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
+; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
+; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
+; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
+; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @external_test(i32 addrspace(1)* %out) {
+ %ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @external, i32 0, i32 1
+ %val = load i32, i32 addrspace(1)* %ptr
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: external_w_init_test:
+; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], external_w_init@GOTPCREL+4
+; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
+; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
+; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
+; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
+; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @external_w_init_test(i32 addrspace(1)* %out) {
+ %ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @external_w_init, i32 0, i32 1
+ %val = load i32, i32 addrspace(1)* %ptr
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: .local private
+; CHECK: .local internal
+; CHECK: .weak linkonce
+; CHECK: .weak weak
+; CHECK: .weak linkonce_odr
+; CHECK: .weak weak_odr
+; CHECK-NOT: external{{$}}
+; CHECK: .globl external_w_init
Removed: llvm/trunk/test/CodeGen/AMDGPU/global-zero-initializer.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/global-zero-initializer.ll?rev=275267&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/global-zero-initializer.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/global-zero-initializer.ll (removed)
@@ -1,18 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI < %s 2>&1 | FileCheck %s
-; RUN: llc -march=amdgcn -mcpu=tonga < %s 2>&1 | FileCheck %s
-
-; CHECK: {{^}}load_init_global_global:
-; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
-; CHECK: s_add_u32 s[[ADDR_LO:[0-9]+]], s[[PC_LO]], global+4
-; CHECK: s_addc_u32 s5, s[[PC_HI]], 0
-; CHECK: buffer_load_dword v{{[0-9]+}}, off, s{{\[}}[[ADDR_LO]]:7], 0 offset:40
-; CHECK: global:
-; CHECK: .zero 1024
-@global = addrspace(1) global [256 x i32] zeroinitializer
-
-define void @load_init_global_global(i32 addrspace(1)* %out, i1 %p) {
- %gep = getelementptr [256 x i32], [256 x i32] addrspace(1)* @global, i32 0, i32 10
- %ld = load i32, i32 addrspace(1)* %gep
- store i32 %ld, i32 addrspace(1)* %out
- ret void
-}
Modified: llvm/trunk/test/CodeGen/AMDGPU/hsa-func.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hsa-func.ll?rev=275268&r1=275267&r2=275268&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hsa-func.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/hsa-func.ll Wed Jul 13 09:23:33 2016
@@ -18,7 +18,7 @@
; ELF: SHT_NOTE
; ELF: 0000: 04000000 08000000 01000000 414D4400
-; ELF: 0010: 02000000 00000000 04000000 1B000000
+; ELF: 0010: 02000000 01000000 04000000 1B000000
; ELF: 0020: 03000000 414D4400 04000700 07000000
; ELF: 0030: 00000000 00000000 414D4400 414D4447
@@ -30,7 +30,7 @@
; ELF: Type: Function (0x2)
; ELF: }
-; HSA: .hsa_code_object_version 2,0
+; HSA: .hsa_code_object_version 2,1
; HSA-CI: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
; HSA-VI: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
Modified: llvm/trunk/test/CodeGen/AMDGPU/hsa-note-no-func.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hsa-note-no-func.ll?rev=275268&r1=275267&r2=275268&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hsa-note-no-func.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/hsa-note-no-func.ll Wed Jul 13 09:23:33 2016
@@ -2,7 +2,7 @@
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo | FileCheck --check-prefix=HSA --check-prefix=HSA-VI %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=fiji | FileCheck --check-prefix=HSA --check-prefix=HSA-FIJI %s
-; HSA: .hsa_code_object_version 2,0
+; HSA: .hsa_code_object_version 2,1
; HSA-CI: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
; HSA-VI: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
; HSA-FIJI: .hsa_code_object_isa 8,0,3,"AMD","AMDGPU"
Modified: llvm/trunk/test/CodeGen/AMDGPU/hsa.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hsa.ll?rev=275268&r1=275267&r2=275268&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hsa.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/hsa.ll Wed Jul 13 09:23:33 2016
@@ -18,7 +18,7 @@
; ELF: SHT_NOTE
; ELF: 0000: 04000000 08000000 01000000 414D4400
-; ELF: 0010: 02000000 00000000 04000000 1B000000
+; ELF: 0010: 02000000 01000000 04000000 1B000000
; ELF: 0020: 03000000 414D4400 04000700 07000000
; ELF: 0030: 00000000 00000000 414D4400 414D4447
; ELF: 0040: 50550000
@@ -29,7 +29,7 @@
; ELF: Type: AMDGPU_HSA_KERNEL (0xA)
; ELF: }
-; HSA: .hsa_code_object_version 2,0
+; HSA: .hsa_code_object_version 2,1
; HSA-CI: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
; HSA-VI: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
More information about the llvm-commits
mailing list