[llvm] r275268 - AMDGPU/SI: Add support for R_AMDGPU_GOTPCREL

Tom Stellard via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 13 07:23:34 PDT 2016


Author: tstellar
Date: Wed Jul 13 09:23:33 2016
New Revision: 275268

URL: http://llvm.org/viewvc/llvm-project?rev=275268&view=rev
Log:
AMDGPU/SI: Add support for R_AMDGPU_GOTPCREL

Reviewers: rafael, ruiu, tony-tye, arsenm, kzhuravl

Subscribers: arsenm, llvm-commits, kzhuravl

Differential Revision: http://reviews.llvm.org/D21484

Added:
    llvm/trunk/test/CodeGen/AMDGPU/global-variable-relocs.ll
Removed:
    llvm/trunk/test/CodeGen/AMDGPU/global-zero-initializer.ll
Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
    llvm/trunk/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
    llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
    llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
    llvm/trunk/test/CodeGen/AMDGPU/hsa-func.ll
    llvm/trunk/test/CodeGen/AMDGPU/hsa-note-no-func.ll
    llvm/trunk/test/CodeGen/AMDGPU/hsa.ll

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp?rev=275268&r1=275267&r2=275268&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp Wed Jul 13 09:23:33 2016
@@ -106,7 +106,7 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFil
   AMDGPUTargetStreamer *TS =
       static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
 
-  TS->EmitDirectiveHSACodeObjectVersion(2, 0);
+  TS->EmitDirectiveHSACodeObjectVersion(2, 1);
 
   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI->getFeatureBits());
   TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping,

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp?rev=275268&r1=275267&r2=275268&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp Wed Jul 13 09:23:33 2016
@@ -39,6 +39,13 @@ using namespace llvm;
 AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx, const AMDGPUSubtarget &st):
   Ctx(ctx), ST(st) { }
 
+static MCSymbolRefExpr::VariantKind getVariantKind(unsigned MOFlags) {
+  switch (MOFlags) {
+  default: return MCSymbolRefExpr::VK_None;
+  case SIInstrInfo::MO_GOTPCREL: return MCSymbolRefExpr::VK_GOTPCREL;
+  }
+}
+
 void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
 
   int MCOpcode = ST.getInstrInfo()->pseudoToMCOpcode(MI->getOpcode());
@@ -69,7 +76,8 @@ void AMDGPUMCInstLower::lower(const Mach
     case MachineOperand::MO_GlobalAddress: {
       const GlobalValue *GV = MO.getGlobal();
       MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(GV->getName()));
-      const MCExpr *SymExpr = MCSymbolRefExpr::create(Sym, Ctx);
+      const MCExpr *SymExpr =
+          MCSymbolRefExpr::create(Sym, getVariantKind(MO.getTargetFlags()),Ctx);
       const MCExpr *Expr = MCBinaryExpr::createAdd(SymExpr,
           MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
       MCOp = MCOperand::createExpr(Expr);

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp?rev=275268&r1=275267&r2=275268&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp Wed Jul 13 09:23:33 2016
@@ -134,9 +134,9 @@ static StringRef getGPUOrDefault(const T
 }
 
 static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
-  if (!RM.hasValue())
-    return Reloc::PIC_;
-  return *RM;
+  // The AMDGPU toolchain only supports generating shared objects, so we
+  // must always use PIC.
+  return Reloc::PIC_;
 }
 
 AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,

Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=275268&r1=275267&r2=275268&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Wed Jul 13 09:23:33 2016
@@ -510,12 +510,14 @@ bool SITargetLowering::isMemOpUniform(co
   const Value *Ptr = MemNode->getMemOperand()->getValue();
 
   // UndefValue means this is a load of a kernel input.  These are uniform.
-  // Sometimes LDS instructions have constant pointers
-  if (isa<UndefValue>(Ptr) || isa<Argument>(Ptr) || isa<Constant>(Ptr) ||
-      isa<GlobalValue>(Ptr))
+  // Sometimes LDS instructions have constant pointers.
+  // If Ptr is null, then that means this mem operand contains a
+  // PseudoSourceValue like GOT.
+  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
+      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
     return true;
 
-  const Instruction *I = dyn_cast_or_null<Instruction>(Ptr);
+  const Instruction *I = dyn_cast<Instruction>(Ptr);
   return I && I->getMetadata("amdgpu.uniform");
 }
 
@@ -1517,27 +1519,22 @@ SDValue SITargetLowering::lowerADDRSPACE
   return DAG.getUNDEF(ASC->getValueType(0));
 }
 
+static bool shouldEmitGOTReloc(const GlobalValue *GV,
+                               const TargetMachine &TM) {
+  return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
+         !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
+}
+
 bool
 SITargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
-  if (GA->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS)
-    return false;
-
-  return TargetLowering::isOffsetFoldingLegal(GA);
+  // We can fold offsets for anything that doesn't require a GOT relocation.
+  return GA->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
+         !shouldEmitGOTReloc(GA->getGlobal(), getTargetMachine());
 }
 
-SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
-                                             SDValue Op,
-                                             SelectionDAG &DAG) const {
-  GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
-
-  if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
-      GSD->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS)
-    return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
-
-  SDLoc DL(GSD);
-  const GlobalValue *GV = GSD->getGlobal();
-  EVT PtrVT = Op.getValueType();
-
+static SDValue buildPCRelGlobalAddress(SelectionDAG &DAG, const GlobalValue *GV,
+                                      SDLoc DL, unsigned Offset, EVT PtrVT,
+                                      unsigned GAFlags = SIInstrInfo::MO_NONE) {
   // In order to support pc-relative addressing, the PC_ADD_REL_OFFSET SDNode is
   // lowered to the following code sequence:
   // s_getpc_b64 s[0:1]
@@ -1555,11 +1552,41 @@ SDValue SITargetLowering::LowerGlobalAdd
   // of the s_add_u32 instruction, we end up with an offset that is 4 bytes too
   // small. This requires us to add 4 to the global variable offset in order to
   // compute the correct address.
-  SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32,
-                                          GSD->getOffset() + 4);
+  SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset + 4,
+                                          GAFlags);
   return DAG.getNode(AMDGPUISD::PC_ADD_REL_OFFSET, DL, PtrVT, GA);
 }
 
+SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
+                                             SDValue Op,
+                                             SelectionDAG &DAG) const {
+  GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
+
+  if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
+      GSD->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS)
+    return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
+
+  SDLoc DL(GSD);
+  const GlobalValue *GV = GSD->getGlobal();
+  EVT PtrVT = Op.getValueType();
+
+  if (!shouldEmitGOTReloc(GV, getTargetMachine()))
+    return buildPCRelGlobalAddress(DAG, GV, DL, GSD->getOffset(), PtrVT);
+
+  SDValue GOTAddr = buildPCRelGlobalAddress(DAG, GV, DL, 0, PtrVT,
+                                            SIInstrInfo::MO_GOTPCREL);
+
+  Type *Ty = PtrVT.getTypeForEVT(*DAG.getContext());
+  PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
+  const DataLayout &DataLayout = DAG.getDataLayout();
+  unsigned Align = DataLayout.getABITypeAlignment(PtrTy);
+  // FIXME: Use a PseudoSourceValue once those can be assigned an address space.
+  MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
+
+  return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), GOTAddr,
+                     PtrInfo, false, false, true, Align);
+}
+
 SDValue SITargetLowering::lowerTRAP(SDValue Op,
                                     SelectionDAG &DAG) const {
   const MachineFunction &MF = DAG.getMachineFunction();

Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h?rev=275268&r1=275267&r2=275268&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h Wed Jul 13 09:23:33 2016
@@ -91,6 +91,12 @@ protected:
                                        unsigned OpIdx1) const override;
 
 public:
+
+  enum TargetOperandFlags {
+    MO_NONE = 0,
+    MO_GOTPCREL = 1
+  };
+
   explicit SIInstrInfo(const SISubtarget &);
 
   const SIRegisterInfo &getRegisterInfo() const {

Added: llvm/trunk/test/CodeGen/AMDGPU/global-variable-relocs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/global-variable-relocs.ll?rev=275268&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/global-variable-relocs.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/global-variable-relocs.ll Wed Jul 13 09:23:33 2016
@@ -0,0 +1,203 @@
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji < %s | FileCheck %s
+
+ at private = private addrspace(1) global [256 x i32] zeroinitializer
+ at internal = internal addrspace(1) global [256 x i32] zeroinitializer
+ at available_externally = available_externally addrspace(1) global [256 x i32] zeroinitializer
+ at linkonce = linkonce addrspace(1) global [256 x i32] zeroinitializer
+ at weak= weak addrspace(1) global [256 x i32] zeroinitializer
+ at common = common addrspace(1) global [256 x i32] zeroinitializer
+ at extern_weak = extern_weak addrspace(1) global [256 x i32]
+ at linkonce_odr = linkonce_odr addrspace(1) global [256 x i32] zeroinitializer
+ at weak_odr = weak_odr addrspace(1) global [256 x i32] zeroinitializer
+ at external = external addrspace(1) global [256 x i32]
+ at external_w_init = addrspace(1) global [256 x i32] zeroinitializer
+
+; CHECK-LABEL: private_test:
+; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_add_u32 s[[ADDR_LO:[0-9]+]], s[[PC_LO]], private+8
+; CHECK: s_addc_u32 s[[ADDR_HI:[0-9]+]], s[[PC_HI]], 0
+; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[ADDR_LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[ADDR_HI]]
+; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @private_test(i32 addrspace(1)* %out) {
+  %ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @private, i32 0, i32 1
+  %val = load i32, i32 addrspace(1)* %ptr
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: internal_test:
+; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_add_u32 s[[ADDR_LO:[0-9]+]], s[[PC_LO]], internal+8
+; CHECK: s_addc_u32 s[[ADDR_HI:[0-9]+]], s[[PC_HI]], 0
+; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[ADDR_LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[ADDR_HI]]
+; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @internal_test(i32 addrspace(1)* %out) {
+  %ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @internal, i32 0, i32 1
+  %val = load i32, i32 addrspace(1)* %ptr
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: available_externally_test:
+; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], available_externally at GOTPCREL+4
+; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
+; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
+; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
+; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
+; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @available_externally_test(i32 addrspace(1)* %out) {
+  %ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @available_externally, i32 0, i32 1
+  %val = load i32, i32 addrspace(1)* %ptr
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: linkonce_test:
+; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], linkonce at GOTPCREL+4
+; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
+; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
+; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
+; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
+; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @linkonce_test(i32 addrspace(1)* %out) {
+  %ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @linkonce, i32 0, i32 1
+  %val = load i32, i32 addrspace(1)* %ptr
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: weak_test:
+; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], weak at GOTPCREL+4
+; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
+; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
+; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
+; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
+; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @weak_test(i32 addrspace(1)* %out) {
+  %ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @weak, i32 0, i32 1
+  %val = load i32, i32 addrspace(1)* %ptr
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: common_test:
+; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], common at GOTPCREL+4
+; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
+; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
+; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
+; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
+; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @common_test(i32 addrspace(1)* %out) {
+  %ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @common, i32 0, i32 1
+  %val = load i32, i32 addrspace(1)* %ptr
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: extern_weak_test:
+; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], extern_weak at GOTPCREL+4
+; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
+; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
+; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
+; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
+; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @extern_weak_test(i32 addrspace(1)* %out) {
+  %ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @extern_weak, i32 0, i32 1
+  %val = load i32, i32 addrspace(1)* %ptr
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: linkonce_odr_test:
+; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], linkonce_odr at GOTPCREL+4
+; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
+; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
+; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
+; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
+; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @linkonce_odr_test(i32 addrspace(1)* %out) {
+  %ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @linkonce_odr, i32 0, i32 1
+  %val = load i32, i32 addrspace(1)* %ptr
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: weak_odr_test:
+; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], weak_odr at GOTPCREL+4
+; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
+; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
+; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
+; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
+; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @weak_odr_test(i32 addrspace(1)* %out) {
+  %ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @weak_odr, i32 0, i32 1
+  %val = load i32, i32 addrspace(1)* %ptr
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: external_test:
+; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], external at GOTPCREL+4
+; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
+; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
+; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
+; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
+; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @external_test(i32 addrspace(1)* %out) {
+  %ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @external, i32 0, i32 1
+  %val = load i32, i32 addrspace(1)* %ptr
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: external_w_init_test:
+; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], external_w_init at GOTPCREL+4
+; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
+; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
+; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
+; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
+; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @external_w_init_test(i32 addrspace(1)* %out) {
+  %ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @external_w_init, i32 0, i32 1
+  %val = load i32, i32 addrspace(1)* %ptr
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK: .local private
+; CHECK: .local internal
+; CHECK: .weak linkonce
+; CHECK: .weak weak
+; CHECK: .weak linkonce_odr
+; CHECK: .weak weak_odr
+; CHECK-NOT: external{{$}}
+; CHECK: .globl external_w_init

Removed: llvm/trunk/test/CodeGen/AMDGPU/global-zero-initializer.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/global-zero-initializer.ll?rev=275267&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/global-zero-initializer.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/global-zero-initializer.ll (removed)
@@ -1,18 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI < %s 2>&1 | FileCheck %s
-; RUN: llc -march=amdgcn -mcpu=tonga < %s 2>&1 | FileCheck %s
-
-; CHECK: {{^}}load_init_global_global:
-; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
-; CHECK: s_add_u32 s[[ADDR_LO:[0-9]+]], s[[PC_LO]], global+4
-; CHECK: s_addc_u32 s5, s[[PC_HI]], 0
-; CHECK: buffer_load_dword v{{[0-9]+}}, off, s{{\[}}[[ADDR_LO]]:7], 0 offset:40
-; CHECK: global:
-; CHECK: .zero 1024
- at global = addrspace(1) global [256 x i32] zeroinitializer
-
-define void @load_init_global_global(i32 addrspace(1)* %out, i1 %p) {
- %gep = getelementptr [256 x i32], [256 x i32] addrspace(1)* @global, i32 0, i32 10
-  %ld = load i32, i32 addrspace(1)* %gep
-  store i32 %ld, i32 addrspace(1)* %out
-  ret void
-}

Modified: llvm/trunk/test/CodeGen/AMDGPU/hsa-func.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hsa-func.ll?rev=275268&r1=275267&r2=275268&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hsa-func.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/hsa-func.ll Wed Jul 13 09:23:33 2016
@@ -18,7 +18,7 @@
 
 ; ELF: SHT_NOTE
 ; ELF: 0000: 04000000 08000000 01000000 414D4400
-; ELF: 0010: 02000000 00000000 04000000 1B000000
+; ELF: 0010: 02000000 01000000 04000000 1B000000
 
 ; ELF: 0020: 03000000 414D4400 04000700 07000000
 ; ELF: 0030: 00000000 00000000 414D4400 414D4447
@@ -30,7 +30,7 @@
 ; ELF: Type: Function (0x2)
 ; ELF: }
 
-; HSA: .hsa_code_object_version 2,0
+; HSA: .hsa_code_object_version 2,1
 ; HSA-CI: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
 ; HSA-VI: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
 

Modified: llvm/trunk/test/CodeGen/AMDGPU/hsa-note-no-func.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hsa-note-no-func.ll?rev=275268&r1=275267&r2=275268&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hsa-note-no-func.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/hsa-note-no-func.ll Wed Jul 13 09:23:33 2016
@@ -2,7 +2,7 @@
 ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo | FileCheck --check-prefix=HSA --check-prefix=HSA-VI %s
 ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=fiji | FileCheck --check-prefix=HSA --check-prefix=HSA-FIJI %s
 
-; HSA: .hsa_code_object_version 2,0
+; HSA: .hsa_code_object_version 2,1
 ; HSA-CI: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
 ; HSA-VI: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
 ; HSA-FIJI: .hsa_code_object_isa 8,0,3,"AMD","AMDGPU"

Modified: llvm/trunk/test/CodeGen/AMDGPU/hsa.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hsa.ll?rev=275268&r1=275267&r2=275268&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hsa.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/hsa.ll Wed Jul 13 09:23:33 2016
@@ -18,7 +18,7 @@
 
 ; ELF: SHT_NOTE
 ; ELF: 0000: 04000000 08000000 01000000 414D4400
-; ELF: 0010: 02000000 00000000 04000000 1B000000
+; ELF: 0010: 02000000 01000000 04000000 1B000000
 ; ELF: 0020: 03000000 414D4400 04000700 07000000
 ; ELF: 0030: 00000000 00000000 414D4400 414D4447
 ; ELF: 0040: 50550000
@@ -29,7 +29,7 @@
 ; ELF: Type: AMDGPU_HSA_KERNEL (0xA)
 ; ELF: }
 
-; HSA: .hsa_code_object_version 2,0
+; HSA: .hsa_code_object_version 2,1
 ; HSA-CI: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
 ; HSA-VI: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
 




More information about the llvm-commits mailing list