[llvm] r373286 - AMDGPU/GlobalISel: Legalize G_GLOBAL_VALUE

Matt Arsenault via llvm-commits <llvm-commits@lists.llvm.org>
Mon Sep 30 18:06:43 PDT 2019


Author: arsenm
Date: Mon Sep 30 18:06:43 2019
New Revision: 373286

URL: http://llvm.org/viewvc/llvm-project?rev=373286&view=rev
Log:
AMDGPU/GlobalISel: Legalize G_GLOBAL_VALUE

Handle other cases besides LDS. Mostly a straight port of the existing
handling, without the intermediate custom nodes.
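
As a minimal illustration (a sketch in the spirit of the added global-value.ll
test; the function name here is hypothetical), returning the address of an
external addrspace(1) global is now legalized into a pc-relative GOT load
instead of being rejected by the legalizer:

  @external_global = external addrspace(1) global i32, align 4

  define i32 addrspace(1)* @return_external_global() {
    ret i32 addrspace(1)* @external_global
  }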

Added:
    llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/global-value.ll
Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
    llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
    llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp?rev=373286&r1=373285&r2=373286&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp Mon Sep 30 18:06:43 2019
@@ -309,7 +309,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo
     .legalIf(isPointer(0));
 
   setAction({G_FRAME_INDEX, PrivatePtr}, Legal);
-  getActionDefinitionsBuilder(G_GLOBAL_VALUE).customFor({LocalPtr});
+  getActionDefinitionsBuilder(G_GLOBAL_VALUE)
+    .customFor({LocalPtr, GlobalPtr, ConstantPtr, Constant32Ptr});
 
 
   auto &FPOpActions = getActionDefinitionsBuilder(
@@ -1509,6 +1510,62 @@ bool AMDGPULegalizerInfo::legalizeSinCos
   return true;
 }
 
+bool AMDGPULegalizerInfo::buildPCRelGlobalAddress(
+  Register DstReg, LLT PtrTy,
+  MachineIRBuilder &B, const GlobalValue *GV,
+  unsigned Offset, unsigned GAFlags) const {
+  // In order to support pc-relative addressing, SI_PC_ADD_REL_OFFSET is lowered
+  // to the following code sequence:
+  //
+  // For constant address space:
+  //   s_getpc_b64 s[0:1]
+  //   s_add_u32 s0, s0, $symbol
+  //   s_addc_u32 s1, s1, 0
+  //
+  //   s_getpc_b64 returns the address of the s_add_u32 instruction and then
+  //   a fixup or relocation is emitted to replace $symbol with a literal
+  //   constant, which is a pc-relative offset from the encoding of the $symbol
+  //   operand to the global variable.
+  //
+  // For global address space:
+  //   s_getpc_b64 s[0:1]
+  //   s_add_u32 s0, s0, $symbol@{gotpc}rel32@lo
+  //   s_addc_u32 s1, s1, $symbol@{gotpc}rel32@hi
+  //
+  //   s_getpc_b64 returns the address of the s_add_u32 instruction and then
+  //   fixups or relocations are emitted to replace $symbol@*@lo and
+  //   $symbol@*@hi with lower 32 bits and higher 32 bits of a literal constant,
+  //   which is a 64-bit pc-relative offset from the encoding of the $symbol
+  //   operand to the global variable.
+  //
+  // What we want here is an offset from the value returned by s_getpc
+  // (which is the address of the s_add_u32 instruction) to the global
+  // variable, but since the encoding of $symbol starts 4 bytes after the start
+  // of the s_add_u32 instruction, we end up with an offset that is 4 bytes too
+  // small. This requires us to add 4 to the global variable offset in order to
+  // compute the correct address.
+
+  LLT ConstPtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
+
+  Register PCReg = PtrTy.getSizeInBits() != 32 ? DstReg :
+    B.getMRI()->createGenericVirtualRegister(ConstPtrTy);
+
+  MachineInstrBuilder MIB = B.buildInstr(AMDGPU::SI_PC_ADD_REL_OFFSET)
+    .addDef(PCReg);
+
+  MIB.addGlobalAddress(GV, Offset + 4, GAFlags);
+  if (GAFlags == SIInstrInfo::MO_NONE)
+    MIB.addImm(0);
+  else
+    MIB.addGlobalAddress(GV, Offset + 4, GAFlags + 1);
+
+  B.getMRI()->setRegClass(PCReg, &AMDGPU::SReg_64RegClass);
+
+  if (PtrTy.getSizeInBits() == 32)
+    B.buildExtract(DstReg, PCReg, 0);
+  return true;
+}
+
 bool AMDGPULegalizerInfo::legalizeGlobalValue(
   MachineInstr &MI, MachineRegisterInfo &MRI,
   MachineIRBuilder &B) const {
@@ -1519,10 +1576,9 @@ bool AMDGPULegalizerInfo::legalizeGlobal
   const GlobalValue *GV = MI.getOperand(1).getGlobal();
   MachineFunction &MF = B.getMF();
   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+  B.setInstr(MI);
 
   if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) {
-    B.setInstr(MI);
-
     if (!MFI->isEntryFunction()) {
       const Function &Fn = MF.getFunction();
       DiagnosticInfoUnsupported BadLDSDecl(
@@ -1536,13 +1592,47 @@ bool AMDGPULegalizerInfo::legalizeGlobal
       MI.eraseFromParent();
       return true;
     }
+
+    const Function &Fn = MF.getFunction();
+    DiagnosticInfoUnsupported BadInit(
+      Fn, "unsupported initializer for address space", MI.getDebugLoc());
+    Fn.getContext().diagnose(BadInit);
+    return true;
+  }
+
+  const SITargetLowering *TLI = ST.getTargetLowering();
+
+  if (TLI->shouldEmitFixup(GV)) {
+    buildPCRelGlobalAddress(DstReg, Ty, B, GV, 0);
+    MI.eraseFromParent();
+    return true;
+  }
+
+  if (TLI->shouldEmitPCReloc(GV)) {
+    buildPCRelGlobalAddress(DstReg, Ty, B, GV, 0, SIInstrInfo::MO_REL32);
+    MI.eraseFromParent();
+    return true;
+  }
+
+  LLT PtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
+  Register GOTAddr = MRI.createGenericVirtualRegister(PtrTy);
+
+  MachineMemOperand *GOTMMO = MF.getMachineMemOperand(
+    MachinePointerInfo::getGOT(MF),
+    MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
+    MachineMemOperand::MOInvariant,
+    8 /*Size*/, 8 /*Align*/);
+
+  buildPCRelGlobalAddress(GOTAddr, PtrTy, B, GV, 0, SIInstrInfo::MO_GOTPCREL32);
+
+  if (Ty.getSizeInBits() == 32) {
+    // Truncate if this is a 32-bit constant address.
+    auto Load = B.buildLoad(PtrTy, GOTAddr, *GOTMMO);
+    B.buildExtract(DstReg, Load, 0);
   } else
-    return false;
+    B.buildLoad(DstReg, GOTAddr, *GOTMMO);
 
-  const Function &Fn = MF.getFunction();
-  DiagnosticInfoUnsupported BadInit(
-    Fn, "unsupported initializer for address space", MI.getDebugLoc());
-  Fn.getContext().diagnose(BadInit);
+  MI.eraseFromParent();
   return true;
 }
 

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.h?rev=373286&r1=373285&r2=373286&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.h Mon Sep 30 18:06:43 2019
@@ -16,6 +16,7 @@
 
 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
 #include "AMDGPUArgumentUsageInfo.h"
+#include "SIInstrInfo.h"
 
 namespace llvm {
 
@@ -58,6 +59,10 @@ public:
   bool legalizeSinCos(MachineInstr &MI, MachineRegisterInfo &MRI,
                       MachineIRBuilder &B) const;
 
+  bool buildPCRelGlobalAddress(
+    Register DstReg, LLT PtrTy, MachineIRBuilder &B, const GlobalValue *GV,
+    unsigned Offset, unsigned GAFlags = SIInstrInfo::MO_NONE) const;
+
   bool legalizeGlobalValue(MachineInstr &MI, MachineRegisterInfo &MRI,
                            MachineIRBuilder &B) const;
   bool legalizeLoad(MachineInstr &MI, MachineRegisterInfo &MRI,

Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h?rev=373286&r1=373285&r2=373286&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h Mon Sep 30 18:06:43 2019
@@ -186,6 +186,7 @@ private:
 
   unsigned isCFIntrinsic(const SDNode *Intr) const;
 
+public:
   /// \returns True if fixup needs to be emitted for given global value \p GV,
   /// false otherwise.
   bool shouldEmitFixup(const GlobalValue *GV) const;
@@ -198,6 +199,7 @@ private:
   /// global value \p GV, false otherwise.
   bool shouldEmitPCReloc(const GlobalValue *GV) const;
 
+private:
   // Analyze a combined offset from an amdgcn_buffer_ intrinsic and store the
   // three offsets (voffset, soffset and instoffset) into the SDValue[3] array
   // pointed to by Offsets.

Added: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/global-value.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/global-value.ll?rev=373286&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/global-value.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/global-value.ll Mon Sep 30 18:06:43 2019
@@ -0,0 +1,156 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -stop-after=legalizer < %s | FileCheck -check-prefix=HSA %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -stop-after=legalizer < %s | FileCheck -check-prefix=PAL %s
+
+@external_constant = external addrspace(4) constant i32, align 4
+@external_constant32 = external addrspace(6) constant i32, align 4
+@external_global = external addrspace(1) global i32, align 4
+
+@internal_constant = internal addrspace(4) constant i32 9, align 4
+@internal_constant32 = internal addrspace(6) constant i32 9, align 4
+@internal_global = internal addrspace(1) global i32 9, align 4
+
+
+define i32 addrspace(4)* @external_constant_got() {
+  ; HSA-LABEL: name: external_constant_got
+  ; HSA: bb.1 (%ir-block.0):
+  ; HSA:   liveins: $sgpr30_sgpr31
+  ; HSA:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; HSA:   [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_constant + 4, target-flags(amdgpu-gotprel32-hi) @external_constant + 4, implicit-def $scc
+  ; HSA:   [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load 8 from got, addrspace 4)
+  ; HSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p4)
+  ; HSA:   $vgpr0 = COPY [[UV]](s32)
+  ; HSA:   $vgpr1 = COPY [[UV1]](s32)
+  ; HSA:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+  ; HSA:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1
+  ; PAL-LABEL: name: external_constant_got
+  ; PAL: bb.1 (%ir-block.0):
+  ; PAL:   liveins: $sgpr30_sgpr31
+  ; PAL:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; PAL:   [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET @external_constant + 4, 0, implicit-def $scc
+  ; PAL:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p4)
+  ; PAL:   $vgpr0 = COPY [[UV]](s32)
+  ; PAL:   $vgpr1 = COPY [[UV1]](s32)
+  ; PAL:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+  ; PAL:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1
+  ret i32 addrspace(4)* @external_constant
+}
+
+define i32 addrspace(1)* @external_global_got() {
+  ; HSA-LABEL: name: external_global_got
+  ; HSA: bb.1 (%ir-block.0):
+  ; HSA:   liveins: $sgpr30_sgpr31
+  ; HSA:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; HSA:   [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_global + 4, target-flags(amdgpu-gotprel32-hi) @external_global + 4, implicit-def $scc
+  ; HSA:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load 8 from got, addrspace 4)
+  ; HSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p1)
+  ; HSA:   $vgpr0 = COPY [[UV]](s32)
+  ; HSA:   $vgpr1 = COPY [[UV1]](s32)
+  ; HSA:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+  ; HSA:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1
+  ; PAL-LABEL: name: external_global_got
+  ; PAL: bb.1 (%ir-block.0):
+  ; PAL:   liveins: $sgpr30_sgpr31
+  ; PAL:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; PAL:   [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_global + 4, target-flags(amdgpu-gotprel32-hi) @external_global + 4, implicit-def $scc
+  ; PAL:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load 8 from got, addrspace 4)
+  ; PAL:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p1)
+  ; PAL:   $vgpr0 = COPY [[UV]](s32)
+  ; PAL:   $vgpr1 = COPY [[UV1]](s32)
+  ; PAL:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+  ; PAL:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1
+  ret i32 addrspace(1)* @external_global
+}
+
+define i32 addrspace(4)* @internal_constant_pcrel() {
+  ; HSA-LABEL: name: internal_constant_pcrel
+  ; HSA: bb.1 (%ir-block.0):
+  ; HSA:   liveins: $sgpr30_sgpr31
+  ; HSA:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; HSA:   [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_constant + 4, target-flags(amdgpu-rel32-hi) @internal_constant + 4, implicit-def $scc
+  ; HSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p4)
+  ; HSA:   $vgpr0 = COPY [[UV]](s32)
+  ; HSA:   $vgpr1 = COPY [[UV1]](s32)
+  ; HSA:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+  ; HSA:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1
+  ; PAL-LABEL: name: internal_constant_pcrel
+  ; PAL: bb.1 (%ir-block.0):
+  ; PAL:   liveins: $sgpr30_sgpr31
+  ; PAL:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; PAL:   [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET @internal_constant + 4, 0, implicit-def $scc
+  ; PAL:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p4)
+  ; PAL:   $vgpr0 = COPY [[UV]](s32)
+  ; PAL:   $vgpr1 = COPY [[UV1]](s32)
+  ; PAL:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+  ; PAL:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1
+  ret i32 addrspace(4)* @internal_constant
+}
+
+define i32 addrspace(1)* @internal_global_pcrel() {
+  ; HSA-LABEL: name: internal_global_pcrel
+  ; HSA: bb.1 (%ir-block.0):
+  ; HSA:   liveins: $sgpr30_sgpr31
+  ; HSA:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; HSA:   [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p1) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_global + 4, target-flags(amdgpu-rel32-hi) @internal_global + 4, implicit-def $scc
+  ; HSA:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p1)
+  ; HSA:   $vgpr0 = COPY [[UV]](s32)
+  ; HSA:   $vgpr1 = COPY [[UV1]](s32)
+  ; HSA:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+  ; HSA:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1
+  ; PAL-LABEL: name: internal_global_pcrel
+  ; PAL: bb.1 (%ir-block.0):
+  ; PAL:   liveins: $sgpr30_sgpr31
+  ; PAL:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; PAL:   [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p1) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_global + 4, target-flags(amdgpu-rel32-hi) @internal_global + 4, implicit-def $scc
+  ; PAL:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p1)
+  ; PAL:   $vgpr0 = COPY [[UV]](s32)
+  ; PAL:   $vgpr1 = COPY [[UV1]](s32)
+  ; PAL:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+  ; PAL:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1
+  ret i32 addrspace(1)* @internal_global
+}
+
+define i32 addrspace(6)* @external_constant32_got() {
+  ; HSA-LABEL: name: external_constant32_got
+  ; HSA: bb.1 (%ir-block.0):
+  ; HSA:   liveins: $sgpr30_sgpr31
+  ; HSA:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; HSA:   [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_constant32 + 4, target-flags(amdgpu-gotprel32-hi) @external_constant32 + 4, implicit-def $scc
+  ; HSA:   [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load 8 from got, addrspace 4)
+  ; HSA:   [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[LOAD]](p4), 0
+  ; HSA:   $vgpr0 = COPY [[EXTRACT]](p6)
+  ; HSA:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+  ; HSA:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0
+  ; PAL-LABEL: name: external_constant32_got
+  ; PAL: bb.1 (%ir-block.0):
+  ; PAL:   liveins: $sgpr30_sgpr31
+  ; PAL:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; PAL:   [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET @external_constant32 + 4, 0, implicit-def $scc
+  ; PAL:   [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[SI_PC_ADD_REL_OFFSET]](p4), 0
+  ; PAL:   $vgpr0 = COPY [[EXTRACT]](p6)
+  ; PAL:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+  ; PAL:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0
+  ret i32 addrspace(6)* @external_constant32
+}
+
+define i32 addrspace(6)* @internal_constant32_pcrel() {
+  ; HSA-LABEL: name: internal_constant32_pcrel
+  ; HSA: bb.1 (%ir-block.0):
+  ; HSA:   liveins: $sgpr30_sgpr31
+  ; HSA:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; HSA:   [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_constant32 + 4, target-flags(amdgpu-rel32-hi) @internal_constant32 + 4, implicit-def $scc
+  ; HSA:   [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[SI_PC_ADD_REL_OFFSET]](p4), 0
+  ; HSA:   $vgpr0 = COPY [[EXTRACT]](p6)
+  ; HSA:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+  ; HSA:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0
+  ; PAL-LABEL: name: internal_constant32_pcrel
+  ; PAL: bb.1 (%ir-block.0):
+  ; PAL:   liveins: $sgpr30_sgpr31
+  ; PAL:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; PAL:   [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET @internal_constant32 + 4, 0, implicit-def $scc
+  ; PAL:   [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[SI_PC_ADD_REL_OFFSET]](p4), 0
+  ; PAL:   $vgpr0 = COPY [[EXTRACT]](p6)
+  ; PAL:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+  ; PAL:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0
+  ret i32 addrspace(6)* @internal_constant32
+}

More information about the llvm-commits mailing list