[llvm] [AMDGPU] Use absolute relocations when compiling for AMDPAL (PR #67791)

via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 29 04:48:25 PDT 2023


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-globalisel

<details>
<summary>Changes</summary>

The primary ISA-independent justification for using PC-relative addressing is that it makes code position-independent and therefore allows sharing of .text pages between processes.

Since PAL does not share .text pages, we can use absolute relocations when compiling for AMDPAL.

---

Patch is 418.74 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/67791.diff


16 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp (+54) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h (+4) 
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp (+4) 
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+18-2) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll (+63-1) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgpu-reloc-const.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/ds_read2.ll (+10-16) 
- (modified) llvm/test/CodeGen/AMDGPU/gfx-call-non-gfx-func.ll (+6-12) 
- (modified) llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll (+966-1377) 
- (modified) llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll (+83-112) 
- (modified) llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll (+68-131) 
- (modified) llvm/test/CodeGen/AMDGPU/global-constant.ll (+10-1) 
- (modified) llvm/test/CodeGen/AMDGPU/lds-relocs.ll (+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll (+4-10) 
- (modified) llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll (+30-30) 
- (modified) llvm/test/CodeGen/AMDGPU/tail-call-amdgpu-gfx.ll (+19-11) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index db226a302900160..f04f85b7deaa5b7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -17,7 +17,10 @@
 #include "AMDGPUGlobalISelUtils.h"
 #include "AMDGPUInstrInfo.h"
 #include "AMDGPUTargetMachine.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIInstrInfo.h"
 #include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/BinaryFormat/ELF.h"
@@ -26,6 +29,7 @@
 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/IntrinsicsAMDGPU.h"
 #include "llvm/IR/IntrinsicsR600.h"
@@ -2764,6 +2768,50 @@ bool AMDGPULegalizerInfo::buildPCRelGlobalAddress(Register DstReg, LLT PtrTy,
   return true;
  }
 
+ 
+ // Emit an ABS32_LO / ABS32_HI relocation stub.
+ void AMDGPULegalizerInfo::buildAbsGlobalAddress(
+   Register DstReg, LLT PtrTy, MachineIRBuilder &B, const GlobalValue *GV,
+     int64_t Offset, MachineRegisterInfo &MRI) const {
+  bool IsDwordTy = PtrTy.getSizeInBits() == 32;
+
+  LLT S32 = LLT::scalar(32);
+
+  Register AddrDst;
+  if (IsDwordTy) {
+    AddrDst = MRI.createGenericVirtualRegister(S32);
+    MRI.setRegClass(AddrDst, &AMDGPU::SReg_32RegClass);
+  } else {
+    assert(PtrTy.getSizeInBits() == 64 &&
+           "Must provide a 64-bit pointer type!");
+    AddrDst = MRI.createGenericVirtualRegister(LLT::scalar(64));
+    MRI.setRegClass(AddrDst, &AMDGPU::SReg_64RegClass);
+  }
+
+  SmallVector<Register> Operands;
+
+  Register AddrLo = MRI.createGenericVirtualRegister(S32);
+  MRI.setRegClass(AddrLo, &AMDGPU::SReg_32RegClass);
+
+  B.buildInstr(AMDGPU::S_MOV_B32)
+      .addDef(AddrLo)
+      .addGlobalAddress(GV, 0, SIInstrInfo::MO_ABS32_LO);
+
+  Operands.push_back(AddrLo);
+
+  if (!IsDwordTy) {
+    Register AddrHi = MRI.createGenericVirtualRegister(S32);
+    MRI.setRegClass(AddrHi, &AMDGPU::SReg_32RegClass);
+    B.buildInstr(AMDGPU::S_MOV_B32)
+        .addDef(AddrHi)
+        .addGlobalAddress(GV, 0, SIInstrInfo::MO_ABS32_HI);
+
+    Operands.push_back(AddrHi);
+  }
+
+  B.buildMergeValues(DstReg, Operands);
+ }
+
 bool AMDGPULegalizerInfo::legalizeGlobalValue(
   MachineInstr &MI, MachineRegisterInfo &MRI,
   MachineIRBuilder &B) const {
@@ -2828,6 +2876,12 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue(
     return true;
   }
 
+  if (ST.isAmdPalOS()) {
+    buildAbsGlobalAddress(DstReg, Ty, B, GV, 0, MRI);
+    MI.eraseFromParent();
+    return true;
+  }
+
   const SITargetLowering *TLI = ST.getTargetLowering();
 
   if (TLI->shouldEmitFixup(GV)) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index ab7fe92d6a7201e..f60c0498cb6040c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -68,6 +68,10 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
                                const GlobalValue *GV, int64_t Offset,
                                unsigned GAFlags = SIInstrInfo::MO_NONE) const;
 
+  void buildAbsGlobalAddress(Register DstReg, LLT PtrTy, MachineIRBuilder &B,
+                             const GlobalValue *GV, int64_t Offset,
+                             MachineRegisterInfo &MRI) const;
+
   bool legalizeGlobalValue(MachineInstr &MI, MachineRegisterInfo &MRI,
                            MachineIRBuilder &B) const;
   bool legalizeLoad(LegalizerHelper &Helper, MachineInstr &MI) const;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
index 3f188478ca8bc66..58eed81e075560c 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
@@ -63,6 +63,10 @@ unsigned AMDGPUELFObjectWriter::getRelocType(MCContext &Ctx,
     return ELF::R_AMDGPU_REL32_HI;
   case MCSymbolRefExpr::VK_AMDGPU_REL64:
     return ELF::R_AMDGPU_REL64;
+  case MCSymbolRefExpr::VK_AMDGPU_ABS32_LO:
+    return ELF::R_AMDGPU_ABS32_LO;
+  case MCSymbolRefExpr::VK_AMDGPU_ABS32_HI:
+    return ELF::R_AMDGPU_ABS32_HI;
   }
 
   MCFixupKind Kind = Fixup.getKind();
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index f170428b38c49a5..117a803e2b4970a 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5708,7 +5708,10 @@ bool SITargetLowering::shouldEmitFixup(const GlobalValue *GV) const {
          AMDGPU::shouldEmitConstantsToTextSection(TT);
 }
 
-bool SITargetLowering::shouldEmitGOTReloc(const GlobalValue *GV) const {
+bool SITargetLowering::shouldEmitGOTReloc(const GlobalValue *GV) const {  
+  if (Subtarget->isAmdPalOS())
+    return false;
+
   // FIXME: Either avoid relying on address space here or change the default
   // address space for functions to avoid the explicit check.
   return (GV->getValueType()->isFunctionTy() ||
@@ -6726,9 +6729,22 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
     return DAG.getNode(AMDGPUISD::LDS, DL, MVT::i32, GA);
   }
 
+  if (Subtarget->isAmdPalOS()) {
+    SDValue AddrLo = DAG.getTargetGlobalAddress(
+        GV, DL, MVT::i32, GSD->getOffset(), SIInstrInfo::MO_ABS32_LO);
+    AddrLo = {DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, AddrLo), 0};
+
+    SDValue AddrHi = DAG.getTargetGlobalAddress(
+        GV, DL, MVT::i32, GSD->getOffset(), SIInstrInfo::MO_ABS32_HI);
+    AddrHi = {DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, AddrHi), 0};
+
+    return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, AddrLo, AddrHi);
+  }
+
   if (shouldEmitFixup(GV))
     return buildPCRelGlobalAddress(DAG, GV, DL, GSD->getOffset(), PtrVT);
-  else if (shouldEmitPCReloc(GV))
+  
+  if (shouldEmitPCReloc(GV))
     return buildPCRelGlobalAddress(DAG, GV, DL, GSD->getOffset(), PtrVT,
                                    SIInstrInfo::MO_REL32);
 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll
index b60dd6dea7f79d8..fcaafa64bd9e996 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -stop-after=legalizer < %s | FileCheck -check-prefix=GCN %s
-; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -stop-after=legalizer < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -stop-after=legalizer < %s | FileCheck -check-prefix=GCN-PAL %s
 
 @external_constant = external addrspace(4) constant i32, align 4
 @external_constant32 = external addrspace(6) constant i32, align 4
@@ -22,6 +22,14 @@ define ptr addrspace(4) @external_constant_got() {
   ; GCN-NEXT:   $vgpr0 = COPY [[UV]](s32)
   ; GCN-NEXT:   $vgpr1 = COPY [[UV1]](s32)
   ; GCN-NEXT:   SI_RETURN implicit $vgpr0, implicit $vgpr1
+
+  ; GCN-PAL-LABEL: name: external_constant_got
+  ; GCN-PAL: bb.1 (%ir-block.0):
+  ; GCN-PAL-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @external_constant
+  ; GCN-PAL-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-hi) @external_constant
+  ; GCN-PAL-NEXT:   $vgpr0 = COPY [[S_MOV_B32_]](s32)
+  ; GCN-PAL-NEXT:   $vgpr1 = COPY [[S_MOV_B32_1]](s32)
+  ; GCN-PAL-NEXT:   SI_RETURN implicit $vgpr0, implicit $vgpr1
   ret ptr addrspace(4) @external_constant
 }
 
@@ -34,6 +42,14 @@ define ptr addrspace(1) @external_global_got() {
   ; GCN-NEXT:   $vgpr0 = COPY [[UV]](s32)
   ; GCN-NEXT:   $vgpr1 = COPY [[UV1]](s32)
   ; GCN-NEXT:   SI_RETURN implicit $vgpr0, implicit $vgpr1
+
+  ; GCN-PAL-LABEL: name: external_global_got
+  ; GCN-PAL: bb.1 (%ir-block.0):
+  ; GCN-PAL-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @external_global
+  ; GCN-PAL-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-hi) @external_global
+  ; GCN-PAL-NEXT:   $vgpr0 = COPY [[S_MOV_B32_]](s32)
+  ; GCN-PAL-NEXT:   $vgpr1 = COPY [[S_MOV_B32_1]](s32)
+  ; GCN-PAL-NEXT:   SI_RETURN implicit $vgpr0, implicit $vgpr1
   ret ptr addrspace(1) @external_global
 }
 
@@ -46,6 +62,14 @@ define ptr addrspace(999) @external_other_got() {
   ; GCN-NEXT:   $vgpr0 = COPY [[UV]](s32)
   ; GCN-NEXT:   $vgpr1 = COPY [[UV1]](s32)
   ; GCN-NEXT:   SI_RETURN implicit $vgpr0, implicit $vgpr1
+
+  ; GCN-PAL-LABEL: name: external_other_got
+  ; GCN-PAL: bb.1 (%ir-block.0):
+  ; GCN-PAL-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @external_other
+  ; GCN-PAL-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-hi) @external_other
+  ; GCN-PAL-NEXT:   $vgpr0 = COPY [[S_MOV_B32_]](s32)
+  ; GCN-PAL-NEXT:   $vgpr1 = COPY [[S_MOV_B32_1]](s32)
+  ; GCN-PAL-NEXT:   SI_RETURN implicit $vgpr0, implicit $vgpr1
   ret ptr addrspace(999) @external_other
 }
 
@@ -57,6 +81,14 @@ define ptr addrspace(4) @internal_constant_pcrel() {
   ; GCN-NEXT:   $vgpr0 = COPY [[UV]](s32)
   ; GCN-NEXT:   $vgpr1 = COPY [[UV1]](s32)
   ; GCN-NEXT:   SI_RETURN implicit $vgpr0, implicit $vgpr1
+
+  ; GCN-PAL-LABEL: name: internal_constant_pcrel
+  ; GCN-PAL: bb.1 (%ir-block.0):
+  ; GCN-PAL-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @internal_constant
+  ; GCN-PAL-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-hi) @internal_constant
+  ; GCN-PAL-NEXT:   $vgpr0 = COPY [[S_MOV_B32_]](s32)
+  ; GCN-PAL-NEXT:   $vgpr1 = COPY [[S_MOV_B32_1]](s32)
+  ; GCN-PAL-NEXT:   SI_RETURN implicit $vgpr0, implicit $vgpr1
   ret ptr addrspace(4) @internal_constant
 }
 
@@ -68,6 +100,14 @@ define ptr addrspace(1) @internal_global_pcrel() {
   ; GCN-NEXT:   $vgpr0 = COPY [[UV]](s32)
   ; GCN-NEXT:   $vgpr1 = COPY [[UV1]](s32)
   ; GCN-NEXT:   SI_RETURN implicit $vgpr0, implicit $vgpr1
+
+  ; GCN-PAL-LABEL: name: internal_global_pcrel
+  ; GCN-PAL: bb.1 (%ir-block.0):
+  ; GCN-PAL-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @internal_global
+  ; GCN-PAL-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-hi) @internal_global
+  ; GCN-PAL-NEXT:   $vgpr0 = COPY [[S_MOV_B32_]](s32)
+  ; GCN-PAL-NEXT:   $vgpr1 = COPY [[S_MOV_B32_1]](s32)
+  ; GCN-PAL-NEXT:   SI_RETURN implicit $vgpr0, implicit $vgpr1
   ret ptr addrspace(1) @internal_global
 }
 
@@ -79,6 +119,14 @@ define ptr addrspace(999) @internal_other_pcrel() {
   ; GCN-NEXT:   $vgpr0 = COPY [[UV]](s32)
   ; GCN-NEXT:   $vgpr1 = COPY [[UV1]](s32)
   ; GCN-NEXT:   SI_RETURN implicit $vgpr0, implicit $vgpr1
+
+  ; GCN-PAL-LABEL: name: internal_other_pcrel
+  ; GCN-PAL: bb.1 (%ir-block.0):
+  ; GCN-PAL-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @internal_other
+  ; GCN-PAL-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-hi) @internal_other
+  ; GCN-PAL-NEXT:   $vgpr0 = COPY [[S_MOV_B32_]](s32)
+  ; GCN-PAL-NEXT:   $vgpr1 = COPY [[S_MOV_B32_1]](s32)
+  ; GCN-PAL-NEXT:   SI_RETURN implicit $vgpr0, implicit $vgpr1
   ret ptr addrspace(999) @internal_other
 }
 
@@ -90,6 +138,13 @@ define ptr addrspace(6) @external_constant32_got() {
   ; GCN-NEXT:   [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[LOAD]](p4), 0
   ; GCN-NEXT:   $vgpr0 = COPY [[EXTRACT]](p6)
   ; GCN-NEXT:   SI_RETURN implicit $vgpr0
+
+  ; GCN-PAL-LABEL: name: external_constant32_got
+  ; GCN-PAL: bb.1 (%ir-block.0):
+  ; GCN-PAL-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @external_constant32
+  ; GCN-PAL-NEXT:   [[MV:%[0-9]+]]:_(p6) = G_MERGE_VALUES [[S_MOV_B32_]](s32)
+  ; GCN-PAL-NEXT:   $vgpr0 = COPY [[MV]](p6)
+  ; GCN-PAL-NEXT:   SI_RETURN implicit $vgpr0
   ret ptr addrspace(6) @external_constant32
 }
 
@@ -100,5 +155,12 @@ define ptr addrspace(6) @internal_constant32_pcrel() {
   ; GCN-NEXT:   [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[SI_PC_ADD_REL_OFFSET]](p4), 0
   ; GCN-NEXT:   $vgpr0 = COPY [[EXTRACT]](p6)
   ; GCN-NEXT:   SI_RETURN implicit $vgpr0
+
+  ; GCN-PAL-LABEL: name: internal_constant32_pcrel
+  ; GCN-PAL: bb.1 (%ir-block.0):
+  ; GCN-PAL-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @internal_constant32
+  ; GCN-PAL-NEXT:   [[MV:%[0-9]+]]:_(p6) = G_MERGE_VALUES [[S_MOV_B32_]](s32)
+  ; GCN-PAL-NEXT:   $vgpr0 = COPY [[MV]](p6)
+  ; GCN-PAL-NEXT:   SI_RETURN implicit $vgpr0
   ret ptr addrspace(6) @internal_constant32
 }
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-reloc-const.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-reloc-const.ll
index 2feeb83e6f1467b..c5dbfb0f219bd9b 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-reloc-const.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-reloc-const.ll
@@ -12,7 +12,7 @@
 
 ; ELF: Relocations [
 ; ELF-NEXT: Section (3) .rel.text {
-; ELF-NEXT: 0x{{[0-9]+}} R_AMDGPU_ABS32 doff_0_0_b{{$}}
+; ELF-NEXT: 0x{{[0-9]+}} R_AMDGPU_ABS32_LO doff_0_0_b{{$}}
 
 define amdgpu_ps void @ps_main(i32 %arg, i32 inreg %arg1, i32 inreg %arg2) local_unnamed_addr #0 {
   %rc = call i32 @llvm.amdgcn.reloc.constant(metadata !1)
diff --git a/llvm/test/CodeGen/AMDGPU/ds_read2.ll b/llvm/test/CodeGen/AMDGPU/ds_read2.ll
index 9ec9414d91171b7..9d94f8e6ca227e8 100644
--- a/llvm/test/CodeGen/AMDGPU/ds_read2.ll
+++ b/llvm/test/CodeGen/AMDGPU/ds_read2.ll
@@ -1335,9 +1335,9 @@ define amdgpu_kernel void @ds_read_call_read(ptr addrspace(1) %out, ptr addrspac
 ; CI-NEXT:    s_mov_b32 s40, s0
 ; CI-NEXT:    s_load_dwordx4 s[40:43], s[40:41], 0x0
 ; CI-NEXT:    s_mov_b32 s14, s10
-; CI-NEXT:    s_mov_b32 s12, s8
-; CI-NEXT:    s_mov_b32 s13, s9
 ; CI-NEXT:    v_lshlrev_b32_e32 v3, 2, v0
+; CI-NEXT:    s_mov_b32 m0, -1
+; CI-NEXT:    s_mov_b32 s12, s8
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
 ; CI-NEXT:    s_add_u32 s40, s40, s11
 ; CI-NEXT:    s_mov_b64 s[10:11], s[6:7]
@@ -1345,27 +1345,24 @@ define amdgpu_kernel void @ds_read_call_read(ptr addrspace(1) %out, ptr addrspac
 ; CI-NEXT:    s_load_dword s6, s[4:5], 0x2
 ; CI-NEXT:    s_addc_u32 s41, s41, 0
 ; CI-NEXT:    s_add_u32 s8, s4, 12
-; CI-NEXT:    s_addc_u32 s9, s5, 0
-; CI-NEXT:    s_getpc_b64 s[4:5]
-; CI-NEXT:    s_add_u32 s4, s4, void_func_void@gotpcrel32@lo+4
-; CI-NEXT:    s_addc_u32 s5, s5, void_func_void@gotpcrel32@hi+12
+; CI-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; CI-NEXT:    s_mov_b32 s13, s9
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
 ; CI-NEXT:    v_add_i32_e32 v40, vcc, s6, v3
-; CI-NEXT:    s_mov_b32 m0, -1
-; CI-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
 ; CI-NEXT:    ds_read_b32 v41, v40
-; CI-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; CI-NEXT:    s_addc_u32 s9, s5, 0
 ; CI-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
 ; CI-NEXT:    v_or_b32_e32 v0, v0, v1
 ; CI-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; CI-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; CI-NEXT:    s_mov_b64 s[0:1], s[40:41]
+; CI-NEXT:    s_mov_b32 s17, void_func_void@abs32@hi
+; CI-NEXT:    s_mov_b32 s16, void_func_void@abs32@lo
 ; CI-NEXT:    v_or_b32_e32 v31, v0, v2
 ; CI-NEXT:    s_mov_b64 s[2:3], s[42:43]
 ; CI-NEXT:    s_mov_b32 s32, 0
 ; CI-NEXT:    s_mov_b32 s39, 0xf000
 ; CI-NEXT:    s_mov_b32 s38, -1
-; CI-NEXT:    s_waitcnt lgkmcnt(0)
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; CI-NEXT:    ds_read_b32 v0, v40 offset:4
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1384,28 +1381,25 @@ define amdgpu_kernel void @ds_read_call_read(ptr addrspace(1) %out, ptr addrspac
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_add_u32 s36, s36, s11
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
 ; GFX9-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX9-NEXT:    s_load_dword s6, s[4:5], 0x8
 ; GFX9-NEXT:    s_load_dwordx2 s[34:35], s[4:5], 0x0
+; GFX9-NEXT:    s_addc_u32 s37, s37, 0
 ; GFX9-NEXT:    s_add_u32 s8, s4, 12
 ; GFX9-NEXT:    s_addc_u32 s9, s5, 0
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, void_func_void@gotpcrel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, void_func_void@gotpcrel32@hi+12
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_lshl_add_u32 v41, v0, 2, s6
-; GFX9-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
 ; GFX9-NEXT:    ds_read_b32 v42, v41
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX9-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX9-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT:    s_mov_b32 s17, void_func_void@abs32@hi
+; GFX9-NEXT:    s_mov_b32 s16, void_func_void@abs32@lo
 ; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-NEXT:    s_mov_b32 s32, 0
 ; GFX9-NEXT:    v_mov_b32_e32 v40, 0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-NEXT:    ds_read_b32 v0, v41 offset:4
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/gfx-call-non-gfx-func.ll b/llvm/test/CodeGen/AMDGPU/gfx-call-non-gfx-func.ll
index 7c9d01db9c2c093..9ab8be0485eddbe 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx-call-non-gfx-func.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx-call-non-gfx-func.ll
@@ -33,21 +33,18 @@ define amdgpu_gfx void @gfx_func() {
 ; SDAG-NEXT:    v_writelane_b32 v40, s21, 17
 ; SDAG-NEXT:    v_writelane_b32 v40, s22, 18
 ; SDAG-NEXT:    v_writelane_b32 v40, s23, 19
-; SDAG-NEXT:    s_addk_i32 s32, 0x400
 ; SDAG-NEXT:    v_writelane_b32 v40, s24, 20
 ; SDAG-NEXT:    v_writelane_b32 v40, s25, 21
-; SDAG-NEXT:    s_getpc_b64 s[34:35]
-; SDAG-NEXT:    s_add_u32 s34, s34, extern_c_func@gotpcrel32@lo+4
-; SDAG-NEXT:    s_addc_u32 s35, s35, extern_c_func@gotpcrel32@hi+12
 ; SDAG-NEXT:    v_writelane_b32 v40, s26, 22
-; SDAG-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
 ; SDAG-NEXT:    v_writelane_b32 v40, s27, 23
 ; SDAG-NEXT:    v_writelane_b32 v40, s28, 24
 ; SDAG-NEXT:    v_writelane_b32 v40, s29, 25
 ; SDAG-NEXT:    v_writelane_b32 v40, s30, 26
+; SDAG-NEXT:    s_mov_b32 s35, extern_c_func@abs32@hi
+; SDAG-NEXT:    s_mov_b32 s34, extern_c_func@abs32@lo
 ; SDAG-NEXT:    s_mov_b64 s[8:9], 0
+; SDAG-NEXT:    s_addk_i32 s32, 0x400
 ; SDAG-NEXT:    v_writelane_b32 v40, s31, 27
-; SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; SDAG-NEXT:    s_swappc_b64 s[30:31], s[34:35]
 ; SDAG-NEXT:    v_readlane_b32 s31, v40, 27
 ; SDAG-NEXT:    v_readlane_b32 s30, v40, 26
@@ -113,21 +110,18 @@ define amdgpu_gfx void @gfx_func() {
 ; GISEL-NEXT:    v_writelane_b32 v40, s21, 17
 ; GISEL-NEXT:    v_writelane_b32 v40, s22, 18
 ; GISEL-NEXT:    v_writelane_b32 v40, s23, 19
-; GISEL-NEXT:    s_addk_i32 s32, 0x400
 ; GISEL-NEXT:    v_writelane_b32 v40, s24, 20
 ; GISEL-NEXT:    v_writelane_b32 v40, s25, 21
-; GISEL-NEXT:    s_getpc_b64 s[34:35]
-; GISEL-NEXT:    s_add_u32 s34, s34, extern_c_func@gotpcrel32@lo+4
-; GISEL-NEXT:    s_addc_u32 s35, s35, extern_c_func@gotpcrel32@hi+12
 ; GISEL-NEXT:    v_writelane_b32 v40, s26, 22
-; GISEL-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
 ; GISEL-NEXT:    v_writelane_b32 v40, s27, 23
 ; GISEL-NEXT:    v_writelane_b32 v40, s28, 24
 ; GISEL-NEXT:    v_writelane_b32 v40, s29, 25
 ; GISEL-NEXT:    v_writelane_b32 v40, s30, 26
+; GISEL-NEXT:    s_mov_b32 s34, extern_c_func@abs32@lo
+; GISEL-NEXT:    s_mov_b32 s35, extern_c_func@abs32@hi
 ; GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
+; GISEL-NEXT:    s_addk_i32 s32, 0x400
 ; GISEL-NEXT:    v_writelane_b32 v40, s31, 27
-; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GISEL-NEXT:    s_swappc_b64 s[30:31], s[34:35]
 ; GISEL-NEXT:    v_readlane_b32 s31, v40, 27
 ; GISEL-NEXT:    v_readlane_b32 s30, v40, 26
diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
index c01634231403241..f827a78125b7785 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
@@ -119,10 +119,9 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm() #0 {
 ; GFX9-NEXT:    s_addk_i32 s32, 0x400
 ; GFX9-NEXT:    v_writelane...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/67791


More information about the llvm-commits mailing list