[llvm] [AMDGPU] Use absolute relocations when compiling for AMDPAL (PR #67791)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 29 04:48:25 PDT 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-globalisel
<details>
<summary>Changes</summary>
The primary ISA-independent justification for using PC-relative addressing is that it makes code position-independent and therefore allows sharing of .text pages between processes.
Since PAL does not share .text pages, we can use absolute relocations when compiling for AMDPAL.
---
Patch is 418.74 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/67791.diff
16 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp (+54)
- (modified) llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h (+4)
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp (+4)
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+18-2)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll (+63-1)
- (modified) llvm/test/CodeGen/AMDGPU/amdgpu-reloc-const.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/ds_read2.ll (+10-16)
- (modified) llvm/test/CodeGen/AMDGPU/gfx-call-non-gfx-func.ll (+6-12)
- (modified) llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll (+966-1377)
- (modified) llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll (+83-112)
- (modified) llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll (+68-131)
- (modified) llvm/test/CodeGen/AMDGPU/global-constant.ll (+10-1)
- (modified) llvm/test/CodeGen/AMDGPU/lds-relocs.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll (+4-10)
- (modified) llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll (+30-30)
- (modified) llvm/test/CodeGen/AMDGPU/tail-call-amdgpu-gfx.ll (+19-11)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index db226a302900160..f04f85b7deaa5b7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -17,7 +17,10 @@
#include "AMDGPUGlobalISelUtils.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPUTargetMachine.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/BinaryFormat/ELF.h"
@@ -26,6 +29,7 @@
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
@@ -2764,6 +2768,50 @@ bool AMDGPULegalizerInfo::buildPCRelGlobalAddress(Register DstReg, LLT PtrTy,
return true;
}
+
+ // Emit an ABS32_LO / ABS32_HI relocation stub.
+ void AMDGPULegalizerInfo::buildAbsGlobalAddress(
+ Register DstReg, LLT PtrTy, MachineIRBuilder &B, const GlobalValue *GV,
+ int64_t Offset, MachineRegisterInfo &MRI) const {
+ bool IsDwordTy = PtrTy.getSizeInBits() == 32;
+
+ LLT S32 = LLT::scalar(32);
+
+ Register AddrDst;
+ if (IsDwordTy) {
+ AddrDst = MRI.createGenericVirtualRegister(S32);
+ MRI.setRegClass(AddrDst, &AMDGPU::SReg_32RegClass);
+ } else {
+ assert(PtrTy.getSizeInBits() == 64 &&
+ "Must provide a 64-bit pointer type!");
+ AddrDst = MRI.createGenericVirtualRegister(LLT::scalar(64));
+ MRI.setRegClass(AddrDst, &AMDGPU::SReg_64RegClass);
+ }
+
+ SmallVector<Register> Operands;
+
+ Register AddrLo = MRI.createGenericVirtualRegister(S32);
+ MRI.setRegClass(AddrLo, &AMDGPU::SReg_32RegClass);
+
+ B.buildInstr(AMDGPU::S_MOV_B32)
+ .addDef(AddrLo)
+ .addGlobalAddress(GV, 0, SIInstrInfo::MO_ABS32_LO);
+
+ Operands.push_back(AddrLo);
+
+ if (!IsDwordTy) {
+ Register AddrHi = MRI.createGenericVirtualRegister(S32);
+ MRI.setRegClass(AddrHi, &AMDGPU::SReg_32RegClass);
+ B.buildInstr(AMDGPU::S_MOV_B32)
+ .addDef(AddrHi)
+ .addGlobalAddress(GV, 0, SIInstrInfo::MO_ABS32_HI);
+
+ Operands.push_back(AddrHi);
+ }
+
+ B.buildMergeValues(DstReg, Operands);
+ }
+
bool AMDGPULegalizerInfo::legalizeGlobalValue(
MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
@@ -2828,6 +2876,12 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue(
return true;
}
+ if (ST.isAmdPalOS()) {
+ buildAbsGlobalAddress(DstReg, Ty, B, GV, 0, MRI);
+ MI.eraseFromParent();
+ return true;
+ }
+
const SITargetLowering *TLI = ST.getTargetLowering();
if (TLI->shouldEmitFixup(GV)) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index ab7fe92d6a7201e..f60c0498cb6040c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -68,6 +68,10 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
const GlobalValue *GV, int64_t Offset,
unsigned GAFlags = SIInstrInfo::MO_NONE) const;
+ void buildAbsGlobalAddress(Register DstReg, LLT PtrTy, MachineIRBuilder &B,
+ const GlobalValue *GV, int64_t Offset,
+ MachineRegisterInfo &MRI) const;
+
bool legalizeGlobalValue(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
bool legalizeLoad(LegalizerHelper &Helper, MachineInstr &MI) const;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
index 3f188478ca8bc66..58eed81e075560c 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
@@ -63,6 +63,10 @@ unsigned AMDGPUELFObjectWriter::getRelocType(MCContext &Ctx,
return ELF::R_AMDGPU_REL32_HI;
case MCSymbolRefExpr::VK_AMDGPU_REL64:
return ELF::R_AMDGPU_REL64;
+ case MCSymbolRefExpr::VK_AMDGPU_ABS32_LO:
+ return ELF::R_AMDGPU_ABS32_LO;
+ case MCSymbolRefExpr::VK_AMDGPU_ABS32_HI:
+ return ELF::R_AMDGPU_ABS32_HI;
}
MCFixupKind Kind = Fixup.getKind();
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index f170428b38c49a5..117a803e2b4970a 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5708,7 +5708,10 @@ bool SITargetLowering::shouldEmitFixup(const GlobalValue *GV) const {
AMDGPU::shouldEmitConstantsToTextSection(TT);
}
-bool SITargetLowering::shouldEmitGOTReloc(const GlobalValue *GV) const {
+bool SITargetLowering::shouldEmitGOTReloc(const GlobalValue *GV) const {
+ if (Subtarget->isAmdPalOS())
+ return false;
+
// FIXME: Either avoid relying on address space here or change the default
// address space for functions to avoid the explicit check.
return (GV->getValueType()->isFunctionTy() ||
@@ -6726,9 +6729,22 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
return DAG.getNode(AMDGPUISD::LDS, DL, MVT::i32, GA);
}
+ if (Subtarget->isAmdPalOS()) {
+ SDValue AddrLo = DAG.getTargetGlobalAddress(
+ GV, DL, MVT::i32, GSD->getOffset(), SIInstrInfo::MO_ABS32_LO);
+ AddrLo = {DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, AddrLo), 0};
+
+ SDValue AddrHi = DAG.getTargetGlobalAddress(
+ GV, DL, MVT::i32, GSD->getOffset(), SIInstrInfo::MO_ABS32_HI);
+ AddrHi = {DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, AddrHi), 0};
+
+ return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, AddrLo, AddrHi);
+ }
+
if (shouldEmitFixup(GV))
return buildPCRelGlobalAddress(DAG, GV, DL, GSD->getOffset(), PtrVT);
- else if (shouldEmitPCReloc(GV))
+
+ if (shouldEmitPCReloc(GV))
return buildPCRelGlobalAddress(DAG, GV, DL, GSD->getOffset(), PtrVT,
SIInstrInfo::MO_REL32);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll
index b60dd6dea7f79d8..fcaafa64bd9e996 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -stop-after=legalizer < %s | FileCheck -check-prefix=GCN %s
-; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -stop-after=legalizer < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -stop-after=legalizer < %s | FileCheck -check-prefix=GCN-PAL %s
@external_constant = external addrspace(4) constant i32, align 4
@external_constant32 = external addrspace(6) constant i32, align 4
@@ -22,6 +22,14 @@ define ptr addrspace(4) @external_constant_got() {
; GCN-NEXT: $vgpr0 = COPY [[UV]](s32)
; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+
+ ; GCN-PAL-LABEL: name: external_constant_got
+ ; GCN-PAL: bb.1 (%ir-block.0):
+ ; GCN-PAL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @external_constant
+ ; GCN-PAL-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-hi) @external_constant
+ ; GCN-PAL-NEXT: $vgpr0 = COPY [[S_MOV_B32_]](s32)
+ ; GCN-PAL-NEXT: $vgpr1 = COPY [[S_MOV_B32_1]](s32)
+ ; GCN-PAL-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
ret ptr addrspace(4) @external_constant
}
@@ -34,6 +42,14 @@ define ptr addrspace(1) @external_global_got() {
; GCN-NEXT: $vgpr0 = COPY [[UV]](s32)
; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+
+ ; GCN-PAL-LABEL: name: external_global_got
+ ; GCN-PAL: bb.1 (%ir-block.0):
+ ; GCN-PAL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @external_global
+ ; GCN-PAL-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-hi) @external_global
+ ; GCN-PAL-NEXT: $vgpr0 = COPY [[S_MOV_B32_]](s32)
+ ; GCN-PAL-NEXT: $vgpr1 = COPY [[S_MOV_B32_1]](s32)
+ ; GCN-PAL-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
ret ptr addrspace(1) @external_global
}
@@ -46,6 +62,14 @@ define ptr addrspace(999) @external_other_got() {
; GCN-NEXT: $vgpr0 = COPY [[UV]](s32)
; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+
+ ; GCN-PAL-LABEL: name: external_other_got
+ ; GCN-PAL: bb.1 (%ir-block.0):
+ ; GCN-PAL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @external_other
+ ; GCN-PAL-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-hi) @external_other
+ ; GCN-PAL-NEXT: $vgpr0 = COPY [[S_MOV_B32_]](s32)
+ ; GCN-PAL-NEXT: $vgpr1 = COPY [[S_MOV_B32_1]](s32)
+ ; GCN-PAL-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
ret ptr addrspace(999) @external_other
}
@@ -57,6 +81,14 @@ define ptr addrspace(4) @internal_constant_pcrel() {
; GCN-NEXT: $vgpr0 = COPY [[UV]](s32)
; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+
+ ; GCN-PAL-LABEL: name: internal_constant_pcrel
+ ; GCN-PAL: bb.1 (%ir-block.0):
+ ; GCN-PAL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @internal_constant
+ ; GCN-PAL-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-hi) @internal_constant
+ ; GCN-PAL-NEXT: $vgpr0 = COPY [[S_MOV_B32_]](s32)
+ ; GCN-PAL-NEXT: $vgpr1 = COPY [[S_MOV_B32_1]](s32)
+ ; GCN-PAL-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
ret ptr addrspace(4) @internal_constant
}
@@ -68,6 +100,14 @@ define ptr addrspace(1) @internal_global_pcrel() {
; GCN-NEXT: $vgpr0 = COPY [[UV]](s32)
; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+
+ ; GCN-PAL-LABEL: name: internal_global_pcrel
+ ; GCN-PAL: bb.1 (%ir-block.0):
+ ; GCN-PAL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @internal_global
+ ; GCN-PAL-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-hi) @internal_global
+ ; GCN-PAL-NEXT: $vgpr0 = COPY [[S_MOV_B32_]](s32)
+ ; GCN-PAL-NEXT: $vgpr1 = COPY [[S_MOV_B32_1]](s32)
+ ; GCN-PAL-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
ret ptr addrspace(1) @internal_global
}
@@ -79,6 +119,14 @@ define ptr addrspace(999) @internal_other_pcrel() {
; GCN-NEXT: $vgpr0 = COPY [[UV]](s32)
; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+
+ ; GCN-PAL-LABEL: name: internal_other_pcrel
+ ; GCN-PAL: bb.1 (%ir-block.0):
+ ; GCN-PAL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @internal_other
+ ; GCN-PAL-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-hi) @internal_other
+ ; GCN-PAL-NEXT: $vgpr0 = COPY [[S_MOV_B32_]](s32)
+ ; GCN-PAL-NEXT: $vgpr1 = COPY [[S_MOV_B32_1]](s32)
+ ; GCN-PAL-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
ret ptr addrspace(999) @internal_other
}
@@ -90,6 +138,13 @@ define ptr addrspace(6) @external_constant32_got() {
; GCN-NEXT: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[LOAD]](p4), 0
; GCN-NEXT: $vgpr0 = COPY [[EXTRACT]](p6)
; GCN-NEXT: SI_RETURN implicit $vgpr0
+
+ ; GCN-PAL-LABEL: name: external_constant32_got
+ ; GCN-PAL: bb.1 (%ir-block.0):
+ ; GCN-PAL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @external_constant32
+ ; GCN-PAL-NEXT: [[MV:%[0-9]+]]:_(p6) = G_MERGE_VALUES [[S_MOV_B32_]](s32)
+ ; GCN-PAL-NEXT: $vgpr0 = COPY [[MV]](p6)
+ ; GCN-PAL-NEXT: SI_RETURN implicit $vgpr0
ret ptr addrspace(6) @external_constant32
}
@@ -100,5 +155,12 @@ define ptr addrspace(6) @internal_constant32_pcrel() {
; GCN-NEXT: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[SI_PC_ADD_REL_OFFSET]](p4), 0
; GCN-NEXT: $vgpr0 = COPY [[EXTRACT]](p6)
; GCN-NEXT: SI_RETURN implicit $vgpr0
+
+ ; GCN-PAL-LABEL: name: internal_constant32_pcrel
+ ; GCN-PAL: bb.1 (%ir-block.0):
+ ; GCN-PAL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @internal_constant32
+ ; GCN-PAL-NEXT: [[MV:%[0-9]+]]:_(p6) = G_MERGE_VALUES [[S_MOV_B32_]](s32)
+ ; GCN-PAL-NEXT: $vgpr0 = COPY [[MV]](p6)
+ ; GCN-PAL-NEXT: SI_RETURN implicit $vgpr0
ret ptr addrspace(6) @internal_constant32
}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-reloc-const.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-reloc-const.ll
index 2feeb83e6f1467b..c5dbfb0f219bd9b 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-reloc-const.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-reloc-const.ll
@@ -12,7 +12,7 @@
; ELF: Relocations [
; ELF-NEXT: Section (3) .rel.text {
-; ELF-NEXT: 0x{{[0-9]+}} R_AMDGPU_ABS32 doff_0_0_b{{$}}
+; ELF-NEXT: 0x{{[0-9]+}} R_AMDGPU_ABS32_LO doff_0_0_b{{$}}
define amdgpu_ps void @ps_main(i32 %arg, i32 inreg %arg1, i32 inreg %arg2) local_unnamed_addr #0 {
%rc = call i32 @llvm.amdgcn.reloc.constant(metadata !1)
diff --git a/llvm/test/CodeGen/AMDGPU/ds_read2.ll b/llvm/test/CodeGen/AMDGPU/ds_read2.ll
index 9ec9414d91171b7..9d94f8e6ca227e8 100644
--- a/llvm/test/CodeGen/AMDGPU/ds_read2.ll
+++ b/llvm/test/CodeGen/AMDGPU/ds_read2.ll
@@ -1335,9 +1335,9 @@ define amdgpu_kernel void @ds_read_call_read(ptr addrspace(1) %out, ptr addrspac
; CI-NEXT: s_mov_b32 s40, s0
; CI-NEXT: s_load_dwordx4 s[40:43], s[40:41], 0x0
; CI-NEXT: s_mov_b32 s14, s10
-; CI-NEXT: s_mov_b32 s12, s8
-; CI-NEXT: s_mov_b32 s13, s9
; CI-NEXT: v_lshlrev_b32_e32 v3, 2, v0
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: s_mov_b32 s12, s8
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: s_add_u32 s40, s40, s11
; CI-NEXT: s_mov_b64 s[10:11], s[6:7]
@@ -1345,27 +1345,24 @@ define amdgpu_kernel void @ds_read_call_read(ptr addrspace(1) %out, ptr addrspac
; CI-NEXT: s_load_dword s6, s[4:5], 0x2
; CI-NEXT: s_addc_u32 s41, s41, 0
; CI-NEXT: s_add_u32 s8, s4, 12
-; CI-NEXT: s_addc_u32 s9, s5, 0
-; CI-NEXT: s_getpc_b64 s[4:5]
-; CI-NEXT: s_add_u32 s4, s4, void_func_void@gotpcrel32@lo+4
-; CI-NEXT: s_addc_u32 s5, s5, void_func_void@gotpcrel32@hi+12
+; CI-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; CI-NEXT: s_mov_b32 s13, s9
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: v_add_i32_e32 v40, vcc, s6, v3
-; CI-NEXT: s_mov_b32 m0, -1
-; CI-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; CI-NEXT: ds_read_b32 v41, v40
-; CI-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; CI-NEXT: s_addc_u32 s9, s5, 0
; CI-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; CI-NEXT: v_or_b32_e32 v0, v0, v1
; CI-NEXT: s_mov_b64 s[4:5], s[0:1]
; CI-NEXT: s_mov_b64 s[6:7], s[2:3]
; CI-NEXT: s_mov_b64 s[0:1], s[40:41]
+; CI-NEXT: s_mov_b32 s17, void_func_void@abs32@hi
+; CI-NEXT: s_mov_b32 s16, void_func_void@abs32@lo
; CI-NEXT: v_or_b32_e32 v31, v0, v2
; CI-NEXT: s_mov_b64 s[2:3], s[42:43]
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_mov_b32 s39, 0xf000
; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CI-NEXT: ds_read_b32 v0, v40 offset:4
; CI-NEXT: s_waitcnt lgkmcnt(0)
@@ -1384,28 +1381,25 @@ define amdgpu_kernel void @ds_read_call_read(ptr addrspace(1) %out, ptr addrspac
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_add_u32 s36, s36, s11
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX9-NEXT: s_load_dword s6, s[4:5], 0x8
; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x0
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_add_u32 s8, s4, 12
; GFX9-NEXT: s_addc_u32 s9, s5, 0
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, void_func_void@gotpcrel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, void_func_void@gotpcrel32@hi+12
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_lshl_add_u32 v41, v0, 2, s6
-; GFX9-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; GFX9-NEXT: ds_read_b32 v42, v41
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b32 s17, void_func_void@abs32@hi
+; GFX9-NEXT: s_mov_b32 s16, void_func_void@abs32@lo
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: v_mov_b32_e32 v40, 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-NEXT: ds_read_b32 v0, v41 offset:4
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/gfx-call-non-gfx-func.ll b/llvm/test/CodeGen/AMDGPU/gfx-call-non-gfx-func.ll
index 7c9d01db9c2c093..9ab8be0485eddbe 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx-call-non-gfx-func.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx-call-non-gfx-func.ll
@@ -33,21 +33,18 @@ define amdgpu_gfx void @gfx_func() {
; SDAG-NEXT: v_writelane_b32 v40, s21, 17
; SDAG-NEXT: v_writelane_b32 v40, s22, 18
; SDAG-NEXT: v_writelane_b32 v40, s23, 19
-; SDAG-NEXT: s_addk_i32 s32, 0x400
; SDAG-NEXT: v_writelane_b32 v40, s24, 20
; SDAG-NEXT: v_writelane_b32 v40, s25, 21
-; SDAG-NEXT: s_getpc_b64 s[34:35]
-; SDAG-NEXT: s_add_u32 s34, s34, extern_c_func@gotpcrel32@lo+4
-; SDAG-NEXT: s_addc_u32 s35, s35, extern_c_func@gotpcrel32@hi+12
; SDAG-NEXT: v_writelane_b32 v40, s26, 22
-; SDAG-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
; SDAG-NEXT: v_writelane_b32 v40, s27, 23
; SDAG-NEXT: v_writelane_b32 v40, s28, 24
; SDAG-NEXT: v_writelane_b32 v40, s29, 25
; SDAG-NEXT: v_writelane_b32 v40, s30, 26
+; SDAG-NEXT: s_mov_b32 s35, extern_c_func@abs32@hi
+; SDAG-NEXT: s_mov_b32 s34, extern_c_func@abs32@lo
; SDAG-NEXT: s_mov_b64 s[8:9], 0
+; SDAG-NEXT: s_addk_i32 s32, 0x400
; SDAG-NEXT: v_writelane_b32 v40, s31, 27
-; SDAG-NEXT: s_waitcnt lgkmcnt(0)
; SDAG-NEXT: s_swappc_b64 s[30:31], s[34:35]
; SDAG-NEXT: v_readlane_b32 s31, v40, 27
; SDAG-NEXT: v_readlane_b32 s30, v40, 26
@@ -113,21 +110,18 @@ define amdgpu_gfx void @gfx_func() {
; GISEL-NEXT: v_writelane_b32 v40, s21, 17
; GISEL-NEXT: v_writelane_b32 v40, s22, 18
; GISEL-NEXT: v_writelane_b32 v40, s23, 19
-; GISEL-NEXT: s_addk_i32 s32, 0x400
; GISEL-NEXT: v_writelane_b32 v40, s24, 20
; GISEL-NEXT: v_writelane_b32 v40, s25, 21
-; GISEL-NEXT: s_getpc_b64 s[34:35]
-; GISEL-NEXT: s_add_u32 s34, s34, extern_c_func@gotpcrel32@lo+4
-; GISEL-NEXT: s_addc_u32 s35, s35, extern_c_func@gotpcrel32@hi+12
; GISEL-NEXT: v_writelane_b32 v40, s26, 22
-; GISEL-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
; GISEL-NEXT: v_writelane_b32 v40, s27, 23
; GISEL-NEXT: v_writelane_b32 v40, s28, 24
; GISEL-NEXT: v_writelane_b32 v40, s29, 25
; GISEL-NEXT: v_writelane_b32 v40, s30, 26
+; GISEL-NEXT: s_mov_b32 s34, extern_c_func@abs32@lo
+; GISEL-NEXT: s_mov_b32 s35, extern_c_func@abs32@hi
; GISEL-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GISEL-NEXT: s_addk_i32 s32, 0x400
; GISEL-NEXT: v_writelane_b32 v40, s31, 27
-; GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GISEL-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GISEL-NEXT: v_readlane_b32 s31, v40, 27
; GISEL-NEXT: v_readlane_b32 s30, v40, 26
diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
index c01634231403241..f827a78125b7785 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
@@ -119,10 +119,9 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm() #0 {
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/67791
More information about the llvm-commits
mailing list