[llvm] [AArch64] Implement -fno-plt for SelectionDAG/GlobalISel (PR #78890)
Fangrui Song via llvm-commits
llvm-commits@lists.llvm.org
Tue Mar 5 13:51:11 PST 2024
https://github.com/MaskRay updated https://github.com/llvm/llvm-project/pull/78890
>From 549e4ea5b292e558e085d881abd4c93f29352029 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Sun, 21 Jan 2024 00:25:34 -0800
Subject: [PATCH] [𝘀𝗽𝗿] initial version
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.4
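
With -fno-plt, a call to an external function loads the callee address from
the GOT instead of going through a lazily bound PLT entry. The backend sees
this in two forms, both exercised by the updated test below: direct calls
carry the nonlazybind function attribute, while libcalls synthesized for
intrinsics (e.g. memset for llvm.memset) carry no attribute, so the
"RtLibUseGOT" module flag is consulted instead. A minimal IR sketch (the
exact Clang -fno-plt output may differ):

  declare void @external() nonlazybind

  !llvm.module.flags = !{!0}
  !0 = !{i32 7, !"RtLibUseGOT", i32 1}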
---
 llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 13 ++-
 llvm/lib/Target/AArch64/AArch64FastISel.cpp  |  7 ++
 .../Target/AArch64/AArch64ISelLowering.cpp   |  9 ++-
 llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 11 +--
 .../AArch64/GISel/AArch64CallLowering.cpp    | 13 ++-
 .../GISel/AArch64InstructionSelector.cpp     | 16 +++-
 .../AArch64/GISel/AArch64LegalizerInfo.cpp   |  3 +
 llvm/test/CodeGen/AArch64/nonlazybind.ll     | 81 +++++++++----------
 8 files changed, 93 insertions(+), 60 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index ccd9b13d730b60..d3484e5229e704 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -144,9 +144,16 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
// Try looking through a bitcast from one function type to another.
// Commonly happens with calls to objc_msgSend().
const Value *CalleeV = CB.getCalledOperand()->stripPointerCasts();
- if (const Function *F = dyn_cast<Function>(CalleeV))
- Info.Callee = MachineOperand::CreateGA(F, 0);
- else if (isa<GlobalIFunc>(CalleeV) || isa<GlobalAlias>(CalleeV)) {
+ if (const Function *F = dyn_cast<Function>(CalleeV)) {
+ if (F->hasFnAttribute(Attribute::NonLazyBind)) {
+ auto Reg =
+ MRI.createGenericVirtualRegister(getLLTForType(*F->getType(), DL));
+ MIRBuilder.buildGlobalValue(Reg, F);
+ Info.Callee = MachineOperand::CreateReg(Reg, false);
+ } else {
+ Info.Callee = MachineOperand::CreateGA(F, 0);
+ }
+ } else if (isa<GlobalIFunc>(CalleeV) || isa<GlobalAlias>(CalleeV)) {
// IR IFuncs and Aliases can't be forward declared (only defined), so the
// callee must be in the same TU and therefore we can direct-call it without
// worrying about it being out of range.
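
With this change, a nonlazybind callee is first materialized as a pointer
with G_GLOBAL_VALUE and then called indirectly; roughly, as a sketch of the
generic MIR rather than verbatim output:

  %callee:_(p0) = G_GLOBAL_VALUE @external
  ; ... the call is then emitted through %callee ...

How the G_GLOBAL_VALUE is expanded is left to the target; the AArch64
changes below select it to a GOT load when the reference is classified as
MO_GOT.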
diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index e98f6c4984a752..93d6024f34c09c 100644
--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -3202,6 +3202,13 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
if (Callee && !computeCallAddress(Callee, Addr))
return false;

+  // MO_GOT is not handled by FastISel. Intrinsic calls compiled with -fno-plt
+  // lack the nonlazybind attribute, so check "RtLibUseGOT" instead.
+  if ((Subtarget->classifyGlobalFunctionReference(Addr.getGlobalValue(), TM) !=
+       AArch64II::MO_NO_FLAG) ||
+      MF->getFunction().getParent()->getRtLibUseGOT())
+    return false;
+
// The weak function target may be zero; in that case we must use indirect
// addressing via a stub on windows as it may be out of range for a
// PC-relative jump.
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 96ea692d03f563..56de890c78deca 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -7969,13 +7969,14 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
}
} else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
- if (getTargetMachine().getCodeModel() == CodeModel::Large &&
- Subtarget->isTargetMachO()) {
- const char *Sym = S->getSymbol();
+    bool UseGot = (getTargetMachine().getCodeModel() == CodeModel::Large &&
+                   Subtarget->isTargetMachO()) ||
+                  MF.getFunction().getParent()->getRtLibUseGOT();
+ const char *Sym = S->getSymbol();
+ if (UseGot) {
Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
} else {
- const char *Sym = S->getSymbol();
Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
}
}
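
On ELF with "RtLibUseGOT" set, an external-symbol callee such as the memset
libcall is therefore emitted as a GOT load plus an indirect call, matching
the updated test below:

  adrp x8, :got:memset
  ldr x8, [x8, :got_lo12:memset]
  blr x8

rather than a direct bl, which would require a PLT entry when memset is
preemptible.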
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index cf57d950ae8d7f..c4c6827313b5e1 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -43,10 +43,10 @@ static cl::opt<bool>
UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
"an address is ignored"), cl::init(false), cl::Hidden);
-static cl::opt<bool>
- UseNonLazyBind("aarch64-enable-nonlazybind",
- cl::desc("Call nonlazybind functions via direct GOT load"),
- cl::init(false), cl::Hidden);
+static cl::opt<bool> MachOUseNonLazyBind(
+ "aarch64-macho-enable-nonlazybind",
+ cl::desc("Call nonlazybind functions via direct GOT load for Mach-O"),
+ cl::Hidden);
static cl::opt<bool> UseAA("aarch64-use-aa", cl::init(true),
cl::desc("Enable the use of AA during codegen."));
@@ -434,7 +434,8 @@ unsigned AArch64Subtarget::classifyGlobalFunctionReference(
// NonLazyBind goes via GOT unless we know it's available locally.
auto *F = dyn_cast<Function>(GV);
- if (UseNonLazyBind && F && F->hasFnAttribute(Attribute::NonLazyBind) &&
+ if ((!isTargetMachO() || MachOUseNonLazyBind) && F &&
+ F->hasFnAttribute(Attribute::NonLazyBind) &&
!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
return AArch64II::MO_GOT;
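
With the rename, the GOT path becomes the default for nonlazybind functions
on non-Mach-O targets, while Mach-O keeps it opt-in behind the new flag, as
in the updated RUN line below:

  llc -mtriple=aarch64-apple-ios %s -o - -aarch64-macho-enable-nonlazybind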
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index 84057ea8d2214a..773eadbf34de37 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -1273,8 +1273,19 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
!Subtarget.noBTIAtReturnTwice() &&
MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
Opc = AArch64::BLR_BTI;
- else
+ else {
+ // For an intrinsic call (e.g. memset), use GOT if "RtLibUseGOT" (-fno-plt)
+ // is set.
+ if (Info.Callee.isSymbol() && F.getParent()->getRtLibUseGOT()) {
+ auto Reg =
+ MRI.createGenericVirtualRegister(getLLTForType(*F.getType(), DL));
+ auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_GLOBAL_VALUE);
+ DstOp(Reg).addDefToMIB(MRI, MIB);
+ MIB.addExternalSymbol(Info.Callee.getSymbolName(), AArch64II::MO_GOT);
+ Info.Callee = MachineOperand::CreateReg(Reg, false);
+ }
Opc = getCallOpcode(MF, Info.Callee.isReg(), false);
+ }
auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
unsigned CalleeOpNo = 0;
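
Note that the G_GLOBAL_VALUE built here carries an external-symbol operand
rather than a global, since a libcall has no GlobalValue; a hypothetical MIR
sketch:

  %callee:_(p0) = G_GLOBAL_VALUE target-flags(aarch64-got) &memset

This is why the legalizer and instruction-selector changes below check
isSymbol() before calling getGlobal().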
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 8344e79f78e1eb..e60db260e3ef10 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2841,11 +2841,19 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
}
case TargetOpcode::G_GLOBAL_VALUE: {
- auto GV = I.getOperand(1).getGlobal();
- if (GV->isThreadLocal())
- return selectTLSGlobalValue(I, MRI);
+ const GlobalValue *GV = nullptr;
+ unsigned OpFlags;
+ if (I.getOperand(1).isSymbol()) {
+ OpFlags = I.getOperand(1).getTargetFlags();
+ // Currently only used by "RtLibUseGOT".
+ assert(OpFlags == AArch64II::MO_GOT);
+ } else {
+ GV = I.getOperand(1).getGlobal();
+ if (GV->isThreadLocal())
+ return selectTLSGlobalValue(I, MRI);
+ OpFlags = STI.ClassifyGlobalReference(GV, TM);
+ }
- unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
if (OpFlags & AArch64II::MO_GOT) {
I.setDesc(TII.get(AArch64::LOADgot));
I.getOperand(1).setTargetFlags(OpFlags);
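
For an MO_GOT reference, the selector rewrites the G_GLOBAL_VALUE in place
into the LOADgot pseudo, e.g. (a sketch, not verbatim MIR):

  %callee:gpr64 = LOADgot target-flags(aarch64-got) &memset

LOADgot later expands to the adrp/ldr GOT sequence seen in the test updates
below.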
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index b561cb12c93a1c..83137949d0f244 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1314,6 +1314,9 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
// By splitting this here, we can optimize accesses in the small code model by
// folding in the G_ADD_LOW into the load/store offset.
auto &GlobalOp = MI.getOperand(1);
+  // Don't touch an external-symbol operand (a libcall under "RtLibUseGOT").
+ if (GlobalOp.isSymbol())
+ return true;
const auto* GV = GlobalOp.getGlobal();
if (GV->isThreadLocal())
return true; // Don't want to modify TLS vars.
diff --git a/llvm/test/CodeGen/AArch64/nonlazybind.ll b/llvm/test/CodeGen/AArch64/nonlazybind.ll
index 669a8ee04b2492..f5bb3a4ecbc9a0 100644
--- a/llvm/test/CodeGen/AArch64/nonlazybind.ll
+++ b/llvm/test/CodeGen/AArch64/nonlazybind.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -mtriple=aarch64-apple-ios %s -o - -aarch64-enable-nonlazybind | FileCheck %s --check-prefix=MACHO
+; RUN: llc -mtriple=aarch64-apple-ios %s -o - -aarch64-macho-enable-nonlazybind | FileCheck %s --check-prefix=MACHO
; RUN: llc -mtriple=aarch64-apple-ios %s -o - | FileCheck %s --check-prefix=MACHO-NORMAL
; RUN: llc -mtriple=aarch64 -fast-isel %s -o - | FileCheck %s --check-prefixes=ELF,ELF-FI
; RUN: llc -mtriple=aarch64 -global-isel %s -o - | FileCheck %s --check-prefixes=ELF,ELF-GI
@@ -19,13 +19,18 @@ define void @test_laziness(ptr %a) nounwind {
; MACHO-NEXT: Lloh1:
; MACHO-NEXT: ldr x8, [x8, _external@GOTPAGEOFF]
; MACHO-NEXT: blr x8
+; MACHO-NEXT: Lloh2:
+; MACHO-NEXT: adrp x8, _memset@GOTPAGE
; MACHO-NEXT: mov x0, x19
; MACHO-NEXT: mov w1, #1 ; =0x1
+; MACHO-NEXT: Lloh3:
+; MACHO-NEXT: ldr x8, [x8, _memset@GOTPAGEOFF]
; MACHO-NEXT: mov w2, #1000 ; =0x3e8
-; MACHO-NEXT: bl _memset
+; MACHO-NEXT: blr x8
; MACHO-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; MACHO-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
; MACHO-NEXT: ret
+; MACHO-NEXT: .loh AdrpLdrGot Lloh2, Lloh3
; MACHO-NEXT: .loh AdrpLdrGot Lloh0, Lloh1
;
; MACHO-NORMAL-LABEL: test_laziness:
@@ -34,50 +39,34 @@ define void @test_laziness(ptr %a) nounwind {
; MACHO-NORMAL-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; MACHO-NORMAL-NEXT: mov x19, x0
; MACHO-NORMAL-NEXT: bl _external
+; MACHO-NORMAL-NEXT: Lloh0:
+; MACHO-NORMAL-NEXT: adrp x8, _memset@GOTPAGE
; MACHO-NORMAL-NEXT: mov x0, x19
; MACHO-NORMAL-NEXT: mov w1, #1 ; =0x1
+; MACHO-NORMAL-NEXT: Lloh1:
+; MACHO-NORMAL-NEXT: ldr x8, [x8, _memset@GOTPAGEOFF]
; MACHO-NORMAL-NEXT: mov w2, #1000 ; =0x3e8
-; MACHO-NORMAL-NEXT: bl _memset
+; MACHO-NORMAL-NEXT: blr x8
; MACHO-NORMAL-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; MACHO-NORMAL-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
; MACHO-NORMAL-NEXT: ret
+; MACHO-NORMAL-NEXT: .loh AdrpLdrGot Lloh0, Lloh1
;
-; ELF-FI-LABEL: test_laziness:
-; ELF-FI: // %bb.0:
-; ELF-FI-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
-; ELF-FI-NEXT: mov x19, x0
-; ELF-FI-NEXT: bl external
-; ELF-FI-NEXT: mov w8, #1 // =0x1
-; ELF-FI-NEXT: mov x0, x19
-; ELF-FI-NEXT: mov x2, #1000 // =0x3e8
-; ELF-FI-NEXT: uxtb w1, w8
-; ELF-FI-NEXT: bl memset
-; ELF-FI-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
-; ELF-FI-NEXT: ret
-;
-; ELF-GI-LABEL: test_laziness:
-; ELF-GI: // %bb.0:
-; ELF-GI-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
-; ELF-GI-NEXT: mov x19, x0
-; ELF-GI-NEXT: bl external
-; ELF-GI-NEXT: mov x0, x19
-; ELF-GI-NEXT: mov w1, #1 // =0x1
-; ELF-GI-NEXT: mov w2, #1000 // =0x3e8
-; ELF-GI-NEXT: bl memset
-; ELF-GI-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
-; ELF-GI-NEXT: ret
-;
-; ELF-SDAG-LABEL: test_laziness:
-; ELF-SDAG: // %bb.0:
-; ELF-SDAG-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
-; ELF-SDAG-NEXT: mov x19, x0
-; ELF-SDAG-NEXT: bl external
-; ELF-SDAG-NEXT: mov x0, x19
-; ELF-SDAG-NEXT: mov w1, #1 // =0x1
-; ELF-SDAG-NEXT: mov w2, #1000 // =0x3e8
-; ELF-SDAG-NEXT: bl memset
-; ELF-SDAG-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
-; ELF-SDAG-NEXT: ret
+; ELF-LABEL: test_laziness:
+; ELF: // %bb.0:
+; ELF-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; ELF-NEXT: adrp x8, :got:external
+; ELF-NEXT: mov x19, x0
+; ELF-NEXT: ldr x8, [x8, :got_lo12:external]
+; ELF-NEXT: blr x8
+; ELF-NEXT: adrp x8, :got:memset
+; ELF-NEXT: mov x0, x19
+; ELF-NEXT: mov w1, #1 // =0x1
+; ELF-NEXT: ldr x8, [x8, :got_lo12:memset]
+; ELF-NEXT: mov w2, #1000 // =0x3e8
+; ELF-NEXT: blr x8
+; ELF-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
+; ELF-NEXT: ret
call void @external()
call void @llvm.memset.p0.i64(ptr align 1 %a, i8 1, i64 1000, i1 false)
ret void
@@ -86,12 +75,12 @@ define void @test_laziness(ptr %a) nounwind {
define void @test_laziness_tail() nounwind {
; MACHO-LABEL: test_laziness_tail:
; MACHO: ; %bb.0:
-; MACHO-NEXT: Lloh2:
+; MACHO-NEXT: Lloh4:
; MACHO-NEXT: adrp x0, _external@GOTPAGE
-; MACHO-NEXT: Lloh3:
+; MACHO-NEXT: Lloh5:
; MACHO-NEXT: ldr x0, [x0, _external@GOTPAGEOFF]
; MACHO-NEXT: br x0
-; MACHO-NEXT: .loh AdrpLdrGot Lloh2, Lloh3
+; MACHO-NEXT: .loh AdrpLdrGot Lloh4, Lloh5
;
; MACHO-NORMAL-LABEL: test_laziness_tail:
; MACHO-NORMAL: ; %bb.0:
@@ -99,7 +88,9 @@ define void @test_laziness_tail() nounwind {
;
; ELF-LABEL: test_laziness_tail:
; ELF: // %bb.0:
-; ELF-NEXT: b external
+; ELF-NEXT: adrp x0, :got:external
+; ELF-NEXT: ldr x0, [x0, :got_lo12:external]
+; ELF-NEXT: br x0
tail call void @external()
ret void
}
@@ -108,3 +99,7 @@ declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
!llvm.module.flags = !{!0}
!0 = !{i32 7, !"RtLibUseGOT", i32 1}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; ELF-FI: {{.*}}
+; ELF-GI: {{.*}}
+; ELF-SDAG: {{.*}}