[clang] [clang-tools-extra] [llvm] [AArch64] Implement -fno-plt for SelectionDAG/GlobalISel (PR #78890)

Fangrui Song via cfe-commits cfe-commits at lists.llvm.org
Mon Jan 29 22:00:26 PST 2024


https://github.com/MaskRay updated https://github.com/llvm/llvm-project/pull/78890

>From 549e4ea5b292e558e085d881abd4c93f29352029 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i at maskray.me>
Date: Sun, 21 Jan 2024 00:25:34 -0800
Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20initia?=
 =?UTF-8?q?l=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.4
---
 llvm/lib/CodeGen/GlobalISel/CallLowering.cpp  | 13 ++-
 llvm/lib/Target/AArch64/AArch64FastISel.cpp   |  7 ++
 .../Target/AArch64/AArch64ISelLowering.cpp    |  9 ++-
 llvm/lib/Target/AArch64/AArch64Subtarget.cpp  | 11 +--
 .../AArch64/GISel/AArch64CallLowering.cpp     | 13 ++-
 .../GISel/AArch64InstructionSelector.cpp      | 16 +++-
 .../AArch64/GISel/AArch64LegalizerInfo.cpp    |  3 +
 llvm/test/CodeGen/AArch64/nonlazybind.ll      | 81 +++++++++----------
 8 files changed, 93 insertions(+), 60 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index ccd9b13d730b6..d3484e5229e70 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -144,9 +144,16 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
   // Try looking through a bitcast from one function type to another.
   // Commonly happens with calls to objc_msgSend().
   const Value *CalleeV = CB.getCalledOperand()->stripPointerCasts();
-  if (const Function *F = dyn_cast<Function>(CalleeV))
-    Info.Callee = MachineOperand::CreateGA(F, 0);
-  else if (isa<GlobalIFunc>(CalleeV) || isa<GlobalAlias>(CalleeV)) {
+  if (const Function *F = dyn_cast<Function>(CalleeV)) {
+    if (F->hasFnAttribute(Attribute::NonLazyBind)) {
+      auto Reg =
+          MRI.createGenericVirtualRegister(getLLTForType(*F->getType(), DL));
+      MIRBuilder.buildGlobalValue(Reg, F);
+      Info.Callee = MachineOperand::CreateReg(Reg, false);
+    } else {
+      Info.Callee = MachineOperand::CreateGA(F, 0);
+    }
+  } else if (isa<GlobalIFunc>(CalleeV) || isa<GlobalAlias>(CalleeV)) {
     // IR IFuncs and Aliases can't be forward declared (only defined), so the
     // callee must be in the same TU and therefore we can direct-call it without
     // worrying about it being out of range.
diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index e98f6c4984a75..93d6024f34c09 100644
--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -3202,6 +3202,13 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
   if (Callee && !computeCallAddress(Callee, Addr))
     return false;
 
+  // MO_GOT is not handled. -fno-plt compiled intrinsic calls do not have the
+  // nonlazybind attribute. Check "RtLibUseGOT" instead.
+  if ((Subtarget->classifyGlobalFunctionReference(Addr.getGlobalValue(), TM) !=
+       AArch64II::MO_NO_FLAG) ||
+      MF->getFunction().getParent()->getRtLibUseGOT())
+    return false;
+
   // The weak function target may be zero; in that case we must use indirect
   // addressing via a stub on windows as it may be out of range for a
   // PC-relative jump.
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 96ea692d03f56..56de890c78dec 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -7969,13 +7969,14 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
       Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
     }
   } else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
-    if (getTargetMachine().getCodeModel() == CodeModel::Large &&
-        Subtarget->isTargetMachO()) {
-      const char *Sym = S->getSymbol();
+    bool UseGot = (getTargetMachine().getCodeModel() == CodeModel::Large &&
+                   Subtarget->isTargetMachO()) ||
+                  MF.getFunction().getParent()->getRtLibUseGOT();
+    const char *Sym = S->getSymbol();
+    if (UseGot) {
       Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
       Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
     } else {
-      const char *Sym = S->getSymbol();
       Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
     }
   }
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index cf57d950ae8d7..c4c6827313b5e 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -43,10 +43,10 @@ static cl::opt<bool>
 UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
                          "an address is ignored"), cl::init(false), cl::Hidden);
 
-static cl::opt<bool>
-    UseNonLazyBind("aarch64-enable-nonlazybind",
-                   cl::desc("Call nonlazybind functions via direct GOT load"),
-                   cl::init(false), cl::Hidden);
+static cl::opt<bool> MachOUseNonLazyBind(
+    "aarch64-macho-enable-nonlazybind",
+    cl::desc("Call nonlazybind functions via direct GOT load for Mach-O"),
+    cl::Hidden);
 
 static cl::opt<bool> UseAA("aarch64-use-aa", cl::init(true),
                            cl::desc("Enable the use of AA during codegen."));
@@ -434,7 +434,8 @@ unsigned AArch64Subtarget::classifyGlobalFunctionReference(
 
   // NonLazyBind goes via GOT unless we know it's available locally.
   auto *F = dyn_cast<Function>(GV);
-  if (UseNonLazyBind && F && F->hasFnAttribute(Attribute::NonLazyBind) &&
+  if ((!isTargetMachO() || MachOUseNonLazyBind) && F &&
+      F->hasFnAttribute(Attribute::NonLazyBind) &&
       !TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
     return AArch64II::MO_GOT;
 
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index 84057ea8d2214..773eadbf34de3 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -1273,8 +1273,19 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
            !Subtarget.noBTIAtReturnTwice() &&
            MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
     Opc = AArch64::BLR_BTI;
-  else
+  else {
+    // For an intrinsic call (e.g. memset), use GOT if "RtLibUseGOT" (-fno-plt)
+    // is set.
+    if (Info.Callee.isSymbol() && F.getParent()->getRtLibUseGOT()) {
+      auto Reg =
+          MRI.createGenericVirtualRegister(getLLTForType(*F.getType(), DL));
+      auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_GLOBAL_VALUE);
+      DstOp(Reg).addDefToMIB(MRI, MIB);
+      MIB.addExternalSymbol(Info.Callee.getSymbolName(), AArch64II::MO_GOT);
+      Info.Callee = MachineOperand::CreateReg(Reg, false);
+    }
     Opc = getCallOpcode(MF, Info.Callee.isReg(), false);
+  }
 
   auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
   unsigned CalleeOpNo = 0;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 8344e79f78e1e..e60db260e3ef1 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2841,11 +2841,19 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
   }
 
   case TargetOpcode::G_GLOBAL_VALUE: {
-    auto GV = I.getOperand(1).getGlobal();
-    if (GV->isThreadLocal())
-      return selectTLSGlobalValue(I, MRI);
+    const GlobalValue *GV = nullptr;
+    unsigned OpFlags;
+    if (I.getOperand(1).isSymbol()) {
+      OpFlags = I.getOperand(1).getTargetFlags();
+      // Currently only used by "RtLibUseGOT".
+      assert(OpFlags == AArch64II::MO_GOT);
+    } else {
+      GV = I.getOperand(1).getGlobal();
+      if (GV->isThreadLocal())
+        return selectTLSGlobalValue(I, MRI);
+      OpFlags = STI.ClassifyGlobalReference(GV, TM);
+    }
 
-    unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
     if (OpFlags & AArch64II::MO_GOT) {
       I.setDesc(TII.get(AArch64::LOADgot));
       I.getOperand(1).setTargetFlags(OpFlags);
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index b561cb12c93a1..83137949d0f24 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1314,6 +1314,9 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
   // By splitting this here, we can optimize accesses in the small code model by
   // folding in the G_ADD_LOW into the load/store offset.
   auto &GlobalOp = MI.getOperand(1);
+  // Don't modify an intrinsic call.
+  if (GlobalOp.isSymbol())
+    return true;
   const auto* GV = GlobalOp.getGlobal();
   if (GV->isThreadLocal())
     return true; // Don't want to modify TLS vars.
diff --git a/llvm/test/CodeGen/AArch64/nonlazybind.ll b/llvm/test/CodeGen/AArch64/nonlazybind.ll
index 669a8ee04b249..f5bb3a4ecbc9a 100644
--- a/llvm/test/CodeGen/AArch64/nonlazybind.ll
+++ b/llvm/test/CodeGen/AArch64/nonlazybind.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -mtriple=aarch64-apple-ios %s -o - -aarch64-enable-nonlazybind | FileCheck %s --check-prefix=MACHO
+; RUN: llc -mtriple=aarch64-apple-ios %s -o - -aarch64-macho-enable-nonlazybind | FileCheck %s --check-prefix=MACHO
 ; RUN: llc -mtriple=aarch64-apple-ios %s -o - | FileCheck %s --check-prefix=MACHO-NORMAL
 ; RUN: llc -mtriple=aarch64 -fast-isel %s -o - | FileCheck %s --check-prefixes=ELF,ELF-FI
 ; RUN: llc -mtriple=aarch64 -global-isel %s -o - | FileCheck %s --check-prefixes=ELF,ELF-GI
@@ -19,13 +19,18 @@ define void @test_laziness(ptr %a) nounwind {
 ; MACHO-NEXT:  Lloh1:
 ; MACHO-NEXT:    ldr x8, [x8, _external@GOTPAGEOFF]
 ; MACHO-NEXT:    blr x8
+; MACHO-NEXT:  Lloh2:
+; MACHO-NEXT:    adrp x8, _memset@GOTPAGE
 ; MACHO-NEXT:    mov x0, x19
 ; MACHO-NEXT:    mov w1, #1 ; =0x1
+; MACHO-NEXT:  Lloh3:
+; MACHO-NEXT:    ldr x8, [x8, _memset@GOTPAGEOFF]
 ; MACHO-NEXT:    mov w2, #1000 ; =0x3e8
-; MACHO-NEXT:    bl _memset
+; MACHO-NEXT:    blr x8
 ; MACHO-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
 ; MACHO-NEXT:    ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
 ; MACHO-NEXT:    ret
+; MACHO-NEXT:    .loh AdrpLdrGot Lloh2, Lloh3
 ; MACHO-NEXT:    .loh AdrpLdrGot Lloh0, Lloh1
 ;
 ; MACHO-NORMAL-LABEL: test_laziness:
@@ -34,50 +39,34 @@ define void @test_laziness(ptr %a) nounwind {
 ; MACHO-NORMAL-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
 ; MACHO-NORMAL-NEXT:    mov x19, x0
 ; MACHO-NORMAL-NEXT:    bl _external
+; MACHO-NORMAL-NEXT:  Lloh0:
+; MACHO-NORMAL-NEXT:    adrp x8, _memset@GOTPAGE
 ; MACHO-NORMAL-NEXT:    mov x0, x19
 ; MACHO-NORMAL-NEXT:    mov w1, #1 ; =0x1
+; MACHO-NORMAL-NEXT:  Lloh1:
+; MACHO-NORMAL-NEXT:    ldr x8, [x8, _memset@GOTPAGEOFF]
 ; MACHO-NORMAL-NEXT:    mov w2, #1000 ; =0x3e8
-; MACHO-NORMAL-NEXT:    bl _memset
+; MACHO-NORMAL-NEXT:    blr x8
 ; MACHO-NORMAL-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
 ; MACHO-NORMAL-NEXT:    ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
 ; MACHO-NORMAL-NEXT:    ret
+; MACHO-NORMAL-NEXT:    .loh AdrpLdrGot Lloh0, Lloh1
 ;
-; ELF-FI-LABEL: test_laziness:
-; ELF-FI:       // %bb.0:
-; ELF-FI-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
-; ELF-FI-NEXT:    mov x19, x0
-; ELF-FI-NEXT:    bl external
-; ELF-FI-NEXT:    mov w8, #1 // =0x1
-; ELF-FI-NEXT:    mov x0, x19
-; ELF-FI-NEXT:    mov x2, #1000 // =0x3e8
-; ELF-FI-NEXT:    uxtb w1, w8
-; ELF-FI-NEXT:    bl memset
-; ELF-FI-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
-; ELF-FI-NEXT:    ret
-;
-; ELF-GI-LABEL: test_laziness:
-; ELF-GI:       // %bb.0:
-; ELF-GI-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
-; ELF-GI-NEXT:    mov x19, x0
-; ELF-GI-NEXT:    bl external
-; ELF-GI-NEXT:    mov x0, x19
-; ELF-GI-NEXT:    mov w1, #1 // =0x1
-; ELF-GI-NEXT:    mov w2, #1000 // =0x3e8
-; ELF-GI-NEXT:    bl memset
-; ELF-GI-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
-; ELF-GI-NEXT:    ret
-;
-; ELF-SDAG-LABEL: test_laziness:
-; ELF-SDAG:       // %bb.0:
-; ELF-SDAG-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
-; ELF-SDAG-NEXT:    mov x19, x0
-; ELF-SDAG-NEXT:    bl external
-; ELF-SDAG-NEXT:    mov x0, x19
-; ELF-SDAG-NEXT:    mov w1, #1 // =0x1
-; ELF-SDAG-NEXT:    mov w2, #1000 // =0x3e8
-; ELF-SDAG-NEXT:    bl memset
-; ELF-SDAG-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
-; ELF-SDAG-NEXT:    ret
+; ELF-LABEL: test_laziness:
+; ELF:       // %bb.0:
+; ELF-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; ELF-NEXT:    adrp x8, :got:external
+; ELF-NEXT:    mov x19, x0
+; ELF-NEXT:    ldr x8, [x8, :got_lo12:external]
+; ELF-NEXT:    blr x8
+; ELF-NEXT:    adrp x8, :got:memset
+; ELF-NEXT:    mov x0, x19
+; ELF-NEXT:    mov w1, #1 // =0x1
+; ELF-NEXT:    ldr x8, [x8, :got_lo12:memset]
+; ELF-NEXT:    mov w2, #1000 // =0x3e8
+; ELF-NEXT:    blr x8
+; ELF-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
+; ELF-NEXT:    ret
   call void @external()
   call void @llvm.memset.p0.i64(ptr align 1 %a, i8 1, i64 1000, i1 false)
   ret void
@@ -86,12 +75,12 @@ define void @test_laziness(ptr %a) nounwind {
 define void @test_laziness_tail() nounwind {
 ; MACHO-LABEL: test_laziness_tail:
 ; MACHO:       ; %bb.0:
-; MACHO-NEXT:  Lloh2:
+; MACHO-NEXT:  Lloh4:
 ; MACHO-NEXT:    adrp x0, _external@GOTPAGE
-; MACHO-NEXT:  Lloh3:
+; MACHO-NEXT:  Lloh5:
 ; MACHO-NEXT:    ldr x0, [x0, _external@GOTPAGEOFF]
 ; MACHO-NEXT:    br x0
-; MACHO-NEXT:    .loh AdrpLdrGot Lloh2, Lloh3
+; MACHO-NEXT:    .loh AdrpLdrGot Lloh4, Lloh5
 ;
 ; MACHO-NORMAL-LABEL: test_laziness_tail:
 ; MACHO-NORMAL:       ; %bb.0:
@@ -99,7 +88,9 @@ define void @test_laziness_tail() nounwind {
 ;
 ; ELF-LABEL: test_laziness_tail:
 ; ELF:       // %bb.0:
-; ELF-NEXT:    b external
+; ELF-NEXT:    adrp x0, :got:external
+; ELF-NEXT:    ldr x0, [x0, :got_lo12:external]
+; ELF-NEXT:    br x0
   tail call void @external()
   ret void
 }
@@ -108,3 +99,7 @@ declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
 
 !llvm.module.flags = !{!0}
 !0 = !{i32 7, !"RtLibUseGOT", i32 1}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; ELF-FI: {{.*}}
+; ELF-GI: {{.*}}
+; ELF-SDAG: {{.*}}



More information about the cfe-commits mailing list