[llvm] [MachineOutliner] Don't outline ADRP pair to avoid incorrect ICF (PR #160232)

Pranav Kant via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 9 16:23:05 PDT 2025


https://github.com/pranavk updated https://github.com/llvm/llvm-project/pull/160232

>From b4216f16d11c97efb373b5bef8c902d31759799f Mon Sep 17 00:00:00 2001
From: Pranav Kant <prka at google.com>
Date: Mon, 22 Sep 2025 21:48:09 -0700
Subject: [PATCH 1/2] [MachineOutliner] Don't outline ADRP pair to avoid
 incorrect ICF

Fixes #131660

Earlier attempts to fix this in the linker were not accepted. Current
attempts is pending at #139493
---
 llvm/lib/Target/AArch64/AArch64InstrInfo.cpp  |  30 +++-
 .../machine-outliner-adrp-got-split.mir       | 130 ++++++++++++++++++
 2 files changed, 156 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/machine-outliner-adrp-got-split.mir

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 5a51c812732e6..8880ca455c1f6 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -10179,11 +10179,33 @@ AArch64InstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI,
       return outliner::InstrType::Illegal;
   }
 
-  // Special cases for instructions that can always be outlined, but will fail
-  // the later tests. e.g, ADRPs, which are PC-relative use LR, but can always
-  // be outlined because they don't require a *specific* value to be in LR.
-  if (MI.getOpcode() == AArch64::ADRP)
+  // An ADRP instruction referencing a GOT should not be outlined.
+  // This is to avoid splitting ADRP/(LDR/ADD/etc.) pair into different
+  // functions which can lead to linker ICF merging sections incorrectly.
+  if (MI.getOpcode() == AArch64::ADRP) {
+    bool IsPage = (MI.getOperand(1).getTargetFlags() & AArch64II::MO_PAGE) != 0;
+    bool IsGot = (MI.getOperand(1).getTargetFlags() & AArch64II::MO_GOT) != 0;
+    if (IsPage && IsGot)
+      return outliner::InstrType::Illegal;
+
+    // Special cases for instructions that can always be outlined, but will fail
+    // the later tests. e.g, ADRPs, which are PC-relative use LR, but can always
+    // be outlined because they don't require a *specific* value to be in LR.
     return outliner::InstrType::Legal;
+  }
+
+  // Similarly, any user of ADRP instruction referencing a GOT should not be
+  // outlined. It's hard/costly to check exact users of ADRP. So we use check
+  // all operands and reject any that's a page offset and references a GOT.
+  const auto &F = MI.getMF()->getFunction();
+  for (const auto &MO : MI.operands()) {
+    bool IsPageOff = (MO.getTargetFlags() & AArch64II::MO_PAGEOFF) != 0;
+    bool IsGot = (MO.getTargetFlags() & AArch64II::MO_GOT) != 0;
+    if (IsPageOff && IsGot &&
+        (MI.getMF()->getTarget().getFunctionSections() || F.hasComdat() ||
+         F.hasSection() || F.getSectionPrefix()))
+      return outliner::InstrType::Illegal;
+  }
 
   // If MI is a call we might be able to outline it. We don't want to outline
   // any calls that rely on the position of items on the stack. When we outline
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-adrp-got-split.mir b/llvm/test/CodeGen/AArch64/machine-outliner-adrp-got-split.mir
new file mode 100644
index 0000000000000..169835809d6ba
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-adrp-got-split.mir
@@ -0,0 +1,130 @@
+# RUN: llc -mtriple=aarch64---  -run-pass=machine-outliner -verify-machineinstrs %s -o - | FileCheck %s
+--- |
+
+  @x = common global i32 0, align 4
+
+  define i32 @adrp_add() #0 {
+    ret i32 0
+  }
+
+  define i32 @adrp_ldr() #0 {
+    ret i32 0
+  }
+
+  define void @bar(i32 %a) #0 {
+    ret void
+  }
+
+  attributes #0 = { noinline noredzone }
+...
+---
+# This test ensures that we do not outline ADRP / ADD pair when it's referencing 
+# a GOT entry.
+#
+# CHECK-LABEL: name: adrp_add
+# CHECK-DAG: bb.0:
+# CHECK: $x9 = ADRP target-flags(aarch64-page, aarch64-got) @x
+# CHECK: $x12 = ADDXri $x9, target-flags(aarch64-pageoff, aarch64-got) @x, 0
+
+# CHECK-DAG: bb.1
+# CHECK: $x9 = ADRP target-flags(aarch64-page, aarch64-got) @x
+# CHECK: $x12 = ADDXri $x9, target-flags(aarch64-pageoff, aarch64-got) @x, 0
+
+# CHECK-DAG: bb.2
+# CHECK: $x9 = ADRP target-flags(aarch64-page, aarch64-got) @x
+# CHECK: $x12 = ADDXri $x9, target-flags(aarch64-pageoff, aarch64-got) @x, 0
+name:            adrp_add
+tracksRegLiveness: true
+body:             |
+  bb.0:
+  liveins: $lr
+    $w12 = ORRWri $wzr, 1
+    $w12 = ORRWri $wzr, 1
+    $w12 = ORRWri $wzr, 1
+    $w12 = ORRWri $wzr, 1
+    $w12 = ORRWri $wzr, 1
+    $w12 = ORRWri $wzr, 1
+    $x9 = ADRP target-flags(aarch64-page, aarch64-got) @x
+    $x12 = ADDXri $x9, target-flags(aarch64-pageoff, aarch64-got) @x, 0
+    $lr = ORRXri $xzr, 1
+  bb.1:
+  liveins: $lr
+    $w12 = ORRWri $wzr, 1
+    $w12 = ORRWri $wzr, 1
+    $w12 = ORRWri $wzr, 1
+    $w12 = ORRWri $wzr, 1
+    $w12 = ORRWri $wzr, 1
+    $w12 = ORRWri $wzr, 1
+    $x9 = ADRP target-flags(aarch64-page, aarch64-got) @x
+    $x12 = ADDXri $x9, target-flags(aarch64-pageoff, aarch64-got) @x, 0
+    $lr = ORRXri $xzr, 1
+  bb.2:
+  liveins: $lr
+    $w12 = ORRWri $wzr, 1
+    $w12 = ORRWri $wzr, 1
+    $w12 = ORRWri $wzr, 1
+    $w12 = ORRWri $wzr, 1
+    $w12 = ORRWri $wzr, 1
+    $w12 = ORRWri $wzr, 1
+    $x9 = ADRP target-flags(aarch64-page, aarch64-got) @x
+    $x12 = ADDXri $x9, target-flags(aarch64-pageoff, aarch64-got) @x, 0
+    $lr = ORRXri $xzr, 1
+  bb.3:
+  liveins: $lr
+    RET undef $lr
+...
+---
+# This test ensures that we do not outline ADRP / LDR pair when it's referencing 
+# a GOT entry.
+#
+# CHECK-LABEL: name: adrp_ldr
+# CHECK-DAG: bb.0:
+# CHECK: $x9 = ADRP target-flags(aarch64-page, aarch64-got) @x
+# CHECK: $x12 = LDRXui $x9, target-flags(aarch64-pageoff, aarch64-got) @x
+
+# CHECK-DAG: bb.1
+# CHECK: $x9 = ADRP target-flags(aarch64-page, aarch64-got) @x
+# CHECK: $x12 = LDRXui $x9, target-flags(aarch64-pageoff, aarch64-got) @x
+
+# CHECK-DAG: bb.2
+# CHECK: $x9 = ADRP target-flags(aarch64-page, aarch64-got) @x
+# CHECK: $x12 = LDRXui $x9, target-flags(aarch64-pageoff, aarch64-got) @x
+name:            adrp_ldr
+tracksRegLiveness: true
+body:             |
+  bb.0:
+  liveins: $lr
+    $w12 = ORRWri $wzr, 1
+    $w12 = ORRWri $wzr, 1
+    $w12 = ORRWri $wzr, 1
+    $w12 = ORRWri $wzr, 1
+    $w12 = ORRWri $wzr, 1
+    $w12 = ORRWri $wzr, 1
+    $x9 = ADRP target-flags(aarch64-page, aarch64-got) @x
+    $x12 = LDRXui $x9, target-flags(aarch64-pageoff, aarch64-got) @x
+    $lr = ORRXri $xzr, 1
+  bb.1:
+  liveins: $lr
+    $w12 = ORRWri $wzr, 1
+    $w12 = ORRWri $wzr, 1
+    $w12 = ORRWri $wzr, 1
+    $w12 = ORRWri $wzr, 1
+    $w12 = ORRWri $wzr, 1
+    $w12 = ORRWri $wzr, 1
+    $x9 = ADRP target-flags(aarch64-page, aarch64-got) @x
+    $x12 = LDRXui $x9, target-flags(aarch64-pageoff, aarch64-got) @x
+    $lr = ORRXri $xzr, 1
+  bb.2:
+  liveins: $lr
+    $w12 = ORRWri $wzr, 1
+    $w12 = ORRWri $wzr, 1
+    $w12 = ORRWri $wzr, 1
+    $w12 = ORRWri $wzr, 1
+    $w12 = ORRWri $wzr, 1
+    $w12 = ORRWri $wzr, 1
+    $x9 = ADRP target-flags(aarch64-page, aarch64-got) @x
+    $x12 = LDRXui $x9, target-flags(aarch64-pageoff, aarch64-got) @x
+    $lr = ORRXri $xzr, 1
+  bb.3:
+  liveins: $lr
+    RET undef $lr
\ No newline at end of file

>From 48d7e62a72fcb011e3959b4dd8d6b8b9c32c7a1b Mon Sep 17 00:00:00 2001
From: Pranav Kant <prka at google.com>
Date: Thu, 9 Oct 2025 16:14:47 -0700
Subject: [PATCH 2/2] Allow outlining ADRP pair as a whole

---
 llvm/lib/Target/AArch64/AArch64InstrInfo.cpp  | 43 +++++--------
 .../machine-outliner-adrp-got-split.mir       | 61 ++++++++++---------
 2 files changed, 47 insertions(+), 57 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 8880ca455c1f6..6af8450da1625 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -9547,6 +9547,21 @@ AArch64InstrInfo::getOutliningCandidateInfo(
 
   unsigned NumBytesToCreateFrame = 0;
 
+  // Avoid splitting ADRP-ADD/LDR pair into outlined functions.
+  MachineInstr &LastMI = RepeatedSequenceLocs[0].back();
+  MachineInstr &FirstMI = RepeatedSequenceLocs[0].front();
+  if (LastMI.getOpcode() == AArch64::ADRP &&
+      (LastMI.getOperand(1).getTargetFlags() & AArch64II::MO_PAGE) != 0 &&
+      (LastMI.getOperand(1).getTargetFlags() & AArch64II::MO_GOT) != 0) {
+    return std::nullopt;
+  }
+
+  if ((FirstMI.getOpcode() == AArch64::ADDXri || FirstMI.getOpcode() == AArch64::LDRXui) &&
+       (FirstMI.getOperand(2).getTargetFlags() & AArch64II::MO_PAGEOFF) != 0 &&
+       (FirstMI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) != 0) {
+    return std::nullopt;
+  }
+
   // We only allow outlining for functions having exactly matching return
   // address signing attributes, i.e., all share the same value for the
   // attribute "sign-return-address" and all share the same type of key they
@@ -10179,34 +10194,6 @@ AArch64InstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI,
       return outliner::InstrType::Illegal;
   }
 
-  // An ADRP instruction referencing a GOT should not be outlined.
-  // This is to avoid splitting ADRP/(LDR/ADD/etc.) pair into different
-  // functions which can lead to linker ICF merging sections incorrectly.
-  if (MI.getOpcode() == AArch64::ADRP) {
-    bool IsPage = (MI.getOperand(1).getTargetFlags() & AArch64II::MO_PAGE) != 0;
-    bool IsGot = (MI.getOperand(1).getTargetFlags() & AArch64II::MO_GOT) != 0;
-    if (IsPage && IsGot)
-      return outliner::InstrType::Illegal;
-
-    // Special cases for instructions that can always be outlined, but will fail
-    // the later tests. e.g, ADRPs, which are PC-relative use LR, but can always
-    // be outlined because they don't require a *specific* value to be in LR.
-    return outliner::InstrType::Legal;
-  }
-
-  // Similarly, any user of ADRP instruction referencing a GOT should not be
-  // outlined. It's hard/costly to check exact users of ADRP. So we use check
-  // all operands and reject any that's a page offset and references a GOT.
-  const auto &F = MI.getMF()->getFunction();
-  for (const auto &MO : MI.operands()) {
-    bool IsPageOff = (MO.getTargetFlags() & AArch64II::MO_PAGEOFF) != 0;
-    bool IsGot = (MO.getTargetFlags() & AArch64II::MO_GOT) != 0;
-    if (IsPageOff && IsGot &&
-        (MI.getMF()->getTarget().getFunctionSections() || F.hasComdat() ||
-         F.hasSection() || F.getSectionPrefix()))
-      return outliner::InstrType::Illegal;
-  }
-
   // If MI is a call we might be able to outline it. We don't want to outline
   // any calls that rely on the position of items on the stack. When we outline
   // something containing a call, we have to emit a save and restore of LR in
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-adrp-got-split.mir b/llvm/test/CodeGen/AArch64/machine-outliner-adrp-got-split.mir
index 169835809d6ba..c397953b68f5e 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner-adrp-got-split.mir
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-adrp-got-split.mir
@@ -11,28 +11,16 @@
     ret i32 0
   }
 
-  define void @bar(i32 %a) #0 {
-    ret void
-  }
-
   attributes #0 = { noinline noredzone }
 ...
 ---
-# This test ensures that we do not outline ADRP / ADD pair when it's referencing 
-# a GOT entry.
+# Check that main function body doesn't split ADRP pair
 #
 # CHECK-LABEL: name: adrp_add
 # CHECK-DAG: bb.0:
-# CHECK: $x9 = ADRP target-flags(aarch64-page, aarch64-got) @x
-# CHECK: $x12 = ADDXri $x9, target-flags(aarch64-pageoff, aarch64-got) @x, 0
-
-# CHECK-DAG: bb.1
-# CHECK: $x9 = ADRP target-flags(aarch64-page, aarch64-got) @x
-# CHECK: $x12 = ADDXri $x9, target-flags(aarch64-pageoff, aarch64-got) @x, 0
-
-# CHECK-DAG: bb.2
-# CHECK: $x9 = ADRP target-flags(aarch64-page, aarch64-got) @x
-# CHECK: $x12 = ADDXri $x9, target-flags(aarch64-pageoff, aarch64-got) @x, 0
+# CHECK: BL @OUTLINED_FUNCTION_[[F0:[0-9]+]]
+# CHECK-NEXT: BL @OUTLINED_FUNCTION_[[F2:[0-9]+]]
+# CHECK-NEXT: $lr = ORRXri $xzr, 1
 name:            adrp_add
 tracksRegLiveness: true
 body:             |
@@ -74,21 +62,13 @@ body:             |
     RET undef $lr
 ...
 ---
-# This test ensures that we do not outline ADRP / LDR pair when it's referencing 
-# a GOT entry.
+# Check that main function body doesn't split ADRP pair
 #
 # CHECK-LABEL: name: adrp_ldr
 # CHECK-DAG: bb.0:
-# CHECK: $x9 = ADRP target-flags(aarch64-page, aarch64-got) @x
-# CHECK: $x12 = LDRXui $x9, target-flags(aarch64-pageoff, aarch64-got) @x
-
-# CHECK-DAG: bb.1
-# CHECK: $x9 = ADRP target-flags(aarch64-page, aarch64-got) @x
-# CHECK: $x12 = LDRXui $x9, target-flags(aarch64-pageoff, aarch64-got) @x
-
-# CHECK-DAG: bb.2
-# CHECK: $x9 = ADRP target-flags(aarch64-page, aarch64-got) @x
-# CHECK: $x12 = LDRXui $x9, target-flags(aarch64-pageoff, aarch64-got) @x
+# CHECK: BL @OUTLINED_FUNCTION_[[F0]]
+# CHECK-NEXT: BL @OUTLINED_FUNCTION_[[F1:[0-9]+]]
+# CHECK-NEXT: $lr = ORRXri $xzr, 1
 name:            adrp_ldr
 tracksRegLiveness: true
 body:             |
@@ -127,4 +107,27 @@ body:             |
     $lr = ORRXri $xzr, 1
   bb.3:
   liveins: $lr
-    RET undef $lr
\ No newline at end of file
+    RET undef $lr
+
+# Check that no outlined function split the ADRP pair apart
+#
+# CHECK: OUTLINED_FUNCTION_[[F0]]
+# CHECK-DAG: bb.0
+# CHECK: $w12 = ORRWri $wzr, 1
+# CHECK-NEXT: $w12 = ORRWri $wzr, 1
+# CHECK-NEXT: $w12 = ORRWri $wzr, 1
+# CHECK-NEXT: $w12 = ORRWri $wzr, 1
+# CHECK-NEXT: $w12 = ORRWri $wzr, 1
+# CHECK-NEXT: RET $lr
+
+# CHECK: OUTLINED_FUNCTION_[[F1]]
+# CHECK-DAG: bb.0
+# CHECK: $w12 = ORRWri $wzr, 1
+# CHECK-NEXT: $x9 = ADRP target-flags(aarch64-page, aarch64-got) @x
+# CHECK-NEXT: $x12 = LDRXui $x9, target-flags(aarch64-pageoff, aarch64-got) @x
+
+# CHECK: name: OUTLINED_FUNCTION_[[F2]]
+# CHECK-DAG: bb.0
+# CHECK: $w12 = ORRWri $wzr, 1
+# CHECK-NEXT: $x9 = ADRP target-flags(aarch64-page, aarch64-got) @x
+# CHECK-NEXT: $x12 = ADDXri $x9, target-flags(aarch64-pageoff, aarch64-got) @x, 0



More information about the llvm-commits mailing list