[llvm] [ARM, ELF] Fix access to dso_preemptable __stack_chk_guard with static relocation model (PR #70014)
Fangrui Song via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 31 15:35:04 PDT 2023
https://github.com/MaskRay updated https://github.com/llvm/llvm-project/pull/70014
>From f9e5f4adb9c4a3d923996b7030c98ba085cc98af Mon Sep 17 00:00:00 2001
From: Fangrui Song <i at maskray.me>
Date: Mon, 23 Oct 2023 23:43:48 -0700
Subject: [PATCH] [ARM,ELF] Fix access to dso_preemptable __stack_chk_guard
with static relocation model
The ELF code from https://reviews.llvm.org/D112811 emits LDRLIT_ga_pcrel when
`TM.isPositionIndependent()` but uses a different condition
`Subtarget.isGVIndirectSymbol(GV)` (aka dso_preemptable on ELF targets). This
would cause incorrect access for dso_preemptable `__stack_chk_guard` with the
static relocation model.
Regarding whether `__stack_chk_guard` gets the dso_local specifier,
https://reviews.llvm.org/D150841 switched to `M.getDirectAccessExternalData()`
(implied by "PIC Level") instead of `TM.getRelocationModel() == Reloc::Static`.
The result is that when non-zero "PIC Level" is used with static relocation
model (e.g. -fPIE/-fPIC LTO compiles with -no-pie linking), `__stack_chk_guard`
accesses are incorrect.
```
ldr r0, .LCPI0_0
ldr r0, [r0]
ldr r0, [r0] // incorrectly dereferences __stack_chk_guard
...
.LCPI0_0:
.long __stack_chk_guard
```
To fix this, for dso_preemptable `__stack_chk_guard`, emit a GOT PIC
code sequence like for -fpic using `LDRLIT_ga_pcrel`:
```
ldr r0, .LCPI0_0
.LPC0_0:
add r0, pc, r0
ldr r0, [r0]
ldr r0, [r0]
...
LCPI0_0:
.Ltmp0:
.long __stack_chk_guard(GOT_PREL)-((.LPC0_0+8)-.Ltmp0)
```
Technically, `LDRLIT_ga_abs` with `R_ARM_GOT_ABS` could be used, but
`R_ARM_GOT_ABS` does not have GNU or integrated assembler support. (Note,
`.LCPI0_0: .long __stack_chk_guard at GOT` produces an `R_ARM_GOT_BREL`, which is
not desired).
This patch fixes #6499 while not changing behavior for the following configurations:
```
run arm.linux.nopic --target=arm-linux-gnueabi -fno-pic
run arm.linux.pie --target=arm-linux-gnueabi -fpie
run arm.linux.pic --target=arm-linux-gnueabi -fpic
run armv6.darwin.nopic --target=armv6-apple-darwin -fno-pic
run armv6.darwin.dynamicnopic --target=armv6-apple-darwin -mdynamic-no-pic
run armv6.darwin.pic --target=armv6-apple-darwin -fpic
run armv7.darwin.nopic --target=armv7-apple-darwin -mcpu=cortex-a8 -fno-pic
run armv7.darwin.dynamicnopic --target=armv7-apple-darwin -mcpu=cortex-a8 -mdynamic-no-pic
run armv7.darwin.pic --target=armv7-apple-darwin -mcpu=cortex-a8 -fpic
run arm64.darwin.pic --target=arm64-apple-darwin
```
TargetLoweringBase::insertSSPDeclarations: make condition preciser
This change is not strictly needed, but it ensures that we follow the
model that direct accesses are only emitted for dso_local and we
do not need TargetMachine::shouldAssumeDSOLocal to force dso_local
for a dso_preemptable variable
---
llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 2 +-
llvm/lib/Target/ARM/ARMInstrInfo.cpp | 7 +++++--
llvm/test/CodeGen/ARM/stack-guard-elf.ll | 22 ++++++++++++++++++----
3 files changed, 24 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 1ffdde0360cf623..4c78379ccf5c467 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -4978,7 +4978,7 @@ void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
TargetFlags |= ARMII::MO_DLLIMPORT;
else if (IsIndirect)
TargetFlags |= ARMII::MO_COFFSTUB;
- } else if (Subtarget.isGVInGOT(GV)) {
+ } else if (IsIndirect) {
TargetFlags |= ARMII::MO_GOT;
}
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/llvm/lib/Target/ARM/ARMInstrInfo.cpp
index 00db13f2eb520d7..ccc883f646a62ad 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.cpp
@@ -104,8 +104,11 @@ void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI) const {
const GlobalValue *GV =
cast<GlobalValue>((*MI->memoperands_begin())->getValue());
- if (!Subtarget.useMovt() || Subtarget.isGVInGOT(GV)) {
- if (TM.isPositionIndependent())
+ bool ForceELFGOTPIC = Subtarget.isTargetELF() && !GV->isDSOLocal();
+ if (!Subtarget.useMovt() || ForceELFGOTPIC) {
+ // For ELF non-PIC, use GOT PIC code sequence as well because R_ARM_GOT_ABS
+ // does not have assembler support.
+ if (TM.isPositionIndependent() || ForceELFGOTPIC)
expandLoadStackGuardBase(MI, ARM::LDRLIT_ga_pcrel, ARM::LDRi12);
else
expandLoadStackGuardBase(MI, ARM::LDRLIT_ga_abs, ARM::LDRi12);
diff --git a/llvm/test/CodeGen/ARM/stack-guard-elf.ll b/llvm/test/CodeGen/ARM/stack-guard-elf.ll
index 6c0421d6a3c9e35..7a342d003c72d94 100644
--- a/llvm/test/CodeGen/ARM/stack-guard-elf.ll
+++ b/llvm/test/CodeGen/ARM/stack-guard-elf.ll
@@ -1,5 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+;; direct-access-external-data is false due to PIC Level, so __stack_chk_guard
+;; is dso_preemtable. Check that we use GOT PIC code sequence as well because
+;; R_ARM_GOT_ABS does not have assembler support.
; RUN: llc -relocation-model=static < %s | FileCheck %s
+; RUN: llc -relocation-model=pic < %s | FileCheck %s
target triple = "armv7a-linux-gnueabi"
@@ -9,16 +13,18 @@ define i32 @test1() #0 {
; CHECK-NEXT: push {r11, lr}
; CHECK-NEXT: sub sp, sp, #8
; CHECK-NEXT: sub sp, sp, #1024
-; CHECK-NEXT: movw r0, :lower16:__stack_chk_guard
-; CHECK-NEXT: movt r0, :upper16:__stack_chk_guard
+; CHECK-NEXT: ldr r0, .LCPI0_0
+; CHECK-NEXT: .LPC0_0:
+; CHECK-NEXT: add r0, pc, r0
; CHECK-NEXT: ldr r0, [r0]
; CHECK-NEXT: ldr r0, [r0]
; CHECK-NEXT: str r0, [sp, #1028]
; CHECK-NEXT: add r0, sp, #4
; CHECK-NEXT: bl foo
-; CHECK-NEXT: movw r1, :lower16:__stack_chk_guard
; CHECK-NEXT: ldr r0, [sp, #1028]
-; CHECK-NEXT: movt r1, :upper16:__stack_chk_guard
+; CHECK-NEXT: ldr r1, .LCPI0_1
+; CHECK-NEXT: .LPC0_1:
+; CHECK-NEXT: add r1, pc, r1
; CHECK-NEXT: ldr r1, [r1]
; CHECK-NEXT: ldr r1, [r1]
; CHECK-NEXT: cmp r1, r0
@@ -28,6 +34,14 @@ define i32 @test1() #0 {
; CHECK-NEXT: popeq {r11, pc}
; CHECK-NEXT: .LBB0_1:
; CHECK-NEXT: bl __stack_chk_fail
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: @ %bb.2:
+; CHECK-NEXT: .LCPI0_0:
+; CHECK-NEXT: .Ltmp0:
+; CHECK-NEXT: .long __stack_chk_guard(GOT_PREL)-((.LPC0_0+8)-.Ltmp0)
+; CHECK-NEXT: .LCPI0_1:
+; CHECK-NEXT: .Ltmp1:
+; CHECK-NEXT: .long __stack_chk_guard(GOT_PREL)-((.LPC0_1+8)-.Ltmp1)
%a1 = alloca [256 x i32], align 4
call void @foo(ptr %a1) #3
ret i32 0
More information about the llvm-commits
mailing list