[llvm] [ARM, ELF] Fix access to dso_preemptable __stack_chk_guard with static relocation model (PR #70014)
Fangrui Song via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 25 15:27:32 PDT 2023
https://github.com/MaskRay updated https://github.com/llvm/llvm-project/pull/70014
>From 2007cfedf3c795d2884a7fc65dc4d19a4c386250 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i at maskray.me>
Date: Mon, 23 Oct 2023 23:43:48 -0700
Subject: [PATCH 1/2] [ARM,ELF] Fix access to dso_preemptable __stack_chk_guard
with static relocation model
The ELF code from https://reviews.llvm.org/D112811 chooses the
`LDRLIT_ga_pcrel` sequence when `TM.isPositionIndependent()`, but decides
whether to add the extra load through the GOT entry using a different
condition, `Subtarget.isGVIndirectSymbol(GV)` (aka dso_preemptable on ELF
targets). When the two conditions disagree, i.e. for a dso_preemptable
`__stack_chk_guard` with the static relocation model, the generated access is
incorrect.
Regarding whether `__stack_chk_guard` gets the dso_local specifier,
https://reviews.llvm.org/D150841 switched to `M.getDirectAccessExternalData()`
(implied by "PIC Level") instead of `TM.getRelocationModel() == Reloc::Static`.
The result is that when non-zero "PIC Level" is used with static relocation
model (e.g. -fPIE/-fPIC LTO compiles with -no-pie linking), `__stack_chk_guard`
accesses are incorrect.
```
ldr r0, .LCPI0_0
ldr r0, [r0]
ldr r0, [r0] // incorrectly dereferences __stack_chk_guard
...
.LCPI0_0:
.long __stack_chk_guard
```
To fix this, for dso_preemptable `__stack_chk_guard`, emit a GOT PIC
code sequence like for -fpic using `LDRLIT_ga_pcrel`:
```
ldr r0, .LCPI0_0
.LPC0_0:
add r0, pc, r0
ldr r0, [r0]
ldr r0, [r0]
...
.LCPI0_0:
.Ltmp0:
.long __stack_chk_guard(GOT_PREL)-((.LPC0_0+8)-.Ltmp0)
```
Technically, `LDRLIT_ga_abs` with `R_ARM_GOT_ABS` could be used, but
`R_ARM_GOT_ABS` does not have GNU or integrated assembler support. (Note,
`.LCPI0_0: .long __stack_chk_guard@GOT` produces an `R_ARM_GOT_BREL`, which is
not desired).
This patch fixes #6499 while not changing behavior for the following configurations:
```
run arm.linux.nopic --target=arm-linux-gnueabi -fno-pic
run arm.linux.pie --target=arm-linux-gnueabi -fpie
run arm.linux.pic --target=arm-linux-gnueabi -fpic
run armv6.darwin.nopic --target=armv6-apple-darwin -fno-pic
run armv6.darwin.dynamicnopic --target=armv6-apple-darwin -mdynamic-no-pic
run armv6.darwin.pic --target=armv6-apple-darwin -fpic
run armv7.darwin.nopic --target=armv7-apple-darwin -mcpu=cortex-a8 -fno-pic
run armv7.darwin.dynamicnopic --target=armv7-apple-darwin -mcpu=cortex-a8 -mdynamic-no-pic
run armv7.darwin.pic --target=armv7-apple-darwin -mcpu=cortex-a8 -fpic
run arm64.darwin.pic --target=arm64-apple-darwin
```
---
llvm/lib/CodeGen/TargetLoweringBase.cpp | 4 +++-
llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 2 +-
llvm/lib/Target/ARM/ARMInstrInfo.cpp | 7 +++++--
llvm/test/CodeGen/ARM/stack_guard_remat.ll | 16 ++++++++++------
llvm/test/LTO/ARM/ssp-static-reloc.ll | 3 ++-
5 files changed, 21 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 99eadf4bb9d578b..1d88ac2ad75bc6f 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -2019,7 +2019,9 @@ void TargetLoweringBase::insertSSPDeclarations(Module &M) const {
if (M.getDirectAccessExternalData() &&
!TM.getTargetTriple().isWindowsGNUEnvironment() &&
!TM.getTargetTriple().isOSFreeBSD() &&
- !TM.getTargetTriple().isOSDarwin())
+ (!TM.getTargetTriple().isOSDarwin() ||
+ (TM.getTargetTriple().isARM() &&
+ TM.getRelocationModel() == Reloc::Static)))
GV->setDSOLocal(true);
}
}
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 1ffdde0360cf623..4c78379ccf5c467 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -4978,7 +4978,7 @@ void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
TargetFlags |= ARMII::MO_DLLIMPORT;
else if (IsIndirect)
TargetFlags |= ARMII::MO_COFFSTUB;
- } else if (Subtarget.isGVInGOT(GV)) {
+ } else if (IsIndirect) {
TargetFlags |= ARMII::MO_GOT;
}
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/llvm/lib/Target/ARM/ARMInstrInfo.cpp
index 00db13f2eb520d7..ccc883f646a62ad 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.cpp
@@ -104,8 +104,11 @@ void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI) const {
const GlobalValue *GV =
cast<GlobalValue>((*MI->memoperands_begin())->getValue());
- if (!Subtarget.useMovt() || Subtarget.isGVInGOT(GV)) {
- if (TM.isPositionIndependent())
+ bool ForceELFGOTPIC = Subtarget.isTargetELF() && !GV->isDSOLocal();
+ if (!Subtarget.useMovt() || ForceELFGOTPIC) {
+ // For ELF non-PIC, use GOT PIC code sequence as well because R_ARM_GOT_ABS
+ // does not have assembler support.
+ if (TM.isPositionIndependent() || ForceELFGOTPIC)
expandLoadStackGuardBase(MI, ARM::LDRLIT_ga_pcrel, ARM::LDRi12);
else
expandLoadStackGuardBase(MI, ARM::LDRLIT_ga_abs, ARM::LDRi12);
diff --git a/llvm/test/CodeGen/ARM/stack_guard_remat.ll b/llvm/test/CodeGen/ARM/stack_guard_remat.ll
index ad40212c9b6f1d0..983ef1336daa4ee 100644
--- a/llvm/test/CodeGen/ARM/stack_guard_remat.ll
+++ b/llvm/test/CodeGen/ARM/stack_guard_remat.ll
@@ -1,9 +1,11 @@
-; RUN: llc < %s -mtriple=arm-apple-ios -relocation-model=pic -no-integrated-as | FileCheck %s -check-prefix=PIC
-; RUN: llc < %s -mtriple=arm-apple-ios -relocation-model=static -no-integrated-as | FileCheck %s -check-prefix=NO-PIC -check-prefix=STATIC
-; RUN: llc < %s -mtriple=arm-apple-ios -relocation-model=dynamic-no-pic -no-integrated-as | FileCheck %s -check-prefix=NO-PIC -check-prefix=DYNAMIC-NO-PIC
-; RUN: llc < %s -mtriple=armv7-apple-ios -mcpu=cortex-a8 -relocation-model=pic -no-integrated-as | FileCheck %s -check-prefix=PIC-V7
-; RUN: llc < %s -mtriple=armv7-apple-ios -mcpu=cortex-a8 -relocation-model=static -no-integrated-as | FileCheck %s -check-prefix=STATIC-V7
-; RUN: llc < %s -mtriple=armv7-apple-ios -mcpu=cortex-a8 -relocation-model=dynamic-no-pic -no-integrated-as | FileCheck %s -check-prefix=DYNAMIC-NO-PIC-V7
+; RUN: rm -rf %t && split-file %s %t && cd %t
+; RUN: cat main.ll pic-flag.ll > pic.ll
+; RUN: llc < pic.ll -mtriple=arm-apple-ios -relocation-model=pic -no-integrated-as | FileCheck %s -check-prefix=PIC
+; RUN: llc < main.ll -mtriple=arm-apple-ios -relocation-model=static -no-integrated-as | FileCheck %s -check-prefix=NO-PIC -check-prefix=STATIC
+; RUN: llc < main.ll -mtriple=arm-apple-ios -relocation-model=dynamic-no-pic -no-integrated-as | FileCheck %s -check-prefixes=NO-PIC,DYNAMIC-NO-PIC
+; RUN: llc < pic.ll -mtriple=armv7-apple-ios -mcpu=cortex-a8 -relocation-model=pic -no-integrated-as | FileCheck %s -check-prefix=PIC-V7
+; RUN: llc < main.ll -mtriple=armv7-apple-ios -mcpu=cortex-a8 -relocation-model=static -no-integrated-as | FileCheck %s -check-prefix=STATIC-V7
+; RUN: llc < main.ll -mtriple=armv7-apple-ios -mcpu=cortex-a8 -relocation-model=dynamic-no-pic -no-integrated-as | FileCheck %s -check-prefix=DYNAMIC-NO-PIC-V7
;PIC: foo2
;PIC: ldr [[R0:r[0-9]+]], [[LABEL0:LCPI[0-9_]+]]
@@ -47,6 +49,7 @@
;DYNAMIC-NO-PIC-V7: L___stack_chk_guard$non_lazy_ptr:
;DYNAMIC-NO-PIC-V7: .indirect_symbol ___stack_chk_guard
+;--- main.ll
; Function Attrs: nounwind ssp
define i32 @test_stack_guard_remat() #0 {
%a1 = alloca [256 x i32], align 4
@@ -67,5 +70,6 @@ declare void @llvm.lifetime.end.p0(i64, ptr nocapture)
attributes #0 = { nounwind ssp "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+;--- pic-flag.ll
!llvm.module.flags = !{!0}
!0 = !{i32 7, !"PIC Level", i32 2}
diff --git a/llvm/test/LTO/ARM/ssp-static-reloc.ll b/llvm/test/LTO/ARM/ssp-static-reloc.ll
index c8825c2aae0fbb6..2ea6977ef78e752 100644
--- a/llvm/test/LTO/ARM/ssp-static-reloc.ll
+++ b/llvm/test/LTO/ARM/ssp-static-reloc.ll
@@ -19,11 +19,12 @@ entry:
; CHECK: <foo>:
; CHECK: [[#%x,CURPC:]]:{{.*}} ldr r[[REG1:[0-9]+]], [pc, #0x[[#%x,OFFSET:]]]
+; CHECK-NEXT: add r0, pc, r0
; CHECK-NEXT: ldr r[[REG2:[0-9]+]], [r[[REG1]]]
; CHECK-NEXT: ldr r[[REG3:[0-9]+]], [r[[REG2]]]
; CHECK-NEXT: str r[[REG3]],
; CHECK: [[#CURPC + OFFSET + 8]]:{{.*}}.word
-; CHECK-NEXT: R_ARM_ABS32 __stack_chk_guard
+; CHECK-NEXT: R_ARM_GOT_PREL __stack_chk_guard
declare void @llvm.memset.p0.i32(ptr nocapture writeonly, i8, i32, i1 immarg)
>From 5bd73aec2656688945276609a0da2c01e2a0b829 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i at maskray.me>
Date: Wed, 25 Oct 2023 15:24:17 -0700
Subject: [PATCH 2/2] TargetLoweringBase::insertSSPDeclarations: make condition
more precise
This change is not strictly needed, but it ensures that we follow the
model that direct accesses are only emitted for dso_local symbols and we
do not need TargetMachine::shouldAssumeDSOLocal to force dso_local
for a dso_preemptable variable.
---
llvm/lib/CodeGen/TargetLoweringBase.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 1d88ac2ad75bc6f..722cefb1eddb3c5 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -2020,8 +2020,7 @@ void TargetLoweringBase::insertSSPDeclarations(Module &M) const {
!TM.getTargetTriple().isWindowsGNUEnvironment() &&
!TM.getTargetTriple().isOSFreeBSD() &&
(!TM.getTargetTriple().isOSDarwin() ||
- (TM.getTargetTriple().isARM() &&
- TM.getRelocationModel() == Reloc::Static)))
+ TM.getRelocationModel() == Reloc::Static))
GV->setDSOLocal(true);
}
}
More information about the llvm-commits
mailing list