[llvm] [ARM, ELF] Fix access to dso_preemptable __stack_chk_guard with static relocation model (PR #70014)

Fangrui Song via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 31 15:35:44 PDT 2023


https://github.com/MaskRay updated https://github.com/llvm/llvm-project/pull/70014

>From a82a1260d297242a4539861361b77ed66d86aa68 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i at maskray.me>
Date: Mon, 23 Oct 2023 23:43:48 -0700
Subject: [PATCH] [ARM,ELF] Fix access to dso_preemptable __stack_chk_guard
 with static relocation model

The ELF code from https://reviews.llvm.org/D112811 emits LDRLIT_ga_pcrel
when `TM.isPositionIndependent()` but uses a different condition
`Subtarget.isGVIndirectSymbol(GV)` (aka dso_preemptable on ELF targets).
This would cause incorrect access for dso_preemptable
`__stack_chk_guard` with the static relocation model.

Regarding whether `__stack_chk_guard` gets the dso_local specifier,
https://reviews.llvm.org/D150841 switched to
`M.getDirectAccessExternalData()` (implied by "PIC Level") instead of
`TM.getRelocationModel() == Reloc::Static`.

The result is that when non-zero "PIC Level" is used with static
relocation model (e.g. -fPIE/-fPIC LTO compiles with -no-pie linking),
`__stack_chk_guard` accesses are incorrect.
```
        ldr     r0, .LCPI0_0
        ldr     r0, [r0]
        ldr     r0, [r0]   // incorrectly dereferences __stack_chk_guard
...
.LCPI0_0:
        .long   __stack_chk_guard
```

To fix this, for dso_preemptable `__stack_chk_guard`, emit a GOT PIC
code sequence like for -fpic using `LDRLIT_ga_pcrel`:
```
        ldr     r0, .LCPI0_0
.LPC0_0:
        add     r0, pc, r0
        ldr     r0, [r0]
        ldr     r0, [r0]
...
LCPI0_0:
.Ltmp0:
        .long   __stack_chk_guard(GOT_PREL)-((.LPC0_0+8)-.Ltmp0)
```

Technically, `LDRLIT_ga_abs` with `R_ARM_GOT_ABS` could be used, but
`R_ARM_GOT_ABS` does not have GNU or integrated assembler support.
(Note, `.LCPI0_0: .long __stack_chk_guard at GOT` produces an
`R_ARM_GOT_BREL`, which is not desired).

This patch fixes #6499 while not changing behavior for the following
configurations:
```
run arm.linux.nopic --target=arm-linux-gnueabi -fno-pic
run arm.linux.pie --target=arm-linux-gnueabi -fpie
run arm.linux.pic --target=arm-linux-gnueabi -fpic
run armv6.darwin.nopic --target=armv6-apple-darwin -fno-pic
run armv6.darwin.dynamicnopic --target=armv6-apple-darwin -mdynamic-no-pic
run armv6.darwin.pic --target=armv6-apple-darwin -fpic
run armv7.darwin.nopic --target=armv7-apple-darwin -mcpu=cortex-a8 -fno-pic
run armv7.darwin.dynamicnopic --target=armv7-apple-darwin -mcpu=cortex-a8 -mdynamic-no-pic
run armv7.darwin.pic --target=armv7-apple-darwin -mcpu=cortex-a8 -fpic
run arm64.darwin.pic --target=arm64-apple-darwin
```
---
 llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp |  2 +-
 llvm/lib/Target/ARM/ARMInstrInfo.cpp     |  7 +++++--
 llvm/test/CodeGen/ARM/stack-guard-elf.ll | 22 ++++++++++++++++++----
 3 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 1ffdde0360cf623..4c78379ccf5c467 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -4978,7 +4978,7 @@ void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
         TargetFlags |= ARMII::MO_DLLIMPORT;
       else if (IsIndirect)
         TargetFlags |= ARMII::MO_COFFSTUB;
-    } else if (Subtarget.isGVInGOT(GV)) {
+    } else if (IsIndirect) {
       TargetFlags |= ARMII::MO_GOT;
     }
 
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/llvm/lib/Target/ARM/ARMInstrInfo.cpp
index 00db13f2eb520d7..ccc883f646a62ad 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.cpp
@@ -104,8 +104,11 @@ void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI) const {
   const GlobalValue *GV =
       cast<GlobalValue>((*MI->memoperands_begin())->getValue());
 
-  if (!Subtarget.useMovt() || Subtarget.isGVInGOT(GV)) {
-    if (TM.isPositionIndependent())
+  bool ForceELFGOTPIC = Subtarget.isTargetELF() && !GV->isDSOLocal();
+  if (!Subtarget.useMovt() || ForceELFGOTPIC) {
+    // For ELF non-PIC, use GOT PIC code sequence as well because R_ARM_GOT_ABS
+    // does not have assembler support.
+    if (TM.isPositionIndependent() || ForceELFGOTPIC)
       expandLoadStackGuardBase(MI, ARM::LDRLIT_ga_pcrel, ARM::LDRi12);
     else
       expandLoadStackGuardBase(MI, ARM::LDRLIT_ga_abs, ARM::LDRi12);
diff --git a/llvm/test/CodeGen/ARM/stack-guard-elf.ll b/llvm/test/CodeGen/ARM/stack-guard-elf.ll
index 6c0421d6a3c9e35..7a342d003c72d94 100644
--- a/llvm/test/CodeGen/ARM/stack-guard-elf.ll
+++ b/llvm/test/CodeGen/ARM/stack-guard-elf.ll
@@ -1,5 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+;; direct-access-external-data is false due to PIC Level, so __stack_chk_guard
+;; is dso_preemtable. Check that we use GOT PIC code sequence as well because
+;; R_ARM_GOT_ABS does not have assembler support.
 ; RUN: llc -relocation-model=static < %s | FileCheck %s
+; RUN: llc -relocation-model=pic < %s | FileCheck %s
 
 target triple = "armv7a-linux-gnueabi"
 
@@ -9,16 +13,18 @@ define i32 @test1() #0 {
 ; CHECK-NEXT:    push {r11, lr}
 ; CHECK-NEXT:    sub sp, sp, #8
 ; CHECK-NEXT:    sub sp, sp, #1024
-; CHECK-NEXT:    movw r0, :lower16:__stack_chk_guard
-; CHECK-NEXT:    movt r0, :upper16:__stack_chk_guard
+; CHECK-NEXT:    ldr r0, .LCPI0_0
+; CHECK-NEXT:  .LPC0_0:
+; CHECK-NEXT:    add r0, pc, r0
 ; CHECK-NEXT:    ldr r0, [r0]
 ; CHECK-NEXT:    ldr r0, [r0]
 ; CHECK-NEXT:    str r0, [sp, #1028]
 ; CHECK-NEXT:    add r0, sp, #4
 ; CHECK-NEXT:    bl foo
-; CHECK-NEXT:    movw r1, :lower16:__stack_chk_guard
 ; CHECK-NEXT:    ldr r0, [sp, #1028]
-; CHECK-NEXT:    movt r1, :upper16:__stack_chk_guard
+; CHECK-NEXT:    ldr r1, .LCPI0_1
+; CHECK-NEXT:  .LPC0_1:
+; CHECK-NEXT:    add r1, pc, r1
 ; CHECK-NEXT:    ldr r1, [r1]
 ; CHECK-NEXT:    ldr r1, [r1]
 ; CHECK-NEXT:    cmp r1, r0
@@ -28,6 +34,14 @@ define i32 @test1() #0 {
 ; CHECK-NEXT:    popeq {r11, pc}
 ; CHECK-NEXT:  .LBB0_1:
 ; CHECK-NEXT:    bl __stack_chk_fail
+; CHECK-NEXT:    .p2align 2
+; CHECK-NEXT:  @ %bb.2:
+; CHECK-NEXT:  .LCPI0_0:
+; CHECK-NEXT:  .Ltmp0:
+; CHECK-NEXT:    .long __stack_chk_guard(GOT_PREL)-((.LPC0_0+8)-.Ltmp0)
+; CHECK-NEXT:  .LCPI0_1:
+; CHECK-NEXT:  .Ltmp1:
+; CHECK-NEXT:    .long __stack_chk_guard(GOT_PREL)-((.LPC0_1+8)-.Ltmp1)
   %a1 = alloca [256 x i32], align 4
   call void @foo(ptr %a1) #3
   ret i32 0



More information about the llvm-commits mailing list