[llvm] [CFI][stackprobe] Shrink wrapper select safe prologue insertion block… (PR #81676)

via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 13 14:09:44 PST 2024


https://github.com/yozhu created https://github.com/llvm/llvm-project/pull/81676

… when inline stack probing is enabled

Summary:
For the switch-case code at the entry of this function: https://github.com/llvm/llvm-project/blob/main/libcxx/include/__algorithm/stable_sort.h#L193C1-L193C5 LLVM produces machine code at one point like:

```
bb.0:
 successors: %bb.19, %bb.20
  liveins: $x0, $x1, $x2, $x3, $x4
  %45:gpr64common = COPY $x2
  %48:gpr64 = SUBSXri %45:gpr64common, 2, 0, implicit-def $nzcv;
  Bcc 3, %bb.19, implicit $nzcv;
  B %bb.20;

bb.20:
; predecessors: %bb.0
  successors: %bb.1, %bb.3

  %49:gpr64 = SUBSXri %45:gpr64common, 2, 0, implicit-def $nzcv;
  Bcc 1, %bb.3, implicit $nzcv;
  B %bb.1;
```

The `machine-cse` pass later removes the `SUBSXri` instruction in the 2nd block and adds `$nzcv` as its `liveins`, which is legitimate:

```
bb.0:
 successors: %bb.19, %bb.20
  liveins: $x0, $x1, $x2, $x3, $x4
  %45:gpr64common = COPY $x2
  %48:gpr64 = SUBSXri %45:gpr64common, 2, 0, implicit-def $nzcv;
  Bcc 3, %bb.19, implicit $nzcv;
  B %bb.20;

bb.20:
; predecessors: %bb.0
  successors: %bb.1, %bb.3
  liveins: $nzcv

  Bcc 1, %bb.3, implicit $nzcv;
  B %bb.1;
```

The `shrinkwrap` pass later decides that we can put prologue inside `bb.20`, which also seems fine. And then the `prologepilog` pass inserts regular prologue sequence in the 2nd basic block, plus some stack probing code, which trashes `$nzcv` and is caught by an assertion:

```
Assertion `!LiveRegs.contains(Op.getReg()) && "live register clobbered by inserted prologue instructions"' failed.
```

In the 2nd block, the inserted stack probing instruction (with the destination as `x9`) redefines the live-in register `$nzcv`:

```
bb.1:
; predecessors: %bb.0
  successors: %bb.2, %bb.4
  liveins: $nzcv, $x0, $x1, $x2, $x3, $x4, $lr, $fp, $x27, $x28, $x25, $x26, $x23, $x24, $x21, $x22, $x19, $x20
  early-clobber $sp = frame-setup STPXpre killed $fp, killed $lr, $sp(tied-def 0), -12 :: (store (s64) into %stack.25), (store (s64) into %stack.24)
  frame-setup STPXi killed $x28, killed $x27, $sp, 2 :: (store (s64) into %stack.23), (store (s64) into %stack.22)
  frame-setup STPXi killed $x26, killed $x25, $sp, 4 :: (store (s64) into %stack.21), (store (s64) into %stack.20)
  frame-setup STPXi killed $x24, killed $x23, $sp, 6 :: (store (s64) into %stack.19), (store (s64) into %stack.18)
  frame-setup STPXi killed $x22, killed $x21, $sp, 8 :: (store (s64) into %stack.17), (store (s64) into %stack.16)
  frame-setup STPXi killed $x20, killed $x19, $sp, 10 :: (store (s64) into %stack.15), (store (s64) into %stack.14)
  $x9 = PROBED_STACKALLOC 432, 96, 0, implicit-def $sp, implicit-def $nzcv, implicit $sp
  frame-setup CFI_INSTRUCTION def_cfa_offset 528
  frame-setup CFI_INSTRUCTION offset $w19, -8
  frame-setup CFI_INSTRUCTION offset $w20, -16
  frame-setup CFI_INSTRUCTION offset $w21, -24
  frame-setup CFI_INSTRUCTION offset $w22, -32
  frame-setup CFI_INSTRUCTION offset $w23, -40
  frame-setup CFI_INSTRUCTION offset $w24, -48
  frame-setup CFI_INSTRUCTION offset $w25, -56
  frame-setup CFI_INSTRUCTION offset $w26, -64
  frame-setup CFI_INSTRUCTION offset $w27, -72
  frame-setup CFI_INSTRUCTION offset $w28, -80
  frame-setup CFI_INSTRUCTION offset $w30, -88
  frame-setup CFI_INSTRUCTION offset $w29, -96
  renamable $x21 = COPY $x1
  renamable $x20 = COPY $x0
  Bcc 1, %bb.4, implicit $nzcv;
  B %bb.2;
```

One way to fix this is to change `ShrinkWrap` to go one scope out when such situation is detected, so potential stack probing instruction won't trash the lived in flag register.

>From 7a7b79aef23e17767053b50412b8027a925ae6c1 Mon Sep 17 00:00:00 2001
From: YongKang Zhu <yongzhu at meta.com>
Date: Tue, 6 Feb 2024 15:25:13 -0800
Subject: [PATCH] [CFI][stackprobe] Shrink wrapper select safe prologue
 insertion block when inline stack probing is enabled

Summary:
For the switch-case code at the entry of this function:
https://github.com/llvm/llvm-project/blob/main/libcxx/include/__algorithm/stable_sort.h#L193C1-L193C5
LLVM produces machine code at one point like:

```
bb.0:
 successors: %bb.19, %bb.20
  liveins: $x0, $x1, $x2, $x3, $x4
  %45:gpr64common = COPY $x2
  %48:gpr64 = SUBSXri %45:gpr64common, 2, 0, implicit-def $nzcv;
  Bcc 3, %bb.19, implicit $nzcv;
  B %bb.20;

bb.20:
; predecessors: %bb.0
  successors: %bb.1, %bb.3

  %49:gpr64 = SUBSXri %45:gpr64common, 2, 0, implicit-def $nzcv;
  Bcc 1, %bb.3, implicit $nzcv;
  B %bb.1;
```

The `machine-cse` pass later removes the `SUBSXri` instruction in the 2nd
block and adds `$nzcv` as its `liveins`, which is legitimate:

```
bb.0:
 successors: %bb.19, %bb.20
  liveins: $x0, $x1, $x2, $x3, $x4
  %45:gpr64common = COPY $x2
  %48:gpr64 = SUBSXri %45:gpr64common, 2, 0, implicit-def $nzcv;
  Bcc 3, %bb.19, implicit $nzcv;
  B %bb.20;

bb.20:
; predecessors: %bb.0
  successors: %bb.1, %bb.3
  liveins: $nzcv

  Bcc 1, %bb.3, implicit $nzcv;
  B %bb.1;
```

The `shrinkwrap` pass later decides that we can put prologue inside `bb.20`,
which also seems fine. And then the `prologepilog` pass inserts regular prologue
sequence in the 2nd basic block, plus some stack probing code, which trashes
`$nzcv` and is caught by an assertion:

```
Assertion `!LiveRegs.contains(Op.getReg()) && "live register clobbered by inserted prologue instructions"' failed.
```

In the 2nd block, the inserted stack probing instruction (with the destination
as `x9`) redefines the live-in register `$nzcv`:

```
bb.1:
; predecessors: %bb.0
  successors: %bb.2, %bb.4
  liveins: $nzcv, $x0, $x1, $x2, $x3, $x4, $lr, $fp, $x27, $x28, $x25, $x26, $x23, $x24, $x21, $x22, $x19, $x20
  early-clobber $sp = frame-setup STPXpre killed $fp, killed $lr, $sp(tied-def 0), -12 :: (store (s64) into %stack.25), (store (s64) into %stack.24)
  frame-setup STPXi killed $x28, killed $x27, $sp, 2 :: (store (s64) into %stack.23), (store (s64) into %stack.22)
  frame-setup STPXi killed $x26, killed $x25, $sp, 4 :: (store (s64) into %stack.21), (store (s64) into %stack.20)
  frame-setup STPXi killed $x24, killed $x23, $sp, 6 :: (store (s64) into %stack.19), (store (s64) into %stack.18)
  frame-setup STPXi killed $x22, killed $x21, $sp, 8 :: (store (s64) into %stack.17), (store (s64) into %stack.16)
  frame-setup STPXi killed $x20, killed $x19, $sp, 10 :: (store (s64) into %stack.15), (store (s64) into %stack.14)
  $x9 = PROBED_STACKALLOC 432, 96, 0, implicit-def $sp, implicit-def $nzcv, implicit $sp
  frame-setup CFI_INSTRUCTION def_cfa_offset 528
  frame-setup CFI_INSTRUCTION offset $w19, -8
  frame-setup CFI_INSTRUCTION offset $w20, -16
  frame-setup CFI_INSTRUCTION offset $w21, -24
  frame-setup CFI_INSTRUCTION offset $w22, -32
  frame-setup CFI_INSTRUCTION offset $w23, -40
  frame-setup CFI_INSTRUCTION offset $w24, -48
  frame-setup CFI_INSTRUCTION offset $w25, -56
  frame-setup CFI_INSTRUCTION offset $w26, -64
  frame-setup CFI_INSTRUCTION offset $w27, -72
  frame-setup CFI_INSTRUCTION offset $w28, -80
  frame-setup CFI_INSTRUCTION offset $w30, -88
  frame-setup CFI_INSTRUCTION offset $w29, -96
  renamable $x21 = COPY $x1
  renamable $x20 = COPY $x0
  Bcc 1, %bb.4, implicit $nzcv;
  B %bb.2;
```

One way to fix this is to change `ShrinkWrap` to go one scope out when such
situation is detected, so potential stack probing instruction won't trash
the lived in flag register.
---
 llvm/include/llvm/CodeGen/TargetLowering.h      | 5 +++++
 llvm/lib/CodeGen/ShrinkWrap.cpp                 | 9 +++++++++
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 5 +++++
 llvm/lib/Target/AArch64/AArch64ISelLowering.h   | 3 +++
 llvm/lib/Target/X86/X86ISelLowering.cpp         | 5 +++++
 llvm/lib/Target/X86/X86ISelLowering.h           | 3 +++
 6 files changed, 30 insertions(+)

diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 612433b54f6e44..1e607a1c714afc 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2010,6 +2010,11 @@ class TargetLoweringBase {
 
   virtual bool hasInlineStackProbe(const MachineFunction &MF) const { return false; }
 
+  virtual bool
+  isStackProbeInstrDefinedFlagRegLiveIn(const MachineBasicBlock *MBB) const {
+    return false;
+  }
+
   virtual StringRef getStackProbeSymbolName(const MachineFunction &MF) const {
     return "";
   }
diff --git a/llvm/lib/CodeGen/ShrinkWrap.cpp b/llvm/lib/CodeGen/ShrinkWrap.cpp
index ab57d08e527e4d..59f836e98a751c 100644
--- a/llvm/lib/CodeGen/ShrinkWrap.cpp
+++ b/llvm/lib/CodeGen/ShrinkWrap.cpp
@@ -957,6 +957,15 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
   if (!ArePointsInteresting())
     return Changed;
 
+  const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
+  while (TLI->hasInlineStackProbe(MF) &&
+         TLI->isStackProbeInstrDefinedFlagRegLiveIn(Save)) {
+    Save = FindIDom<>(*Save, Save->predecessors(), *MDT);
+    Restore = FindIDom<>(*Restore, Restore->successors(), *MPDT);
+    if (!ArePointsInteresting())
+      return Changed;
+  }
+
   LLVM_DEBUG(dbgs() << "Final shrink wrap candidates:\nSave: "
                     << printMBBReference(*Save) << ' '
                     << "\nRestore: " << printMBBReference(*Restore) << '\n');
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a3b7e3128ac1a4..ea7d955c49efc3 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -27349,3 +27349,8 @@ bool AArch64TargetLowering::hasInlineStackProbe(
   return !Subtarget->isTargetWindows() &&
          MF.getInfo<AArch64FunctionInfo>()->hasStackProbing();
 }
+
+bool AArch64TargetLowering::isStackProbeInstrDefinedFlagRegLiveIn(
+    const MachineBasicBlock *MBB) const {
+  return MBB->isLiveIn(AArch64::NZCV);
+}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 436b21fd134632..b22573d1b5fb31 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -997,6 +997,9 @@ class AArch64TargetLowering : public TargetLowering {
   /// True if stack clash protection is enabled for this functions.
   bool hasInlineStackProbe(const MachineFunction &MF) const override;
 
+  bool isStackProbeInstrDefinedFlagRegLiveIn(
+      const MachineBasicBlock *MBB) const override;
+
 private:
   /// Keep a pointer to the AArch64Subtarget around so that we can
   /// make the right decision when generating code for different targets.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 18f9871b2bd0c3..89bbd59806f830 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -57882,6 +57882,11 @@ X86TargetLowering::getStackProbeSize(const MachineFunction &MF) const {
                                                         4096);
 }
 
+bool X86TargetLowering::isStackProbeInstrDefinedFlagRegLiveIn(
+    const MachineBasicBlock *MBB) const {
+  return MBB->isLiveIn(X86::EFLAGS);
+}
+
 Align X86TargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
   if (ML && ML->isInnermost() &&
       ExperimentalPrefInnermostLoopAlignment.getNumOccurrences())
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index f93c54781846bf..575f9cf63cb5c2 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1550,6 +1550,9 @@ namespace llvm {
     bool hasInlineStackProbe(const MachineFunction &MF) const override;
     StringRef getStackProbeSymbolName(const MachineFunction &MF) const override;
 
+    bool isStackProbeInstrDefinedFlagRegLiveIn(
+        const MachineBasicBlock *MBB) const override;
+
     unsigned getStackProbeSize(const MachineFunction &MF) const;
 
     bool hasVectorBlend() const override { return true; }



More information about the llvm-commits mailing list