[llvm] [AArch64] Fix wrong condition in `canUseAsPrologue` (PR #81878)

Momchil Velikov via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 15 09:19:31 PST 2024


https://github.com/momchil-velikov created https://github.com/llvm/llvm-project/pull/81878

Inline stack probing code may need a scratch register, hence basic blocks where such register is not available cannot be used as prologues.

Checking for an available scratch regidster was incorrectly skipped when the function uses stack probing.

>From 0432dfc4efdea92961eba96e4bf8f6c3e9df7cf8 Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Thu, 15 Feb 2024 16:41:42 +0000
Subject: [PATCH] [AArch64] Fix wrong condition in `canUseAsPrologue`

Inline stack probing code may need a scratch register, hence basic
blocks where such register is not available cannot be used as prologues.

Checking for an available scratch regidster was incorrectly skipped when
the function uses stack probing.
---
 .../Target/AArch64/AArch64FrameLowering.cpp   |   2 +-
 .../AArch64/stack-probing-no-scratch-reg.mir  | 105 ++++++++++++++++++
 2 files changed, 106 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AArch64/stack-probing-no-scratch-reg.mir

diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 2d0ca6e6d0d3fc..315e013103c3ac 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1060,7 +1060,7 @@ bool AArch64FrameLowering::canUseAsPrologue(
 
   // Don't need a scratch register if we're not going to re-align the stack or
   // emit stack probes.
-  if (!RegInfo->hasStackRealignment(*MF) && TLI->hasInlineStackProbe(*MF))
+  if (!RegInfo->hasStackRealignment(*MF) && !TLI->hasInlineStackProbe(*MF))
     return true;
   // Otherwise, we can use any block as long as it has a scratch register
   // available.
diff --git a/llvm/test/CodeGen/AArch64/stack-probing-no-scratch-reg.mir b/llvm/test/CodeGen/AArch64/stack-probing-no-scratch-reg.mir
new file mode 100644
index 00000000000000..f50bd9ab4b8a1b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/stack-probing-no-scratch-reg.mir
@@ -0,0 +1,105 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc %s --start-before=shrink-wrap -stop-after=prologepilog -o - | FileCheck %s
+--- |
+  target triple = "aarch64-linux"
+
+  define void @f(i32 %n) #0 {
+  entry:
+    %a = alloca i8, i32 150000, align 8
+    %c0 = icmp sle i32 %n, 1
+    br i1 %c0, label %if.then1, label %exit
+
+  if.then1:
+    call void @g(ptr %a)
+    br label %exit
+
+  exit:
+    ret void
+  }
+
+  declare void @g(...)
+
+  attributes #0 = { nounwind "probe-stack"="inline-asm" "stack-probe-size"="4096" }
+
+...
+---
+name:            f
+alignment:       4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$w0', virtual-reg: '' }
+frameInfo:
+  localFrameSize:  150000
+stack:
+  - { id: 0, name: a, type: default, offset: 0, size: 150000, alignment: 8,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      local-offset: -150000, debug-info-variable: '', debug-info-expression: '',
+      debug-info-location: '' }
+entry_values:    []
+callSites:       []
+debugValueSubstitutions: []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  ; CHECK-LABEL: name: f
+  ; CHECK: bb.0.entry:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT:   liveins: $w0, $lr
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   early-clobber $sp = frame-setup STPXpre killed $fp, killed $lr, $sp, -2 :: (store (s64) into %stack.2), (store (s64) into %stack.1)
+  ; CHECK-NEXT:   $x9 = frame-setup SUBXri $sp, 36, 12
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3.entry:
+  ; CHECK-NEXT:   successors: %bb.4(0x40000000), %bb.3(0x40000000)
+  ; CHECK-NEXT:   liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x25, $x27, $x28
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $sp = frame-setup SUBXri $sp, 1, 12
+  ; CHECK-NEXT:   frame-setup STRXui $xzr, $sp, 0
+  ; CHECK-NEXT:   $xzr = frame-setup SUBSXrx64 $sp, $x9, 24, implicit-def $nzcv
+  ; CHECK-NEXT:   frame-setup Bcc 1, %bb.3, implicit $nzcv
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.4.entry:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT:   liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x25, $x27, $x28
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $sp = frame-setup SUBXri $sp, 2544, 0
+  ; CHECK-NEXT:   frame-setup STRXui $xzr, $sp, 0
+  ; CHECK-NEXT:   $x9 = IMPLICIT_DEF
+  ; CHECK-NEXT:   dead $wzr = SUBSWri killed renamable $w0, 1, 0, implicit-def $nzcv
+  ; CHECK-NEXT:   Bcc 12, %bb.2, implicit $nzcv
+  ; CHECK-NEXT:   B %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1.if.then1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT:   liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x23, $x25, $x25, $x27, $x28
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $x0 = ADDXri $sp, 0, 0
+  ; CHECK-NEXT:   BL @g, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2.exit:
+  ; CHECK-NEXT:   $sp = frame-destroy ADDXri $sp, 36, 12
+  ; CHECK-NEXT:   $sp = frame-destroy ADDXri $sp, 2544, 0
+  ; CHECK-NEXT:   early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1)
+  ; CHECK-NEXT:   RET_ReallyLR
+  bb.0.entry:
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    liveins: $w0
+
+    $x9 = IMPLICIT_DEF
+    dead $wzr = SUBSWri killed renamable $w0, 1, 0, implicit-def $nzcv
+    Bcc 12, %bb.2, implicit $nzcv
+    B %bb.1
+
+  bb.1.if.then1:
+    successors: %bb.2(0x80000000)
+    liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x23, $x25, $x25, $x27, $x28
+
+    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+    $x0 = ADDXri %stack.0.a, 0, 0
+    BL @g, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp
+    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+
+  bb.2.exit:
+    RET_ReallyLR
+
+...



More information about the llvm-commits mailing list