[llvm] [StackFrameLayoutAnalysis] Use target-specific hook for SP offsets (PR #100386)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 24 11:02:22 PDT 2024
================
@@ -90,13 +90,167 @@ entry:
ret i32 0
}
+; In the presence of dynamic stack-realignment we emit correct offsets for
+; objects which are not realigned. For realigned objects, e.g. the i32 alloca
+; in this test, we emit the correct offset ignoring the re-alignment (i.e. the
+; offset if the alignment requirement is already satisfied).
+
+; CHECK-FRAMELAYOUT-LABEL: Function: csr_d8_allocnxv4i32i32f64_dynamicrealign
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-8], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-24], Type: Variable, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32], Type: Spill, Align: 16, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-16 x vscale], Type: Variable, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-128-16 x vscale], Type: Variable, Align: 128, Size: 4
+
+define i32 @csr_d8_allocnxv4i32i32f64_dynamicrealign(double %d) "aarch64_pstate_sm_compatible" {
+; CHECK-LABEL: csr_d8_allocnxv4i32i32f64_dynamicrealign:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str d8, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-NEXT: sub x9, sp, #96
+; CHECK-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: add x29, sp, #16
+; CHECK-NEXT: addvl x9, x9, #-1
+; CHECK-NEXT: and sp, x9, #0xffffffffffffff80
+; CHECK-NEXT: .cfi_def_cfa w29, 16
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: .cfi_offset b8, -32
+; CHECK-NEXT: mov z1.s, #0 // =0x0
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: sub x8, x29, #16
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: //APP
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: str wzr, [sp]
+; CHECK-NEXT: stur d0, [x29, #-8]
+; CHECK-NEXT: st1w { z1.s }, p0, [x8, #-1, mul vl]
+; CHECK-NEXT: sub sp, x29, #16
+; CHECK-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr d8, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ %a = alloca <vscale x 4 x i32>
+ %b = alloca i32, align 128
+ %c = alloca double
+ tail call void asm sideeffect "", "~{d8}"() #1
+ store <vscale x 4 x i32> zeroinitializer, ptr %a
+ store i32 zeroinitializer, ptr %b
+ store double %d, ptr %c
+ ret i32 0
+}
+
+; In the presence of VLA-area objects, we emit correct offsets for all objects
+; except for these VLA objects.
+
+; CHECK-FRAMELAYOUT-LABEL: Function: csr_d8_allocnxv4i32i32f64_vla
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-8], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-24], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32], Type: Variable, Align: 1, Size: 0
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-16 x vscale], Type: Variable, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-40-16 x vscale], Type: Variable, Align: 8, Size: 8
+
+define i32 @csr_d8_allocnxv4i32i32f64_vla(double %d, i32 %i) "aarch64_pstate_sm_compatible" {
+; CHECK-LABEL: csr_d8_allocnxv4i32i32f64_vla:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str d8, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #8] // 16-byte Folded Spill
+; CHECK-NEXT: add x29, sp, #8
+; CHECK-NEXT: str x19, [sp, #24] // 8-byte Folded Spill
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: mov x19, sp
+; CHECK-NEXT: .cfi_def_cfa w29, 24
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: .cfi_offset w29, -24
+; CHECK-NEXT: .cfi_offset b8, -32
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: ubfiz x8, x0, #2, #32
+; CHECK-NEXT: mov x9, sp
+; CHECK-NEXT: add x8, x8, #15
+; CHECK-NEXT: and x8, x8, #0x7fffffff0
+; CHECK-NEXT: sub x8, x9, x8
+; CHECK-NEXT: mov sp, x8
+; CHECK-NEXT: mov z1.s, #0 // =0x0
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: //APP
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: str wzr, [x8]
+; CHECK-NEXT: sub x8, x29, #8
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: str d0, [x19, #8]
+; CHECK-NEXT: st1w { z1.s }, p0, [x8, #-1, mul vl]
+; CHECK-NEXT: sub sp, x29, #8
+; CHECK-NEXT: ldp x29, x30, [sp, #8] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #24] // 8-byte Folded Reload
+; CHECK-NEXT: ldr d8, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ %a = alloca <vscale x 4 x i32>
+ %0 = zext i32 %i to i64
+ %b = alloca i32, i64 %0
+ %c = alloca double
+ tail call void asm sideeffect "", "~{d8}"() #1
+ store <vscale x 4 x i32> zeroinitializer, ptr %a
+ store i32 zeroinitializer, ptr %b
+ store double %d, ptr %c
+ ret i32 0
+}
+
+; CHECK-FRAMELAYOUT-LABEL: Function: csr_d8_allocnxv4i32i32f64_stackargsi32f64
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP+8], Type: Variable, Align: 8, Size: 4
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP+0], Type: Protector, Align: 16, Size: 8
----------------
davemgreen wrote:
As far as I understand the stack function argument have negative index slot numbers as they start above the beginning of the stack, so -1 is the first. A real stack protector should be >= 0 though. We use `std::numeric_limits<int>::max()` in other places instead of -1 as the "not-present" identity value.
https://github.com/llvm/llvm-project/pull/100386
More information about the llvm-commits
mailing list