[llvm] r331451 - ARM: don't try to over-align large vectors as arguments.
Tim Northover via llvm-commits
llvm-commits at lists.llvm.org
Thu May 3 05:54:25 PDT 2018
Author: tnorthover
Date: Thu May 3 05:54:25 2018
New Revision: 331451
URL: http://llvm.org/viewvc/llvm-project?rev=331451&view=rev
Log:
ARM: don't try to over-align large vectors as arguments.
By default LLVM assumes very large vectors are aligned to their size when
passed as arguments between functions. Unfortunately the ARM backend was
never taught this, so it doesn't trigger stack realignment, and accesses to
such arguments can cause the usual misalignment issues (e.g. a data abort).
This caps the ABI alignment of vector arguments at the stack alignment (other
types keep their normal ABI alignment), which in practice (and as a bonus)
also coincides with the alignment "natural" vectors get.
Added:
llvm/trunk/test/CodeGen/ARM/large-vector.ll
Modified:
llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
llvm/trunk/lib/Target/ARM/ARMISelLowering.h
llvm/trunk/test/CodeGen/ARM/vuzp.ll
Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=331451&r1=331450&r2=331451&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Thu May 3 05:54:25 2018
@@ -14783,6 +14783,18 @@ static bool isHomogeneousAggregate(Type
return (Members > 0 && Members <= 4);
}
+/// Return the correct alignment for the current calling convention.
+unsigned
+ARMTargetLowering::getABIAlignmentForCallingConv(Type *ArgTy,
+ DataLayout DL) const {
+ if (!ArgTy->isVectorTy())
+ return DL.getABITypeAlignment(ArgTy);
+
+ // Avoid over-aligning vector parameters. It would require realigning the
+ // stack and waste space for no real benefit.
+ return std::min(DL.getABITypeAlignment(ArgTy), DL.getStackAlignment());
+}
+
/// Return true if a type is an AAPCS-VFP homogeneous aggregate or one of
/// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when
/// passing according to AAPCS rules.
Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.h?rev=331451&r1=331450&r2=331451&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.h (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h Thu May 3 05:54:25 2018
@@ -579,6 +579,10 @@ class VectorType;
void finalizeLowering(MachineFunction &MF) const override;
+ /// Return the correct alignment for the current calling convention.
+ unsigned getABIAlignmentForCallingConv(Type *ArgTy,
+ DataLayout DL) const override;
+
protected:
std::pair<const TargetRegisterClass *, uint8_t>
findRepresentativeClass(const TargetRegisterInfo *TRI,
Added: llvm/trunk/test/CodeGen/ARM/large-vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/large-vector.ll?rev=331451&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/large-vector.ll (added)
+++ llvm/trunk/test/CodeGen/ARM/large-vector.ll Thu May 3 05:54:25 2018
@@ -0,0 +1,47 @@
+; RUN: llc -mtriple=thumbv7k-apple-watchos %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-V7K
+; RUN: llc -mtriple=thumbv7-linux-gnueabihf %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AAPCS
+; RUN: llc -mtriple=thumbv7-apple-ios %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-APCS
+
+define <32 x i8> @test_consume_arg([9 x double], <32 x i8> %vec) {
+; CHECK-LABEL: test_consume_arg:
+
+; CHECK-V7K: add r[[BASE:[0-9]+]], sp, #16
+; CHECK-V7K: vld1.64 {d0, d1}, [r[[BASE]]:128]
+; CHECK-V7K: add r[[BASE:[0-9]+]], sp, #32
+; CHECK-V7K: vld1.64 {d2, d3}, [r[[BASE]]:128]
+
+; CHECK-AAPCS: add r[[BASE:[0-9]+]], sp, #8
+; CHECK-AAPCS: vld1.64 {d0, d1}, [r[[BASE]]]
+; CHECK-AAPCS: add r[[BASE:[0-9]+]], sp, #24
+; CHECK-AAPCS: vld1.64 {d2, d3}, [r[[BASE]]]
+
+; CHECK-APCS: add r[[BASE:[0-9]+]], sp, #76
+; CHECK-APCS: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r[[BASE]]]
+; CHECK-APCS: add r[[BASE:[0-9]+]], sp, #60
+; CHECK-APCS: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r[[BASE]]]
+
+ ret <32 x i8> %vec
+}
+
+define void @test_produce_arg() {
+; CHECK-LABEL: test_produce_arg:
+
+; CHECK-V7K: add r[[BASE:[0-9]+]], sp, #32
+; CHECK-V7K: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r[[BASE]]:128]
+; CHECK-V7K: add r[[BASE:[0-9]+]], sp, #16
+; CHECK-V7K: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r[[BASE]]:128]
+
+; CHECK-AAPCS: add r[[BASE:[0-9]+]], sp, #24
+; CHECK-AAPCS: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r[[BASE]]]
+; CHECK-AAPCS: add r[[BASE:[0-9]+]], sp, #8
+; CHECK-AAPCS: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r[[BASE]]]
+
+; CHECK-APCS: add r[[BASE:[0-9]+]], sp, #60
+; CHECK-APCS: vst1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r[[BASE]]]
+; CHECK-APCS: mov r[[BASE:[0-9]+]], sp
+; CHECK-APCS: str {{r[0-9]+}}, [r[[BASE]]], #76
+; CHECK-APCS: vst1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r[[BASE]]]
+
+call <32 x i8> @test_consume_arg([9 x double] undef, <32 x i8> zeroinitializer)
+ ret void
+}
Modified: llvm/trunk/test/CodeGen/ARM/vuzp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vuzp.ll?rev=331451&r1=331450&r2=331451&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vuzp.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vuzp.ll Thu May 3 05:54:25 2018
@@ -459,35 +459,32 @@ define <8 x i8> @vuzp_trunc_and_shuffle_
define <10 x i8> @vuzp_wide_type(<10 x i8> %tr0, <10 x i8> %tr1,
; CHECK-LABEL: vuzp_wide_type:
; CHECK: @ %bb.0:
-; CHECK-NEXT: .save {r4, r10, r11, lr}
-; CHECK-NEXT: push {r4, r10, r11, lr}
-; CHECK-NEXT: .setfp r11, sp, #8
-; CHECK-NEXT: add r11, sp, #8
-; CHECK-NEXT: bic sp, sp, #15
-; CHECK-NEXT: add r12, r11, #32
-; CHECK-NEXT: add lr, r11, #44
+; CHECK-NEXT: .save {r4, lr}
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: add r12, sp, #32
+; CHECK-NEXT: add lr, sp, #48
; CHECK-NEXT: vld1.32 {d17[0]}, [r12:32]
-; CHECK-NEXT: add r12, r11, #24
+; CHECK-NEXT: add r12, sp, #24
; CHECK-NEXT: vld1.32 {d16[0]}, [r12:32]
-; CHECK-NEXT: add r12, r11, #52
+; CHECK-NEXT: add r12, sp, #56
; CHECK-NEXT: vld1.32 {d19[0]}, [r12:32]
-; CHECK-NEXT: ldr r12, [r11, #64]
+; CHECK-NEXT: ldr r12, [sp, #68]
; CHECK-NEXT: vld1.32 {d18[0]}, [lr:32]
-; CHECK-NEXT: add lr, r11, #40
+; CHECK-NEXT: add lr, sp, #40
; CHECK-NEXT: vld1.32 {d20[0]}, [lr:32]
; CHECK-NEXT: ldr r4, [r12]
; CHECK-NEXT: vmov.32 d23[0], r4
-; CHECK-NEXT: add r4, r11, #60
+; CHECK-NEXT: add r4, sp, #64
; CHECK-NEXT: vld1.32 {d24[0]}, [r4:32]
-; CHECK-NEXT: add r4, r11, #36
+; CHECK-NEXT: add r4, sp, #36
; CHECK-NEXT: vld1.32 {d17[1]}, [r4:32]
-; CHECK-NEXT: add r4, r11, #28
+; CHECK-NEXT: add r4, sp, #28
; CHECK-NEXT: vcgt.u32 q10, q12, q10
; CHECK-NEXT: vmov.u8 lr, d23[3]
; CHECK-NEXT: vld1.32 {d16[1]}, [r4:32]
-; CHECK-NEXT: add r4, r11, #56
+; CHECK-NEXT: add r4, sp, #60
; CHECK-NEXT: vld1.32 {d19[1]}, [r4:32]
-; CHECK-NEXT: add r4, r11, #48
+; CHECK-NEXT: add r4, sp, #52
; CHECK-NEXT: vld1.32 {d18[1]}, [r4:32]
; CHECK-NEXT: add r4, r12, #4
; CHECK-NEXT: vcgt.u32 q8, q9, q8
@@ -500,7 +497,7 @@ define <10 x i8> @vuzp_wide_type(<10 x i
; CHECK-NEXT: vneg.s8 q9, q9
; CHECK-NEXT: vtbl.8 d16, {d22, d23}, d20
; CHECK-NEXT: vld1.8 {d17[1]}, [r4]
-; CHECK-NEXT: add r4, r11, #8
+; CHECK-NEXT: add r4, sp, #8
; CHECK-NEXT: vshl.i8 q8, q8, #7
; CHECK-NEXT: vld1.64 {d20, d21}, [r4]
; CHECK-NEXT: vshl.s8 q8, q8, q9
@@ -509,8 +506,7 @@ define <10 x i8> @vuzp_wide_type(<10 x i
; CHECK-NEXT: vbsl q8, q9, q10
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: sub sp, r11, #8
-; CHECK-NEXT: pop {r4, r10, r11, lr}
+; CHECK-NEXT: pop {r4, lr}
; CHECK-NEXT: mov pc, lr
; CHECK-NEXT: .p2align 3
; CHECK-NEXT: @ %bb.1:
More information about the llvm-commits mailing list