[llvm] r323289 - AArch64: Cyclone: Remove SlowMisaligned128Store tuning flag

Matthias Braun via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 23 16:39:53 PST 2018


Author: matze
Date: Tue Jan 23 16:39:53 2018
New Revision: 323289

URL: http://llvm.org/viewvc/llvm-project?rev=323289&view=rev
Log:
AArch64: Cyclone: Remove SlowMisaligned128Store tuning flag

Remove FeatureSlowMisaligned128Store from cyclone flags.
This flag causes splitting of 16-byte-wide stores into two stores of 8
bytes each. This was useful on older Apple CPUs, which were slow for
16-byte stores that were not aligned to 16 bytes. As the compiler often
cannot predict the actual alignment, splitting was chosen.

This has been a topic of much debate, as the splitting also
decreases performance for some benchmarks. Measuring the effects on
newer Apple chips (rdar://35525421) shows that it harms more cases than
it helps. So it is time to retire this workaround.

Modified:
    llvm/trunk/lib/Target/AArch64/AArch64.td
    llvm/trunk/test/CodeGen/AArch64/arm64-stp.ll
    llvm/trunk/test/CodeGen/AArch64/arm64-stur.ll
    llvm/trunk/test/CodeGen/AArch64/merge-store.ll
    llvm/trunk/test/CodeGen/AArch64/misched-stp.ll

Modified: llvm/trunk/lib/Target/AArch64/AArch64.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64.td?rev=323289&r1=323288&r2=323289&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64.td (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64.td Tue Jan 23 16:39:53 2018
@@ -294,7 +294,6 @@ def ProcCyclone : SubtargetFeature<"cycl
                                    FeatureFuseAES,
                                    FeatureNEON,
                                    FeaturePerfMon,
-                                   FeatureSlowMisaligned128Store,
                                    FeatureZCRegMove,
                                    FeatureZCZeroing,
                                    FeatureZCZeroingFPWorkaround

Modified: llvm/trunk/test/CodeGen/AArch64/arm64-stp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-stp.ll?rev=323289&r1=323288&r2=323289&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-stp.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-stp.ll Tue Jan 23 16:39:53 2018
@@ -105,8 +105,8 @@ define void @splat_v4i32(i32 %v, i32 *%p
 entry:
 
 ; CHECK-LABEL: splat_v4i32
-; CHECK-DAG: stp w0, w0, [x1]
-; CHECK-DAG: stp w0, w0, [x1, #8]
+; CHECK-DAG: dup v0.4s, w0
+; CHECK-DAG: str q0, [x1]
 ; CHECK: ret
 
   %p17 = insertelement <4 x i32> undef, i32 %v, i32 0
@@ -129,8 +129,7 @@ entry:
 ; CHECK-DAG: mov v[[REG1]].s[1], w0
 ; CHECK-DAG: mov v[[REG1]].s[2], w0
 ; CHECK-DAG: mov v[[REG1]].s[3], w0
-; CHECK: ext v[[REG2:[0-9]+]].16b, v[[REG1]].16b, v[[REG1]].16b, #8
-; CHECK: stp d[[REG1]], d[[REG2]], [x1]
+; CHECK: str q[[REG1]], [x1]
 ; CHECK: ret
 
   %p17 = insertelement <4 x i32> undef, i32 %v, i32 %v
@@ -151,8 +150,7 @@ entry:
 ; CHECK: mov v[[REG1]].s[1], w0
 ; CHECK-DAG: mov v[[REG1]].s[2], w0
 ; CHECK-DAG: mov v[[REG1]].s[3], w0
-; CHECK: ext v[[REG2:[0-9]+]].16b, v[[REG1]].16b, v[[REG1]].16b, #8
-; CHECK: stp d[[REG1]], d[[REG2]], [x1]
+; CHECK: str q[[REG1]], [x1]
 ; CHECK: ret
 
   %p18 = insertelement <4 x i32> %vin, i32 %v, i32 1

Modified: llvm/trunk/test/CodeGen/AArch64/arm64-stur.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-stur.ll?rev=323289&r1=323288&r2=323289&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-stur.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-stur.ll Tue Jan 23 16:39:53 2018
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mcpu=cyclone | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mcpu=cyclone -mattr=+slow-misaligned-128store | FileCheck %s
 %struct.X = type <{ i32, i64, i64 }>
 
 define void @foo1(i32* %p, i64 %val) nounwind {

Modified: llvm/trunk/test/CodeGen/AArch64/merge-store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/merge-store.ll?rev=323289&r1=323288&r2=323289&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/merge-store.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/merge-store.ll Tue Jan 23 16:39:53 2018
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mcpu=cyclone | FileCheck %s --check-prefix=CYCLONE --check-prefix=CHECK
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mcpu=cyclone -mattr=+slow-misaligned-128store | FileCheck %s --check-prefix=SPLITTING --check-prefix=CHECK
 ; RUN: llc < %s -mtriple=aarch64-eabi -mattr=-slow-misaligned-128store | FileCheck %s --check-prefix=MISALIGNED --check-prefix=CHECK
 
 @g0 = external global <3 x float>, align 16
@@ -44,9 +44,9 @@ define void @merge_vec_extract_stores(<4
 ; FIXME: Ideally we would like to use a generic target for this test, but this relies
 ; on suppressing store pairs.
 
-; CYCLONE-LABEL:    merge_vec_extract_stores
-; CYCLONE:          ext   v1.16b, v0.16b, v0.16b, #8
-; CYCLONE-NEXT:     str   d0, [x0, #24]
-; CYCLONE-NEXT:     str   d1, [x0, #32]
-; CYCLONE-NEXT:     ret
+; SPLITTING-LABEL:    merge_vec_extract_stores
+; SPLITTING:          ext   v1.16b, v0.16b, v0.16b, #8
+; SPLITTING-NEXT:     str   d0, [x0, #24]
+; SPLITTING-NEXT:     str   d1, [x0, #32]
+; SPLITTING-NEXT:     ret
 }

Modified: llvm/trunk/test/CodeGen/AArch64/misched-stp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/misched-stp.ll?rev=323289&r1=323288&r2=323289&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/misched-stp.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/misched-stp.ll Tue Jan 23 16:39:53 2018
@@ -1,5 +1,5 @@
 ; REQUIRES: asserts
-; RUN: llc < %s -mtriple=aarch64 -mcpu=cyclone -mattr=+use-aa -enable-misched -verify-misched -o - | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64 -mcpu=cyclone -mattr=+use-aa,+slow-misaligned-128store -enable-misched -verify-misched -o - | FileCheck %s
 
 ; Tests to check that the scheduler dependencies derived from alias analysis are
 ; correct when we have loads that have been split up so that they can later be




More information about the llvm-commits mailing list