[llvm] r305457 - [AArch64] Enable FeatureFuseAES for the generic processor model.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 15 02:31:23 PDT 2017
Author: fhahn
Date: Thu Jun 15 04:31:23 2017
New Revision: 305457
URL: http://llvm.org/viewvc/llvm-project?rev=305457&view=rev
Log:
[AArch64] Enable FeatureFuseAES for the generic processor model.
Summary:
Scheduling AESE/AESMC and AESD/AESIMC instruction pairs back-to-back
gives a double-digit speedup on benchmarks using those instructions on
Cortex-A processors. In GCC, this optimization is part of the generic
processor model as well.
This change should not have a major performance impact on processors
that do not optimize AES instruction pairs, although I only had access
to Cortex-A processors for benchmarking.
Reviewers: rengolin, kristof.beyls, javed.absar, evandro, silviu.baranga, MatzeB, mcrosier, joelkevinjones, joel_k_jones, bmakam, t.p.northover
Reviewed By: evandro
Subscribers: sbaranga, aemerson, llvm-commits
Differential Revision: https://reviews.llvm.org/D33836
Modified:
llvm/trunk/lib/Target/AArch64/AArch64.td
llvm/trunk/test/CodeGen/AArch64/misched-fusion-aes.ll
Modified: llvm/trunk/lib/Target/AArch64/AArch64.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64.td?rev=305457&r1=305456&r2=305457&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64.td (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64.td Thu Jun 15 04:31:23 2017
@@ -362,6 +362,7 @@ def ProcThunderXT83 : SubtargetFeature<"
def : ProcessorModel<"generic", NoSchedModel, [
FeatureFPARMv8,
+ FeatureFuseAES,
FeatureNEON,
FeaturePerfMon,
FeaturePostRAScheduler
Modified: llvm/trunk/test/CodeGen/AArch64/misched-fusion-aes.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/misched-fusion-aes.ll?rev=305457&r1=305456&r2=305457&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/misched-fusion-aes.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/misched-fusion-aes.ll Thu Jun 15 04:31:23 2017
@@ -1,7 +1,9 @@
-; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a53 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKCORTEX
-; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a57 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKCORTEX
-; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a72 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKCORTEX
-; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a73 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKCORTEX
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=+fuse-aes,+crypto | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSEALLPAIRS
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=generic -mattr=+crypto | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSEALLPAIRS
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a53 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSEALLPAIRS
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a57 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSEALLPAIRS
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a72 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSEALLPAIRS
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a73 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSEALLPAIRS
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKM1
declare <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %d, <16 x i8> %k)
@@ -74,22 +76,23 @@ define void @aesea(<16 x i8>* %a0, <16 x
ret void
; CHECK-LABEL: aesea:
-; CHECKCORTEX: aese [[VA:v[0-7].16b]], {{v[0-7].16b}}
-; CHECKCORTEX-NEXT: aesmc {{v[0-7].16b}}, [[VA]]
-; CHECKCORTEX: aese [[VB:v[0-7].16b]], {{v[0-7].16b}}
-; CHECKCORTEX-NEXT: aesmc {{v[0-7].16b}}, [[VB]]
-; CHECKCORTEX: aese [[VC:v[0-7].16b]], {{v[0-7].16b}}
-; CHECKCORTEX-NEXT: aesmc {{v[0-7].16b}}, [[VC]]
-; CHECKCORTEX: aese [[VD:v[0-7].16b]], {{v[0-7].16b}}
-; CHECKCORTEX-NEXT: aesmc {{v[0-7].16b}}, [[VD]]
-; CHECKCORTEX: aese [[VE:v[0-7].16b]], {{v[0-7].16b}}
-; CHECKCORTEX-NEXT: aesmc {{v[0-7].16b}}, [[VE]]
-; CHECKCORTEX: aese [[VF:v[0-7].16b]], {{v[0-7].16b}}
-; CHECKCORTEX-NEXT: aesmc {{v[0-7].16b}}, [[VF]]
-; CHECKCORTEX: aese [[VG:v[0-7].16b]], {{v[0-7].16b}}
-; CHECKCORTEX-NEXT: aesmc {{v[0-7].16b}}, [[VG]]
-; CHECKCORTEX: aese [[VH:v[0-7].16b]], {{v[0-7].16b}}
-; CHECKCORTEX-NEXT: aesmc {{v[0-7].16b}}, [[VH]]
+; CHECKFUSEALLPAIRS: aese [[VA:v[0-7].16b]], {{v[0-7].16b}}
+; CHECKFUSEALLPAIRS-NEXT: aesmc {{v[0-7].16b}}, [[VA]]
+; CHECKFUSEALLPAIRS: aese [[VB:v[0-7].16b]], {{v[0-7].16b}}
+; CHECKFUSEALLPAIRS-NEXT: aesmc {{v[0-7].16b}}, [[VB]]
+; CHECKFUSEALLPAIRS: aese [[VC:v[0-7].16b]], {{v[0-7].16b}}
+; CHECKFUSEALLPAIRS-NEXT: aesmc {{v[0-7].16b}}, [[VC]]
+; CHECKFUSEALLPAIRS: aese [[VD:v[0-7].16b]], {{v[0-7].16b}}
+; CHECKFUSEALLPAIRS-NEXT: aesmc {{v[0-7].16b}}, [[VD]]
+; CHECKFUSEALLPAIRS: aese [[VE:v[0-7].16b]], {{v[0-7].16b}}
+; CHECKFUSEALLPAIRS-NEXT: aesmc {{v[0-7].16b}}, [[VE]]
+; CHECKFUSEALLPAIRS: aese [[VF:v[0-7].16b]], {{v[0-7].16b}}
+; CHECKFUSEALLPAIRS-NEXT: aesmc {{v[0-7].16b}}, [[VF]]
+; CHECKFUSEALLPAIRS: aese [[VG:v[0-7].16b]], {{v[0-7].16b}}
+; CHECKFUSEALLPAIRS-NEXT: aesmc {{v[0-7].16b}}, [[VG]]
+; CHECKFUSEALLPAIRS: aese [[VH:v[0-7].16b]], {{v[0-7].16b}}
+; CHECKFUSEALLPAIRS-NEXT: aesmc {{v[0-7].16b}}, [[VH]]
+; CHECKFUSEALLPAIRS-NOT: aesmc
; CHECKM1: aese [[VA:v[0-7].16b]], {{v[0-7].16b}}
; CHECKM1-NEXT: aesmc {{v[0-7].16b}}, [[VA]]
@@ -175,22 +178,23 @@ define void @aesda(<16 x i8>* %a0, <16 x
ret void
; CHECK-LABEL: aesda:
-; CHECKCORTEX: aesd [[VA:v[0-7].16b]], {{v[0-7].16b}}
-; CHECKCORTEX-NEXT: aesimc {{v[0-7].16b}}, [[VA]]
-; CHECKCORTEX: aesd [[VB:v[0-7].16b]], {{v[0-7].16b}}
-; CHECKCORTEX-NEXT: aesimc {{v[0-7].16b}}, [[VB]]
-; CHECKCORTEX: aesd [[VC:v[0-7].16b]], {{v[0-7].16b}}
-; CHECKCORTEX-NEXT: aesimc {{v[0-7].16b}}, [[VC]]
-; CHECKCORTEX: aesd [[VD:v[0-7].16b]], {{v[0-7].16b}}
-; CHECKCORTEX-NEXT: aesimc {{v[0-7].16b}}, [[VD]]
-; CHECKCORTEX: aesd [[VE:v[0-7].16b]], {{v[0-7].16b}}
-; CHECKCORTEX-NEXT: aesimc {{v[0-7].16b}}, [[VE]]
-; CHECKCORTEX: aesd [[VF:v[0-7].16b]], {{v[0-7].16b}}
-; CHECKCORTEX-NEXT: aesimc {{v[0-7].16b}}, [[VF]]
-; CHECKCORTEX: aesd [[VG:v[0-7].16b]], {{v[0-7].16b}}
-; CHECKCORTEX-NEXT: aesimc {{v[0-7].16b}}, [[VG]]
-; CHECKCORTEX: aesd [[VH:v[0-7].16b]], {{v[0-7].16b}}
-; CHECKCORTEX-NEXT: aesimc {{v[0-7].16b}}, [[VH]]
+; CHECKFUSEALLPAIRS: aesd [[VA:v[0-7].16b]], {{v[0-7].16b}}
+; CHECKFUSEALLPAIRS-NEXT: aesimc {{v[0-7].16b}}, [[VA]]
+; CHECKFUSEALLPAIRS: aesd [[VB:v[0-7].16b]], {{v[0-7].16b}}
+; CHECKFUSEALLPAIRS-NEXT: aesimc {{v[0-7].16b}}, [[VB]]
+; CHECKFUSEALLPAIRS: aesd [[VC:v[0-7].16b]], {{v[0-7].16b}}
+; CHECKFUSEALLPAIRS-NEXT: aesimc {{v[0-7].16b}}, [[VC]]
+; CHECKFUSEALLPAIRS: aesd [[VD:v[0-7].16b]], {{v[0-7].16b}}
+; CHECKFUSEALLPAIRS-NEXT: aesimc {{v[0-7].16b}}, [[VD]]
+; CHECKFUSEALLPAIRS: aesd [[VE:v[0-7].16b]], {{v[0-7].16b}}
+; CHECKFUSEALLPAIRS-NEXT: aesimc {{v[0-7].16b}}, [[VE]]
+; CHECKFUSEALLPAIRS: aesd [[VF:v[0-7].16b]], {{v[0-7].16b}}
+; CHECKFUSEALLPAIRS-NEXT: aesimc {{v[0-7].16b}}, [[VF]]
+; CHECKFUSEALLPAIRS: aesd [[VG:v[0-7].16b]], {{v[0-7].16b}}
+; CHECKFUSEALLPAIRS-NEXT: aesimc {{v[0-7].16b}}, [[VG]]
+; CHECKFUSEALLPAIRS: aesd [[VH:v[0-7].16b]], {{v[0-7].16b}}
+; CHECKFUSEALLPAIRS-NEXT: aesimc {{v[0-7].16b}}, [[VH]]
+; CHECKFUSEALLPAIRS-NOT: aesimc
; CHECKM1: aesd [[VA:v[0-7].16b]], {{v[0-7].16b}}
; CHECKM1-NEXT: aesimc {{v[0-7].16b}}, [[VA]]
@@ -236,4 +240,5 @@ entry:
; CHECK-NEXT: aesmc {{v[0-7].16b}}, [[VA]]
; CHECK: aese [[VB:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesmc {{v[0-7].16b}}, [[VB]]
+; CHECK-NOT: aesmc
}
More information about the llvm-commits mailing list