[llvm] r216200 - [AArch64] Run a peephole pass right after AdvSIMD pass.
Quentin Colombet
qcolombet at apple.com
Thu Aug 21 11:10:07 PDT 2014
Author: qcolombet
Date: Thu Aug 21 13:10:07 2014
New Revision: 216200
URL: http://llvm.org/viewvc/llvm-project?rev=216200&view=rev
Log:
[AArch64] Run a peephole pass right after AdvSIMD pass.
The AdvSIMD pass may produce copies that are not coalescer-friendly. The
peephole optimizer knows how to fix that as demonstrated in the test case.
<rdar://problem/12702965>
Modified:
llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp
llvm/trunk/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll
Modified: llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp?rev=216200&r1=216199&r2=216200&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp Thu Aug 21 13:10:07 2014
@@ -195,8 +195,12 @@ bool AArch64PassConfig::addILPOpts() {
bool AArch64PassConfig::addPreRegAlloc() {
// Use AdvSIMD scalar instructions whenever profitable.
- if (TM->getOptLevel() != CodeGenOpt::None && EnableAdvSIMDScalar)
+ if (TM->getOptLevel() != CodeGenOpt::None && EnableAdvSIMDScalar) {
addPass(createAArch64AdvSIMDScalar());
+ // The AdvSIMD pass may produce copies that can be rewritten to
+ // be register coaleascer friendly.
+ addPass(&PeepholeOptimizerID);
+ }
return true;
}
Modified: llvm/trunk/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll?rev=216200&r1=216199&r2=216200&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll Thu Aug 21 13:10:07 2014
@@ -1,15 +1,36 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -aarch64-simd-scalar=true -asm-verbose=false | FileCheck %s
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=generic -aarch64-simd-scalar=true -asm-verbose=false | FileCheck %s -check-prefix=GENERIC
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -aarch64-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=true | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-NOOPT
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -aarch64-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=false | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-OPT
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=generic -aarch64-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=true | FileCheck %s -check-prefix=GENERIC -check-prefix=GENERIC-NOOPT
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=generic -aarch64-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=false | FileCheck %s -check-prefix=GENERIC -check-prefix=GENERIC-OPT
define <2 x i64> @bar(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: bar:
; CHECK: add.2d v[[REG:[0-9]+]], v0, v1
; CHECK: add d[[REG3:[0-9]+]], d[[REG]], d1
+; Without advanced copy optimization, we end up with cross register
+; banks copies that cannot be coalesced.
+; CHECK-NOOPT: fmov [[COPY_REG3:x[0-9]+]], d[[REG3]]
+; With advanced copy optimization, we end up with just one copy
+; to insert the computed high part into the V register.
+; CHECK-OPT-NOT: fmov
; CHECK: sub d[[REG2:[0-9]+]], d[[REG]], d1
+; CHECK: fmov [[COPY_REG2:x[0-9]+]], d[[REG2]]
+; CHECK-NOOPT: fmov d0, [[COPY_REG3]]
+; CHECK-OPT-NOT: fmov
+; CHECK: ins.d v0[1], [[COPY_REG2]]
+; CHECK-NEXT: ret
+;
; GENERIC-LABEL: bar:
; GENERIC: add v[[REG:[0-9]+]].2d, v0.2d, v1.2d
; GENERIC: add d[[REG3:[0-9]+]], d[[REG]], d1
+; GENERIC-NOOPT: fmov [[COPY_REG3:x[0-9]+]], d[[REG3]]
+; GENERIC-OPT-NOT: fmov
; GENERIC: sub d[[REG2:[0-9]+]], d[[REG]], d1
+; GENERIC: fmov [[COPY_REG2:x[0-9]+]], d[[REG2]]
+; GENERIC-NOOPT: fmov d0, [[COPY_REG3]]
+; GENERIC-OPT-NOT: fmov
+; GENERIC: ins v0.d[1], [[COPY_REG2]]
+; GENERIC-NEXT: ret
%add = add <2 x i64> %a, %b
%vgetq_lane = extractelement <2 x i64> %add, i32 0
%vgetq_lane2 = extractelement <2 x i64> %b, i32 0
More information about the llvm-commits
mailing list