[PATCH] [AArch64] Implement 128-bit register copy with NEON.
Kevin Qin
kevinqindev at gmail.com
Mon Nov 25 02:14:49 PST 2013
kevin.qin added you to the CC list for the revision "[AArch64] Implement 128-bit register copy with NEON.".
http://llvm-reviews.chandlerc.com/D2264
Files:
lib/Target/AArch64/AArch64InstrInfo.cpp
test/CodeGen/AArch64/neon-perm.ll
Index: lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.cpp
+++ lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -114,23 +114,28 @@
} else if (AArch64::FPR128RegClass.contains(DestReg)) {
assert(AArch64::FPR128RegClass.contains(SrcReg));
- // FIXME: there's no good way to do this, at least without NEON:
- // + There's no single move instruction for q-registers
- // + We can't create a spill slot and use normal STR/LDR because stack
- // allocation has already happened
- // + We can't go via X-registers with FMOV because register allocation has
- // already happened.
- // This may not be efficient, but at least it works.
- BuildMI(MBB, I, DL, get(AArch64::LSFP128_PreInd_STR), AArch64::XSP)
- .addReg(SrcReg)
- .addReg(AArch64::XSP)
- .addImm(0x1ff & -16);
-
- BuildMI(MBB, I, DL, get(AArch64::LSFP128_PostInd_LDR), DestReg)
- .addReg(AArch64::XSP, RegState::Define)
- .addReg(AArch64::XSP)
- .addImm(16);
- return;
+ // If NEON is enabled, we use DUP and FMOV to implement this copy.
+ // If NEON isn't available, emit STR and LDR to handle this.
+ if(getSubTarget().hasNEON()) {
+ BuildMI(MBB, I, DL, get(AArch64::DUPELT2d), DestReg)
+ .addReg(SrcReg)
+ .addImm(1);
+ BuildMI(MBB, I, DL, get(AArch64::FMOVdd),
+ getRegisterInfo().getSubReg(DestReg, AArch64::sub_64))
+ .addReg(getRegisterInfo().getSubReg(SrcReg, AArch64::sub_64));
+ return;
+ } else {
+ BuildMI(MBB, I, DL, get(AArch64::LSFP128_PreInd_STR), AArch64::XSP)
+ .addReg(SrcReg)
+ .addReg(AArch64::XSP)
+ .addImm(0x1ff & -16);
+
+ BuildMI(MBB, I, DL, get(AArch64::LSFP128_PostInd_LDR), DestReg)
+ .addReg(AArch64::XSP, RegState::Define)
+ .addReg(AArch64::XSP)
+ .addImm(16);
+ return;
+ }
} else {
llvm_unreachable("Unknown register class in copyPhysReg");
}
Index: test/CodeGen/AArch64/neon-perm.ll
===================================================================
--- test/CodeGen/AArch64/neon-perm.ll
+++ test/CodeGen/AArch64/neon-perm.ll
@@ -238,6 +238,8 @@
define <2 x i64> @test_vuzp2q_s64(<2 x i64> %a, <2 x i64> %b) {
; CHECK: test_vuzp2q_s64:
; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+; CHECK-NEXT: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
+; CHECK-NEXT: fmov {{d[0-9]+}}, {{d[0-9]+}}
entry:
%shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
ret <2 x i64> %shuffle.i
@@ -294,6 +296,8 @@
define <2 x i64> @test_vuzp2q_u64(<2 x i64> %a, <2 x i64> %b) {
; CHECK: test_vuzp2q_u64:
; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+; CHECK-NEXT: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
+; CHECK-NEXT: fmov {{d[0-9]+}}, {{d[0-9]+}}
entry:
%shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
ret <2 x i64> %shuffle.i
@@ -318,6 +322,8 @@
define <2 x double> @test_vuzp2q_f64(<2 x double> %a, <2 x double> %b) {
; CHECK: test_vuzp2q_f64:
; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+; CHECK-NEXT: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
+; CHECK-NEXT: fmov {{d[0-9]+}}, {{d[0-9]+}}
entry:
%shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
ret <2 x double> %shuffle.i
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D2264.1.patch
Type: text/x-patch
Size: 3374 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20131125/2788abc0/attachment.bin>
More information about the llvm-commits mailing list