[PATCH] [AArch64 NEON] Fix a bug in implementing register copy between FPR16.
Kevin Qin
kevinqindev at gmail.com
Wed Jan 22 22:20:37 PST 2014
Thanks to Tim for the review. A corresponding test case has been added.
http://llvm-reviews.chandlerc.com/D2590
CHANGE SINCE LAST DIFF
http://llvm-reviews.chandlerc.com/D2590?vs=6566&id=6593#toc
Files:
lib/Target/AArch64/AArch64InstrInfo.cpp
test/CodeGen/AArch64/neon-copy.ll
Index: lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.cpp
+++ lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -135,9 +135,9 @@
} else if (AArch64::FPR16RegClass.contains(DestReg, SrcReg)) {
// The copy of two FPR16 registers is implemented by the copy of two FPR32
const TargetRegisterInfo *TRI = &getRegisterInfo();
- unsigned Dst = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_16,
+ unsigned Dst = TRI->getMatchingSuperReg(DestReg, AArch64::sub_16,
&AArch64::FPR32RegClass);
- unsigned Src = TRI->getMatchingSuperReg(DestReg, AArch64::sub_16,
+ unsigned Src = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_16,
&AArch64::FPR32RegClass);
BuildMI(MBB, I, DL, get(AArch64::FMOVss), Dst)
.addReg(Src);
Index: test/CodeGen/AArch64/neon-copy.ll
===================================================================
--- test/CodeGen/AArch64/neon-copy.ll
+++ test/CodeGen/AArch64/neon-copy.ll
@@ -979,4 +979,15 @@
%0 = extractelement <1 x float> %a, i32 0
%vecinit1.i = insertelement <4 x float> undef, float %0, i32 0
ret <4 x float> %vecinit1.i
-}
\ No newline at end of file
+}
+
+declare <1 x i16> @llvm.aarch64.neon.vsqadd.v1i16(<1 x i16>, <1 x i16>)
+
+define <1 x i16> @test_copy_FPR16_FPR16(<1 x i16> %a, <1 x i16> %b) {
+; CHECK-LABEL: test_copy_FPR16_FPR16:
+; CHECK: usqadd h1, h0
+; CHECK-NEXT: fmov s0, s1
+entry:
+ %vsqadd2.i = call <1 x i16> @llvm.aarch64.neon.vsqadd.v1i16(<1 x i16> %b, <1 x i16> %a)
+ ret <1 x i16> %vsqadd2.i
+}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D2590.2.patch
Type: text/x-patch
Size: 1673 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140122/ce6acc3d/attachment.bin>
More information about the llvm-commits
mailing list