[PATCH] [AArch64] Fix the problem that concat_vectors of two v1i32 types can't be selected
Hao Liu
Hao.Liu at arm.com
Wed Jan 15 02:33:15 PST 2014
Hi t.p.northover,
Hi Tim and reviewers,
This patch fixes the problem that concat_vectors of two v1i32 types can't be selected, by matching such nodes to dup instructions.
It also fixes the problem that scalar_to_vector from f32 to v2f32/v4f32 can't be selected, by matching such nodes to SUBREG_TO_REG.
Please review.
Thanks,
-Hao
http://llvm-reviews.chandlerc.com/D2550
Files:
lib/Target/AArch64/AArch64ISelLowering.cpp
lib/Target/AArch64/AArch64InstrNEON.td
test/CodeGen/AArch64/neon-copy.ll
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -310,13 +310,11 @@
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1f64, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i32, Legal);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i8, Legal);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Legal);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Legal);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Legal);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Legal);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Legal);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Legal);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Legal);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64, Legal);
Index: lib/Target/AArch64/AArch64InstrNEON.td
===================================================================
--- lib/Target/AArch64/AArch64InstrNEON.td
+++ lib/Target/AArch64/AArch64InstrNEON.td
@@ -6884,15 +6884,10 @@
def : Pat<(v2i64 (scalar_to_vector GPR64:$Rn)),
(INSdx (v2i64 (IMPLICIT_DEF)), $Rn, (i64 0))>;
-def : Pat<(v2i32 (scalar_to_vector GPR32:$Rn)),
- (v2i32 (EXTRACT_SUBREG (v16i8
- (INSsw (v4i32 (IMPLICIT_DEF)), $Rn, (i64 0))),
- sub_64))>;
-
-def : Pat<(v2i32 (scalar_to_vector GPR32:$Rn)),
- (v2i32 (EXTRACT_SUBREG (v16i8
- (INSsw (v4i32 (IMPLICIT_DEF)), $Rn, (i64 0))),
- sub_64))>;
+def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))),
+ (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32)>;
+def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))),
+ (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32)>;
def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Rn))),
(v1f64 FPR64:$Rn)>;
@@ -7059,6 +7054,11 @@
defm : Concat_Vector_Pattern<v4f32, v2f32>;
defm : Concat_Vector_Pattern<v2f64, v1f64>;
+def : Pat<(v2i32 (concat_vectors (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rn))),
+ (DUPELT2s (v4i32 (SUBREG_TO_REG(i64 0), $Rn, sub_32)), 0)>;
+def : Pat<(v2i32 (concat_vectors undef, (v1i32 FPR32:$Rn))),
+ (DUPELT2s (v4i32 (SUBREG_TO_REG(i64 0), $Rn, sub_32)), 0)>;
+
//patterns for EXTRACT_SUBVECTOR
def : Pat<(v8i8 (extract_subvector (v16i8 VPR128:$Rn), (i64 0))),
(v8i8 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
Index: test/CodeGen/AArch64/neon-copy.ll
===================================================================
--- test/CodeGen/AArch64/neon-copy.ll
+++ test/CodeGen/AArch64/neon-copy.ll
@@ -948,3 +948,35 @@
ret <2 x i32> %vecinit1.i
}
+define <2 x i32> @test_concat_undef_v1i32(<1 x i32> %a) {
+; CHECK-LABEL: test_concat_undef_v1i32:
+; CHECK: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0]
+entry:
+ %0 = extractelement <1 x i32> %a, i32 0
+ %vecinit1.i = insertelement <2 x i32> undef, i32 %0, i32 1
+ ret <2 x i32> %vecinit1.i
+}
+
+define <2 x i32> @test_concat_v1i32_v1i32(<1 x i32> %a) {
+; CHECK-LABEL: test_concat_v1i32_v1i32:
+; CHECK: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0]
+entry:
+ %0 = extractelement <1 x i32> %a, i32 0
+ %vecinit.i = insertelement <2 x i32> undef, i32 %0, i32 0
+ %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %0, i32 1
+ ret <2 x i32> %vecinit1.i
+}
+
+define <2 x float> @test_scalar_to_vector_f32_to_v2f32(<1 x float> %a) {
+entry:
+ %0 = extractelement <1 x float> %a, i32 0
+ %vecinit1.i = insertelement <2 x float> undef, float %0, i32 0
+ ret <2 x float> %vecinit1.i
+}
+
+define <4 x float> @test_scalar_to_vector_f32_to_v4f32(<1 x float> %a) {
+entry:
+ %0 = extractelement <1 x float> %a, i32 0
+ %vecinit1.i = insertelement <4 x float> undef, float %0, i32 0
+ ret <4 x float> %vecinit1.i
+}
\ No newline at end of file
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D2550.1.patch
Type: text/x-patch
Size: 3943 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140115/40133e5f/attachment.bin>
More information about the llvm-commits
mailing list