[PATCH] [AArch64 NEON] Fix a pattern match failure with NEON_VDUP
Kevin Qin
kevinqindev at gmail.com
Wed Dec 18 01:06:26 PST 2013
Hi,
This failure caused by improper condition when lowering shuffle_vector to scalar_to_vector. After this patch NEON_VDUP with v1i64 will never be generated. Please review.
http://llvm-reviews.chandlerc.com/D2433
Files:
lib/Target/AArch64/AArch64ISelLowering.cpp
lib/Target/AArch64/AArch64InstrNEON.td
test/CodeGen/AArch64/neon-simd-ldst-one.ll
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4038,9 +4038,7 @@
if (ValueCounts.size() == 0)
return DAG.getUNDEF(VT);
- // Loads are better lowered with insert_vector_elt.
- // Keep going if we are hitting this case.
- if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
+ if (isOnlyLowElement)
return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
unsigned EltSize = VT.getVectorElementType().getSizeInBits();
Index: lib/Target/AArch64/AArch64InstrNEON.td
===================================================================
--- lib/Target/AArch64/AArch64InstrNEON.td
+++ lib/Target/AArch64/AArch64InstrNEON.td
@@ -3646,12 +3646,16 @@
def : LD1R_pattern<v4i32, i32, load, LD1R_4S>;
def : LD1R_pattern<v4f32, f32, load, LD1R_4S>;
-def : LD1R_pattern<v1i64, i64, load, LD1R_1D>;
-def : LD1R_pattern<v1f64, f64, load, LD1R_1D>;
-
def : LD1R_pattern<v2i64, i64, load, LD1R_2D>;
def : LD1R_pattern<v2f64, f64, load, LD1R_2D>;
+class LD1R_pattern_v1 <ValueType VTy, ValueType DTy, PatFrag LoadOp,
+ Instruction INST>
+ : Pat<(VTy (scalar_to_vector (DTy (LoadOp GPR64xsp:$Rn)))),
+ (VTy (INST GPR64xsp:$Rn))>;
+
+def : LD1R_pattern_v1<v1i64, i64, load, LD1R_1D>;
+def : LD1R_pattern_v1<v1f64, f64, load, LD1R_1D>;
multiclass VectorList_Bare_BHSD<string PREFIX, int Count,
RegisterClass RegList> {
Index: test/CodeGen/AArch64/neon-simd-ldst-one.ll
===================================================================
--- test/CodeGen/AArch64/neon-simd-ldst-one.ll
+++ test/CodeGen/AArch64/neon-simd-ldst-one.ll
@@ -155,6 +155,19 @@
ret <1 x double> %1
}
+define <1 x i64> @testDUP.v1i64(i64* %a, i64* %b) #0 {
+; As there is a store operation depending on %1, LD1R pattern can't be selected.
+; So LDR and FMOV should be emitted.
+; CHECK-LABEL: testDUP.v1i64
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}]
+; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}]
+ %1 = load i64* %a, align 8
+ store i64 %1, i64* %b, align 8
+ %vecinit.i = insertelement <1 x i64> undef, i64 %1, i32 0
+ ret <1 x i64> %vecinit.i
+}
+
define %struct.int8x16x2_t @test_vld2q_dup_s8(i8* %a) {
; CHECK-LABEL: test_vld2q_dup_s8
; CHECK: ld2r {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, [x0]
@@ -2110,4 +2123,4 @@
declare void @llvm.arm.neon.vst4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32)
declare void @llvm.arm.neon.vst4lane.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32, i32)
declare void @llvm.arm.neon.vst4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32)
-declare void @llvm.arm.neon.vst4lane.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, <1 x double>, i32, i32)
\ No newline at end of file
+declare void @llvm.arm.neon.vst4lane.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, <1 x double>, i32, i32)
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D2433.1.patch
Type: text/x-patch
Size: 3062 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20131218/13b29b66/attachment.bin>
More information about the llvm-commits
mailing list