[llvm] r367831 - [AArch64] Skip isZIPMask check for masks with an odd number of elements.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 5 04:12:23 PDT 2019
Author: fhahn
Date: Mon Aug 5 04:12:23 2019
New Revision: 367831
URL: http://llvm.org/viewvc/llvm-project?rev=367831&view=rev
Log:
[AArch64] Skip isZIPMask check for masks with an odd number of elements.
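isZIPMask processes 2 mask elements at a time (its loop advances with i += 2)
and expects the number of elements to be even, so return false early for
vectors with an odd number of elements. Similar to D60690.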
Reviewers: dmgreen, samparker, t.p.northover
Reviewed By: dmgreen
Differential Revision: https://reviews.llvm.org/D65400
Modified:
llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/trunk/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll
Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=367831&r1=367830&r2=367831&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp Mon Aug 5 04:12:23 2019
@@ -6312,6 +6312,8 @@ static bool isREVMask(ArrayRef<int> M, E
static bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned NumElts = VT.getVectorNumElements();
+ if (NumElts % 2 != 0)
+ return false;
WhichResult = (M[0] == 0 ? 0 : 1);
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned i = 0; i != NumElts; i += 2) {
Modified: llvm/trunk/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll?rev=367831&r1=367830&r2=367831&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll Mon Aug 5 04:12:23 2019
@@ -31,3 +31,29 @@ define <4 x i32> @widen_shuffles_reduced
%s3 = shufflevector <3 x i32> %y, <3 x i32> %x, <4 x i32> <i32 1, i32 4, i32 3, i32 0>
ret <4 x i32> %s3
}
+
+define void @zip_mask_check(<3 x float>* %p1, <3 x float>* %p2, i32* %p3) {
+; CHECK-LABEL: zip_mask_check:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: trn2 v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: fmla v0.4s, v0.4s, v0.4s
+; CHECK-NEXT: fmla v0.4s, v0.4s, v0.4s
+; CHECK-NEXT: str s0, [x2]
+; CHECK-NEXT: ret
+ %tmp3 = load <3 x float>, <3 x float>* %p1, align 16
+ %tmp4 = load <3 x float>, <3 x float>* %p2, align 4
+ %tmp5 = shufflevector <3 x float> %tmp3, <3 x float> %tmp4, <4 x i32> <i32 1, i32 4, i32 undef, i32 undef>
+ %tmp6 = shufflevector <4 x float> %tmp5, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 5, i32 undef>
+ %tmp7 = shufflevector <4 x float> %tmp6, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+ %tmp8 = call <4 x float> @llvm.fma.v4f32(<4 x float> %tmp7, <4 x float> undef, <4 x float> undef)
+ %tmp9 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> %tmp8)
+ %tmp10 = shufflevector <4 x float> %tmp9, <4 x float> undef, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %tmp11 = bitcast <16 x float> %tmp10 to <16 x i32>
+ %tmp12 = extractelement <16 x i32> %tmp11, i32 0
+ store i32 %tmp12, i32* %p3, align 4
+ ret void
+}
+
+declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #1
More information about the llvm-commits mailing list