[PATCH] D65400: [AArch64] Skip isZIPMask check for masks with an odd number of elements.
Florian Hahn via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 29 08:49:01 PDT 2019
fhahn created this revision.
fhahn added reviewers: dmgreen, samparker, t.p.northover.
Herald added subscribers: hiraditya, kristof.beyls, javed.absar.
Herald added a project: LLVM.
isZIPMask processes the shuffle mask 2 elements at a time and therefore expects
the number of elements to be even. Return false early for masks with an odd
number of elements. Similar to D60690 <https://reviews.llvm.org/D60690>.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D65400
Files:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll
Index: llvm/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll
===================================================================
--- llvm/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll
+++ llvm/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll
@@ -31,3 +31,20 @@
%s3 = shufflevector <3 x i32> %y, <3 x i32> %x, <4 x i32> <i32 1, i32 4, i32 3, i32 0>
ret <4 x i32> %s3
}
+
+define void @zip_mask_check(<3 x float>* %p1, <3 x float>* %p2, i32* %p3) {
+ %tmp3 = load <3 x float>, <3 x float>* %p1, align 16
+ %tmp4 = load <3 x float>, <3 x float>* %p2, align 4
+ %tmp5 = shufflevector <3 x float> %tmp3, <3 x float> %tmp4, <4 x i32> <i32 1, i32 4, i32 undef, i32 undef>
+ %tmp6 = shufflevector <4 x float> %tmp5, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 5, i32 undef>
+ %tmp7 = shufflevector <4 x float> %tmp6, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+ %tmp8 = call <4 x float> @llvm.fma.v4f32(<4 x float> %tmp7, <4 x float> undef, <4 x float> undef)
+ %tmp9 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> %tmp8)
+ %tmp10 = shufflevector <4 x float> %tmp9, <4 x float> undef, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %tmp11 = bitcast <16 x float> %tmp10 to <16 x i32>
+ %tmp12 = extractelement <16 x i32> %tmp11, i32 0
+ store i32 %tmp12, i32* %p3, align 4
+ ret void
+}
+
+declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #1
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6305,6 +6305,8 @@
static bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned NumElts = VT.getVectorNumElements();
+ if (NumElts % 2 != 0)
+ return false;
WhichResult = (M[0] == 0 ? 0 : 1);
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned i = 0; i != NumElts; i += 2) {
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D65400.212171.patch
Type: text/x-patch
Size: 2152 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190729/2ca720d8/attachment.bin>
More information about the llvm-commits mailing list