[PATCH] D65400: [AArch64] Skip isZIPMask check for masks with an odd number of elements.

Mon Jul 29 08:49:01 PDT 2019

fhahn created this revision.
fhahn added reviewers: dmgreen, samparker, t.p.northover.
Herald added subscribers: hiraditya, kristof.beyls, javed.absar.
Herald added a project: LLVM.

We process 2 elements at a time and expect the number of elements to be
even. Similar to D60690 <https://reviews.llvm.org/D60690>.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D65400

Files:
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll


Index: llvm/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll
===================================================================

--- llvm/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll
+++ llvm/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll
@@ -31,3 +31,20 @@
   %s3 = shufflevector <3 x i32> %y, <3 x i32> %x, <4 x i32> <i32 1, i32 4, i32 3, i32 0>
   ret <4 x i32> %s3
 }
+
+define void @zip_mask_check(<3 x float>* %p1, <3 x float>* %p2, i32* %p3) {
+  %tmp3 = load <3 x float>, <3 x float>* %p1, align 16
+  %tmp4 = load <3 x float>, <3 x float>* %p2, align 4
+  %tmp5 = shufflevector <3 x float> %tmp3, <3 x float> %tmp4, <4 x i32> <i32 1, i32 4, i32 undef, i32 undef>
+  %tmp6 = shufflevector <4 x float> %tmp5, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 5, i32 undef>
+  %tmp7 = shufflevector <4 x float> %tmp6, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+  %tmp8 = call <4 x float> @llvm.fma.v4f32(<4 x float> %tmp7, <4 x float> undef, <4 x float> undef)
+  %tmp9 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> %tmp8)
+  %tmp10 = shufflevector <4 x float> %tmp9, <4 x float> undef, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %tmp11 = bitcast <16 x float> %tmp10 to <16 x i32>
+  %tmp12 = extractelement <16 x i32> %tmp11, i32 0
+  store i32 %tmp12, i32* %p3, align 4
+  ret void
+}
+
+declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #1
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6305,6 +6305,8 @@
 
 static bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
   unsigned NumElts = VT.getVectorNumElements();
+  if (NumElts % 2 != 0)
+    return false;
   WhichResult = (M[0] == 0 ? 0 : 1);
   unsigned Idx = WhichResult * NumElts / 2;
   for (unsigned i = 0; i != NumElts; i += 2) {


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D65400.212171.patch
Type: text/x-patch
Size: 2152 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190729/2ca720d8/attachment.bin>