[PATCH] D14664: [ARM] Match VABDL from log2 shuffles.

Charlie Turner via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 13 10:42:53 PST 2015


chatur01 created this revision.
chatur01 added reviewers: jmolloy, rengolin.
chatur01 added a subscriber: llvm-commits.
chatur01 set the repository for this revision to rL LLVM.
Herald added subscribers: rengolin, aemerson.

The patterns are pretty gross, but I couldn't see a more concise way of doing it.

The bitcasts are needed for the `v2i64` and `v8i16` cases because they're type-legalized to `v4i32`.

Repository:
  rL LLVM

http://reviews.llvm.org/D14664

Files:
  lib/Target/ARM/ARMInstrNEON.td
  test/CodeGen/ARM/neon_vabs.ll

Index: test/CodeGen/ARM/neon_vabs.ll
===================================================================
--- test/CodeGen/ARM/neon_vabs.ll
+++ test/CodeGen/ARM/neon_vabs.ll
@@ -89,3 +89,41 @@
         %abs = select <2 x i1> %b, <2 x i32> %tmp1neg, <2 x i32> %a
         ret <2 x i32> %abs
 }
+
+;; Check that absdiff patterns as emitted by log2 shuffles are
+;; matched by VABDL.
+
+define <4 x i32> @test11(<4 x i16> %a, <4 x i16> %b) nounwind {
+; CHECK-LABEL: test11:
+; CHECK: vabdl.u16 q
+        %zext1 = zext <4 x i16> %a to <4 x i32> ; zero-extend each i16 lane of %a to i32
+        %zext2 = zext <4 x i16> %b to <4 x i32> ; zero-extend each i16 lane of %b to i32
+        %diff = sub <4 x i32> %zext1, %zext2 ; per-lane widened difference a - b (may be negative)
+        %shift1 = ashr <4 x i32> %diff, <i32 31, i32 31, i32 31, i32 31> ; sign mask: all-ones where %diff is negative, zero otherwise
+        %add1 = add <4 x i32> %shift1, %diff ; diff + mask
+        %res = xor <4 x i32> %shift1, %add1 ; (diff + mask) ^ mask, the branch-free absolute value of %diff
+        ret <4 x i32> %res
+}
+define <8 x i16> @test12(<8 x i8> %a, <8 x i8> %b) nounwind {
+; CHECK-LABEL: test12:
+; CHECK: vabdl.u8 q
+        %zext1 = zext <8 x i8> %a to <8 x i16> ; zero-extend each i8 lane of %a to i16
+        %zext2 = zext <8 x i8> %b to <8 x i16> ; zero-extend each i8 lane of %b to i16
+        %diff = sub <8 x i16> %zext1, %zext2 ; per-lane widened difference a - b (may be negative)
+        %shift1 = ashr <8 x i16> %diff,<i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> ; sign mask: all-ones where %diff is negative, zero otherwise
+        %add1 = add <8 x i16> %shift1, %diff ; diff + mask
+        %res = xor <8 x i16> %shift1, %add1 ; (diff + mask) ^ mask, the branch-free absolute value of %diff
+        ret <8 x i16> %res
+}
+
+define <2 x i64> @test13(<2 x i32> %a, <2 x i32> %b) nounwind {
+; CHECK-LABEL: test13:
+; CHECK: vabdl.u32 q
+        %zext1 = zext <2 x i32> %a to <2 x i64> ; zero-extend each i32 lane of %a to i64
+        %zext2 = zext <2 x i32> %b to <2 x i64> ; zero-extend each i32 lane of %b to i64
+        %diff = sub <2 x i64> %zext1, %zext2 ; per-lane widened difference a - b (may be negative)
+        %shift1 = ashr <2 x i64> %diff,<i64 63, i64 63> ; sign mask: all-ones where %diff is negative, zero otherwise
+        %add1 = add <2 x i64> %shift1, %diff ; diff + mask
+        %res = xor <2 x i64> %shift1, %add1 ; (diff + mask) ^ mask, the branch-free absolute value of %diff
+        ret <2 x i64> %res
+}
Index: lib/Target/ARM/ARMInstrNEON.td
===================================================================
--- lib/Target/ARM/ARMInstrNEON.td
+++ lib/Target/ARM/ARMInstrNEON.td
@@ -5009,6 +5009,31 @@
 defm VABDLu   : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
                                "vabdl", "u", uabsdiff, zext, 1>;
 
+def : Pat<(xor (v4i32 (bitconvert (v8i16 (NEONvshrs (sub (zext (v8i8 DPR:$opA)), // xor operand 1: mask = NEONvshrs(zext(opA) - zext(opB), 15)
+                                                          (zext (v8i8 DPR:$opB))), (i32 15))))), // bitconvert to v4i32: per the patch description, v8i16 is type-legalized to v4i32 here
+               (v4i32 (bitconvert (v8i16 (add (sub (zext (v8i8 DPR:$opA)), // xor operand 2: diff + mask, where diff = zext(opA) - zext(opB)
+                                                   (zext (v8i8 DPR:$opB))),
+                                              (v8i16 (NEONvshrs (sub (zext (v8i8 DPR:$opA)), // the mask expression is spelled out again as the add's second operand
+                                                                     (zext (v8i8 DPR:$opB))),
+                                                                (i32 15)))))))),
+          (VABDLuv8i16 DPR:$opA, DPR:$opB)>; // the whole (diff + mask) ^ mask tree selects to a single VABDLuv8i16 on the two D-register inputs
+
+def : Pat<(xor (v4i32 (NEONvshrs (sub (zext (v4i16 DPR:$opA)), // xor operand 1: mask = NEONvshrs(zext(opA) - zext(opB), 31); already v4i32, so no bitconvert
+                                      (zext (v4i16 DPR:$opB))), (i32 31))),
+               (v4i32 (add (sub (zext (v4i16 DPR:$opA)), // xor operand 2: diff + mask, where diff = zext(opA) - zext(opB)
+                                (zext (v4i16 DPR:$opB))),
+                      (NEONvshrs (sub (zext (v4i16 DPR:$opA)), // the mask expression is spelled out again as the add's second operand
+                                      (zext (v4i16 DPR:$opB))), (i32 31))))),
+          (VABDLuv4i32 DPR:$opA, DPR:$opB)>; // the whole (diff + mask) ^ mask tree selects to a single VABDLuv4i32 on the two D-register inputs
+
+def : Pat<(xor (v4i32 (bitconvert (v2i64 (NEONvshrs (sub (zext (v2i32 DPR:$opA)), // xor operand 1: mask = NEONvshrs(zext(opA) - zext(opB), 63)
+                                                         (zext (v2i32 DPR:$opB))), (i32 63))))), // bitconvert to v4i32: per the patch description, v2i64 is type-legalized to v4i32 here
+               (v4i32 (bitconvert (v2i64 (add (sub (zext (v2i32 DPR:$opA)), // xor operand 2: diff + mask, where diff = zext(opA) - zext(opB)
+                                                   (zext (v2i32 DPR:$opB))),
+                                         (NEONvshrs (sub (zext (v2i32 DPR:$opA)), // the mask expression is spelled out again as the add's second operand
+                                                         (zext (v2i32 DPR:$opB))), (i32 63))))))),
+          (VABDLuv2i64 DPR:$opA, DPR:$opB)>; // the whole (diff + mask) ^ mask tree selects to a single VABDLuv2i64 on the two D-register inputs
+
 //   VABA     : Vector Absolute Difference and Accumulate
 defm VABAs    : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
                              "vaba", "s", sabsdiff, add>;


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D14664.40164.patch
Type: text/x-patch
Size: 3923 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20151113/ad6f70de/attachment.bin>


More information about the llvm-commits mailing list