[PATCH] D62308: [AArch64] support neon_sshl in performIntrinsicCombine.

Florian Hahn via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Thu May 23 06:13:56 PDT 2019


fhahn created this revision.
fhahn added reviewers: t.p.northover, samparker, dmgreen.
Herald added subscribers: hiraditya, kristof.beyls, javed.absar.
Herald added a project: LLVM.

If the shift operand of aarch64_neon_sshl is a constant vector with all
elements equal, we can lower the intrinsic to VSHL with an immediate
shift amount.

This pattern can be generated by code using vshlq_s32(a, vdupq_n_s32(n))
instead of vshlq_n_s32(a, n): before inlining, n is not guaranteed to be
a constant, so the variable-shift form has to be used.
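
For illustration, a minimal C sketch of that pattern (the helper name is
hypothetical):

  #include <arm_neon.h>

  /* Before inlining, n is a runtime value, so the variable-shift
     intrinsic has to be used instead of vshlq_n_s32. */
  static inline int32x4_t shift_left(int32x4_t a, int32_t n) {
    return vshlq_s32(a, vdupq_n_s32(n));
  }

  int32x4_t shift_by_one(int32x4_t a) {
    /* After inlining, the second operand of aarch64_neon_sshl is the
       constant splat <1, 1, 1, 1>, which this patch lowers to VSHL,
       i.e. "shl.4s v0, v0, #1". */
    return shift_left(a, 1);
  }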

Also adds 2 tests marked with FIXME, where codegen can be improved
further.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D62308

Files:
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/test/CodeGen/AArch64/arm64-vshift.ll


Index: llvm/test/CodeGen/AArch64/arm64-vshift.ll
===================================================================
--- llvm/test/CodeGen/AArch64/arm64-vshift.ll
+++ llvm/test/CodeGen/AArch64/arm64-vshift.ll
@@ -1219,6 +1219,65 @@
         ret <2 x i64> %tmp3
 }
 
+declare <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32>, <4 x i32>)
+declare <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64>, <2 x i64>)
+
+define <16 x i8> @neon.sshll16b_constant_shift(<16 x i8>* %A) nounwind {
+;CHECK-LABEL: neon.sshll16b_constant_shift
+;CHECK: shl.16b v0, {{v[0-9]+}}, #1
+        %tmp1 = load <16 x i8>, <16 x i8>* %A
+        %tmp2 = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+        ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @neon.sshll8h_constant_shift(<8 x i8>* %A) nounwind {
+;CHECK-LABEL: neon.sshll8h_constant_shift
+;CHECK: sshll.8h v0, {{v[0-9]+}}, #1
+        %tmp1 = load <8 x i8>, <8 x i8>* %A
+        %tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
+        %tmp3 = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %tmp2, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+        ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @neon.sshll4s_constant_shift(<4 x i16>* %A) nounwind {
+;CHECK-LABEL: neon.sshll4s_constant_shift
+;CHECK: sshll.4s v0, {{v[0-9]+}}, #1
+        %tmp1 = load <4 x i16>, <4 x i16>* %A
+        %tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
+        %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+        ret <4 x i32> %tmp3
+}
+
+; FIXME: unnecessary sshll.4s v0, v0, #0?
+define <4 x i32> @neon.sshll4s_neg_constant_shift(<4 x i16>* %A) nounwind {
+;CHECK-LABEL: neon.sshll4s_neg_constant_shift
+;CHECK: movi.2d v1, #0xffffffffffffffff
+;CHECK: sshll.4s v0, v0, #0
+;CHECK: sshl.4s v0, v0, v1
+        %tmp1 = load <4 x i16>, <4 x i16>* %A
+        %tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
+        %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
+        ret <4 x i32> %tmp3
+}
+
+; FIXME: should be constant folded.
+define <4 x i32> @neon.sshll4s_constant_fold() nounwind {
+;CHECK-LABEL: neon.sshll4s_constant_fold
+;CHECK: shl.4s v0, {{v[0-9]+}}, #1
+        %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+        ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @neon.sshll2d_constant_shift(<2 x i32>* %A) nounwind {
+;CHECK-LABEL: neon.sshll2d_constant_shift
+;CHECK: sshll.2d v0, {{v[0-9]+}}, #1
+        %tmp1 = load <2 x i32>, <2 x i32>* %A
+        %tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
+        %tmp3 = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %tmp2, <2 x i64> <i64 1, i64 1>)
+        ret <2 x i64> %tmp3
+}
 define <8 x i16> @sshll2_8h(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: sshll2_8h:
 ;CHECK: sshll.8h v0, {{v[0-9]+}}, #1
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -10046,6 +10046,10 @@
     Opcode = AArch64ISD::SQSHLU_I;
     IsRightShift = false;
     break;
+  case Intrinsic::aarch64_neon_sshl:
+    Opcode = AArch64ISD::VSHL;
+    IsRightShift = false;
+    break;
   }
 
   if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) {
@@ -10132,6 +10136,7 @@
   case Intrinsic::aarch64_neon_sqshlu:
   case Intrinsic::aarch64_neon_srshl:
   case Intrinsic::aarch64_neon_urshl:
+  case Intrinsic::aarch64_neon_sshl:
     return tryCombineShiftImm(IID, N, DAG);
   case Intrinsic::aarch64_crc32b:
   case Intrinsic::aarch64_crc32cb:

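For context, the range check in tryCombineShiftImm that the new case feeds
into looks roughly like this (a paraphrased sketch, not the verbatim LLVM
source):

  // Left shifts: the splatted amount must be in [0, ElemBits); otherwise
  // the combine bails out and the intrinsic is lowered as before.
  if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits)
    return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0),
                       N->getOperand(1),
                       DAG.getConstant(ShiftAmount, SDLoc(N), MVT::i32));

This is also why the neon.sshll4s_neg_constant_shift test above still goes
through sshl: a splat of -1 fails the left-shift range check.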
