[PATCH] D148234: [AArch64] Remove AND and FMOV between uaddlv and urshl

Thu Apr 13 07:55:20 PDT 2023

jaykang10 created this revision.
jaykang10 added reviewers: dmgreen, efriedma, t.p.northover.
Herald added subscribers: hiraditya, kristof.beyls.
Herald added a project: All.
jaykang10 requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

GCC generates fewer instructions than LLVM for the intrinsic example below. The example was mentioned in https://reviews.llvm.org/D148134.

  #include <arm_neon.h>
  
  uint8x8_t test1(uint8x8_t a) {
      return vdup_n_u8(vrshrd_n_u64(vaddlv_u8(a), 3));
  }
  
  gcc output
  test1:
  	uaddlv	h0, v0.8b
  	umov	w0, v0.h[0]
  	fmov	d0, x0
  	urshr	d0, d0, 3
  	dup	v0.8b, v0.b[0]
  	ret
  
  llvm output
  test1:                                  // @test1
  	uaddlv	h0, v0.8b
  	fmov	w8, s0
  	and	w8, w8, #0xffff
  	fmov	d0, x8
  	urshr	d0, d0, #3
  	fmov	x8, d0
  	dup	v0.8b, w8
  	ret

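With this patch's TableGen pattern, LLVM generates the output below. The AND and the FMOV round trip through a general-purpose register are redundant because UADDLV writes its result to the H register and zeroes the remaining bits of the vector register, so URSHR can consume d0 directly.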

  test1:                                  // @test1
  	uaddlv	h0, v0.8b
  	urshr	d0, d0, #3
  	fmov	x8, d0
  	dup	v0.8b, w8
  	ret


https://reviews.llvm.org/D148234

Files:
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/test/CodeGen/AArch64/remove-and-fmov-between-uaddlv-urshl.ll


Index: llvm/test/CodeGen/AArch64/remove-and-fmov-between-uaddlv-urshl.ll
===================================================================

--- /dev/null
+++ llvm/test/CodeGen/AArch64/remove-and-fmov-between-uaddlv-urshl.ll
@@ -0,0 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -o - %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+define <8 x i8> @test1(<8 x i8> noundef %a) {
+; CHECK-LABEL: test1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uaddlv h0, v0.8b
+; CHECK-NEXT:    urshr d0, d0, #3
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    dup v0.8b, w8
+; CHECK-NEXT:    ret
+entry:
+  %vaddlv.i = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8> %a)
+  %0 = and i32 %vaddlv.i, 65535
+  %conv = zext i32 %0 to i64
+  %vrshr_n = tail call i64 @llvm.aarch64.neon.urshl.i64(i64 %conv, i64 -3)
+  %conv1 = trunc i64 %vrshr_n to i8
+  %vecinit.i = insertelement <8 x i8> undef, i8 %conv1, i64 0
+  %vecinit7.i = shufflevector <8 x i8> %vecinit.i, <8 x i8> poison, <8 x i32> zeroinitializer
+  ret <8 x i8> %vecinit7.i
+}
+
+declare i64 @llvm.aarch64.neon.urshl.i64(i64, i64) #1
+declare i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8>) #1
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -6995,6 +6995,17 @@
     TriOpFrag<(add_and_or_is_add node:$LHS,
                    (AArch64vlshr node:$MHS, node:$RHS))>>;
 
+def : Pat<(i64 (AArch64urshri
+                 (i64 (zext
+                   (i32 (and
+                     (i32 (int_aarch64_neon_uaddlv (v8i8 V64:$Rn))), (i32 65535))))),
+                 (i32 vecshiftR64:$imm))),
+          (i64 (URSHRd
+            (EXTRACT_SUBREG
+              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+                (UADDLVv8i8v V64:$Rn), hsub), dsub),
+             vecshiftR64:$imm))>;
+
 //----------------------------------------------------------------------------
 // AdvSIMD vector shift instructions
 //----------------------------------------------------------------------------


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D148234.513235.patch
Type: text/x-patch
Size: 2202 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230413/bcb7e0ca/attachment-0001.bin>