[PATCH] D116166: [AArch64] Add a tablegen pattern for RADDHN/RADDHN2

Fri Dec 24 03:31:35 PST 2021

This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGbb84dd81590b: [AArch64] Add a tablegen pattern for RADDHN/RADDHN2. (authored by labrinea).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D116166/new/

https://reviews.llvm.org/D116166

Files:
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/test/CodeGen/AArch64/arm64-raddhn-combine.ll


Index: llvm/test/CodeGen/AArch64/arm64-raddhn-combine.ll
===================================================================

--- /dev/null
+++ llvm/test/CodeGen/AArch64/arm64-raddhn-combine.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple aarch64-none-linux-gnu | FileCheck %s
+
+define <16 x i8> @test_combine_v8i16_to_v16i8(<8 x i16> %x, <8 x i16> %y) {
+; CHECK-LABEL: test_combine_v8i16_to_v16i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v2.2d, #0000000000000000
+; CHECK-NEXT:    raddhn v0.8b, v0.8h, v2.8h
+; CHECK-NEXT:    raddhn2 v0.16b, v1.8h, v2.8h
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %x, i32 8)
+  %res2 = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %y, i32 8)
+  %shuffle = shufflevector <8 x i8> %res, <8 x i8> %res2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <16 x i8> %shuffle
+}
+
+define <8 x i16> @test_combine_v4i32_to_v8i16(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: test_combine_v4i32_to_v8i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v2.2d, #0000000000000000
+; CHECK-NEXT:    raddhn v0.4h, v0.4s, v2.4s
+; CHECK-NEXT:    raddhn2 v0.8h, v1.4s, v2.4s
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %x, i32 16)
+  %res2 = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %y, i32 16)
+  %shuffle = shufflevector <4 x i16> %res, <4 x i16> %res2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i16> %shuffle
+}
+
+define <4 x i32> @test_combine_v2i64_to_v4i32(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: test_combine_v2i64_to_v4i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v2.2d, #0000000000000000
+; CHECK-NEXT:    raddhn v0.2s, v0.2d, v2.2d
+; CHECK-NEXT:    raddhn2 v0.4s, v1.2d, v2.2d
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %x, i32 32)
+  %res2 = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %y, i32 32)
+  %shuffle = shufflevector <2 x i32> %res, <2 x i32> %res2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i32> %shuffle
+}
+
+declare <8 x i8>  @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32)
+declare <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32>, i32)
+declare <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64>, i32)
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -6603,6 +6603,34 @@
 defm USRA    : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra",
                 TriOpFrag<(add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >;
 
+// RADDHN patterns for when RSHRN shifts by half the size of the vector element
+def : Pat<(v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))),
+          (RADDHNv8i16_v8i8 V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>;
+def : Pat<(v4i16 (int_aarch64_neon_rshrn (v4i32 V128:$Vn), (i32 16))),
+          (RADDHNv4i32_v4i16 V128:$Vn, (v4i32 (MOVIv2d_ns (i32 0))))>;
+def : Pat<(v2i32 (int_aarch64_neon_rshrn (v2i64 V128:$Vn), (i32 32))),
+          (RADDHNv2i64_v2i32 V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>;
+
+// RADDHN2 patterns for when RSHRN shifts by half the size of the vector element
+def : Pat<(v16i8 (concat_vectors
+                 (v8i8 V64:$Vd),
+                 (v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))))),
+          (RADDHNv8i16_v16i8
+                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
+                 (v8i16 (MOVIv2d_ns (i32 0))))>;
+def : Pat<(v8i16 (concat_vectors
+                 (v4i16 V64:$Vd),
+                 (v4i16 (int_aarch64_neon_rshrn (v4i32 V128:$Vn), (i32 16))))),
+          (RADDHNv4i32_v8i16
+                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
+                 (v4i32 (MOVIv2d_ns (i32 0))))>;
+def : Pat<(v4i32 (concat_vectors
+                 (v2i32 V64:$Vd),
+                 (v2i32 (int_aarch64_neon_rshrn (v2i64 V128:$Vn), (i32 32))))),
+          (RADDHNv2i64_v4i32
+                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
+                 (v2i64 (MOVIv2d_ns (i32 0))))>;
+
 // SHRN patterns for when a logical right shift was used instead of arithmetic
 // (the immediate guarantees no sign bits actually end up in the result so it
 // doesn't matter).


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D116166.396150.patch
Type: text/x-patch
Size: 4582 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20211224/03a7cfd6/attachment.bin>