[llvm] cce0818 - [AArch64] Try to fold uaddlv and uaddlp
Jingu Kang via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 20 07:15:05 PDT 2023
Author: Jingu Kang
Date: 2023-06-20T15:14:27+01:00
New Revision: cce08185b4b53d92afee97bc7a245c1f0f79d34a
URL: https://github.com/llvm/llvm-project/commit/cce08185b4b53d92afee97bc7a245c1f0f79d34a
DIFF: https://github.com/llvm/llvm-project/commit/cce08185b4b53d92afee97bc7a245c1f0f79d34a.diff
LOG: [AArch64] Try to fold uaddlv and uaddlp
Add tablegen patterns to fold uaddlv(uaddlp(x)) ==> uaddlv(x): summing the pairwise widening sums of a vector is equivalent to the widening sum of all of its elements, so the intermediate uaddlp can be dropped.
Differential Revision: https://reviews.llvm.org/D153323
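For illustration, a minimal IR sketch of the shape the new patterns match (adapted from the added test; the function name is illustrative). With the patterns in place, the chained uaddlp/uaddlv intrinsic calls select to a single UADDLV reduction:

define i32 @uaddlv_of_uaddlp(<8 x i16> %v) {
  ; pairwise widening add: <8 x i16> -> <4 x i32>
  %pairs = tail call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> %v)
  ; widening across-vector sum: <4 x i32> -> i64
  %sum = tail call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> %pairs)
  %res = trunc i64 %sum to i32
  ret i32 %res
}

declare <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16>)
declare i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32>)

; With the new pattern, llc emits a single reduction (see the test below):
;   uaddlv s0, v0.8h
;   fmov   x0, d0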
Added:
llvm/test/CodeGen/AArch64/uaddlv-vaddlp-combine.ll
Modified:
llvm/lib/Target/AArch64/AArch64InstrInfo.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index daf8aed458096..0e46311295ef0 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -6329,6 +6329,17 @@ multiclass SIMDAcrossLaneLongPairIntrinsic<string Opc, SDPatternOperator addlp>
defm : SIMDAcrossLaneLongPairIntrinsic<"UADDLV", AArch64uaddlp>;
defm : SIMDAcrossLaneLongPairIntrinsic<"SADDLV", AArch64saddlp>;
+// Patterns for uaddlv(uaddlp(x)) ==> uaddlv
+def : Pat<(i64 (int_aarch64_neon_uaddlv (v4i32 (AArch64uaddlp (v8i16 V128:$op))))),
+ (i64 (EXTRACT_SUBREG
+ (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv8i16v V128:$op), ssub)),
+ dsub))>;
+
+def : Pat<(i32 (int_aarch64_neon_uaddlv (v8i16 (AArch64uaddlp (v16i8 V128:$op))))),
+ (i32 (EXTRACT_SUBREG
+ (v8i16 (SUBREG_TO_REG (i64 0), (UADDLVv16i8v V128:$op), hsub)),
+ ssub))>;
+
// Patterns for across-vector intrinsics, that have a node equivalent, that
// returns a vector (with only the low lane defined) instead of a scalar.
// In effect, opNode is the same as (scalar_to_vector (IntNode)).
diff --git a/llvm/test/CodeGen/AArch64/uaddlv-vaddlp-combine.ll b/llvm/test/CodeGen/AArch64/uaddlv-vaddlp-combine.ll
new file mode 100644
index 0000000000000..8b48635b6694c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/uaddlv-vaddlp-combine.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple aarch64-none-linux-gnu < %s | FileCheck %s
+
+define i32 @uaddlv_uaddlp_v8i16(<8 x i16> %0) {
+; CHECK-LABEL: uaddlv_uaddlp_v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uaddlv s0, v0.8h
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+ %2 = tail call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> %0)
+ %3 = tail call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> %2)
+ %4 = trunc i64 %3 to i32
+ ret i32 %4
+}
+
+define i16 @uaddlv_uaddlp_v16i8(<16 x i8> %0) {
+; CHECK-LABEL: uaddlv_uaddlp_v16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uaddlv h0, v0.16b
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+ %2 = tail call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> %0)
+ %3 = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> %2)
+ %4 = trunc i32 %3 to i16
+ ret i16 %4
+}
+
+declare i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32>)
+declare i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16>)
+declare <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16>)
+declare <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8>)