[llvm] 74d1fe7 - [AArch64] Expand UADDLV patterns to SADDLV
David Green via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 4 06:07:06 PST 2022
Author: David Green
Date: 2022-02-04T14:07:02Z
New Revision: 74d1fe72f4f25188de8a13b0826d8382c210187c
URL: https://github.com/llvm/llvm-project/commit/74d1fe72f4f25188de8a13b0826d8382c210187c
DIFF: https://github.com/llvm/llvm-project/commit/74d1fe72f4f25188de8a13b0826d8382c210187c.diff
LOG: [AArch64] Expand UADDLV patterns to SADDLV
We already had some patterns for UADDV(UADDLP(x)) -> UADDLV(x). This
patch extends them to the signed instructions (SADDLP -> SADDLV) by
re-using the same tablegen patterns through a multiclass.
Differential Revision: https://reviews.llvm.org/D118133
Added:
Modified:
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/test/CodeGen/AArch64/neon-addlv.ll
llvm/test/CodeGen/AArch64/vecreduce-add.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 8dc91d6c1be2d..42c1afcff6a90 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5860,24 +5860,28 @@ defm FMAXV : SIMDFPAcrossLanes<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>;
defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>;
defm FMINV : SIMDFPAcrossLanes<0b01111, 1, "fminv", int_aarch64_neon_fminv>;
-// Patterns for uaddv(uaddlp(x)) ==> uaddlv
-def : Pat<(i32 (vector_extract (v8i16 (insert_subvector undef,
- (v4i16 (AArch64uaddv (v4i16 (AArch64uaddlp (v8i8 V64:$op))))),
- (i64 0))), (i64 0))),
- (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
- (UADDLVv8i8v V64:$op), hsub), ssub)>;
-def : Pat<(i32 (vector_extract (v8i16 (AArch64uaddv (v8i16 (AArch64uaddlp
- (v16i8 V128:$op))))), (i64 0))),
- (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
- (UADDLVv16i8v V128:$op), hsub), ssub)>;
-def : Pat<(v4i32 (AArch64uaddv (v4i32 (AArch64uaddlp (v8i16 V128:$op))))),
- (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (UADDLVv8i16v V128:$op), ssub)>;
-
-// Patterns for addp(uaddlp(x))) ==> uaddlv
-def : Pat<(v2i32 (AArch64uaddv (v2i32 (AArch64uaddlp (v4i16 V64:$op))))),
- (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), (UADDLVv4i16v V64:$op), ssub)>;
-def : Pat<(v2i64 (AArch64uaddv (v2i64 (AArch64uaddlp (v4i32 V128:$op))))),
- (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (UADDLVv4i32v V128:$op), dsub)>;
+multiclass SIMDAcrossLaneLongPairIntrinsic<string Opc, SDPatternOperator addlp> {
+ // Patterns for addv(addlp(x)) ==> addlv
+ def : Pat<(i32 (vector_extract (v8i16 (insert_subvector undef,
+ (v4i16 (AArch64uaddv (v4i16 (addlp (v8i8 V64:$op))))),
+ (i64 0))), (i64 0))),
+ (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
+ (!cast<Instruction>(Opc#"v8i8v") V64:$op), hsub), ssub)>;
+ def : Pat<(i32 (vector_extract (v8i16 (AArch64uaddv (v8i16 (addlp (v16i8 V128:$op))))), (i64 0))),
+ (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
+ (!cast<Instruction>(Opc#"v16i8v") V128:$op), hsub), ssub)>;
+ def : Pat<(v4i32 (AArch64uaddv (v4i32 (addlp (v8i16 V128:$op))))),
+ (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v8i16v") V128:$op), ssub)>;
+
+ // Patterns for addp(addlp(x)) ==> addlv
+ def : Pat<(v2i32 (AArch64uaddv (v2i32 (addlp (v4i16 V64:$op))))),
+ (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v4i16v") V64:$op), ssub)>;
+ def : Pat<(v2i64 (AArch64uaddv (v2i64 (addlp (v4i32 V128:$op))))),
+ (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v4i32v") V128:$op), dsub)>;
+}
+
+defm : SIMDAcrossLaneLongPairIntrinsic<"UADDLV", AArch64uaddlp>;
+defm : SIMDAcrossLaneLongPairIntrinsic<"SADDLV", AArch64saddlp>;
// Patterns for across-vector intrinsics, that have a node equivalent, that
// returns a vector (with only the low lane defined) instead of a scalar.
diff --git a/llvm/test/CodeGen/AArch64/neon-addlv.ll b/llvm/test/CodeGen/AArch64/neon-addlv.ll
index bf23e94155ae4..1e4931e322c9a 100644
--- a/llvm/test/CodeGen/AArch64/neon-addlv.ll
+++ b/llvm/test/CodeGen/AArch64/neon-addlv.ll
@@ -90,8 +90,7 @@ define i16 @saddlv4h_from_v8i8(<8 x i8>* %A) nounwind {
; CHECK-LABEL: saddlv4h_from_v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
-; CHECK-NEXT: saddlp v0.4h, v0.8b
-; CHECK-NEXT: addv h0, v0.4h
+; CHECK-NEXT: saddlv h0, v0.8b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%tmp1 = load <8 x i8>, <8 x i8>* %A
@@ -104,8 +103,7 @@ define i16 @saddlv16b_from_v16i8(<16 x i8>* %A) nounwind {
; CHECK-LABEL: saddlv16b_from_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: saddlp v0.8h, v0.16b
-; CHECK-NEXT: addv h0, v0.8h
+; CHECK-NEXT: saddlv h0, v0.16b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%tmp1 = load <16 x i8>, <16 x i8>* %A
@@ -118,8 +116,7 @@ define i32 @saddlv8h_from_v8i16(<8 x i16>* %A) nounwind {
; CHECK-LABEL: saddlv8h_from_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: saddlp v0.4s, v0.8h
-; CHECK-NEXT: addv s0, v0.4s
+; CHECK-NEXT: saddlv s0, v0.8h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%tmp1 = load <8 x i16>, <8 x i16>* %A
@@ -132,8 +129,7 @@ define i64 @saddlv4s_from_v4i32(<4 x i32>* %A) nounwind {
; CHECK-LABEL: saddlv4s_from_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: saddlp v0.2d, v0.4s
-; CHECK-NEXT: addp d0, v0.2d
+; CHECK-NEXT: saddlv d0, v0.4s
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%tmp1 = load <4 x i32>, <4 x i32>* %A
@@ -146,8 +142,7 @@ define i32 @saddlv4h_from_v4i16(<4 x i16>* %A) nounwind {
; CHECK-LABEL: saddlv4h_from_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
-; CHECK-NEXT: saddlp v0.2s, v0.4h
-; CHECK-NEXT: addp v0.2s, v0.2s, v0.2s
+; CHECK-NEXT: saddlv s0, v0.4h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%tmp1 = load <4 x i16>, <4 x i16>* %A
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-add.ll b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
index 273f2e02baa55..3b946b8b2e092 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-add.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
@@ -28,8 +28,7 @@ entry:
define i64 @add_v4i32_v4i64_sext(<4 x i32> %x) {
; CHECK-LABEL: add_v4i32_v4i64_sext:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: saddlp v0.2d, v0.4s
-; CHECK-NEXT: addp d0, v0.2d
+; CHECK-NEXT: saddlv d0, v0.4s
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
entry:
@@ -79,8 +78,7 @@ entry:
define i32 @add_v8i16_v8i32_sext(<8 x i16> %x) {
; CHECK-LABEL: add_v8i16_v8i32_sext:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: saddlp v0.4s, v0.8h
-; CHECK-NEXT: addv s0, v0.4s
+; CHECK-NEXT: saddlv s0, v0.8h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
entry:
@@ -177,8 +175,7 @@ define i64 @add_v4i16_v4i64_sext(<4 x i16> %x) {
; CHECK-LABEL: add_v4i16_v4i64_sext:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-NEXT: saddlp v0.2d, v0.4s
-; CHECK-NEXT: addp d0, v0.2d
+; CHECK-NEXT: saddlv d0, v0.4s
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
entry:
@@ -295,8 +292,7 @@ define i32 @add_v8i8_v8i32_sext(<8 x i8> %x) {
; CHECK-BASE-LABEL: add_v8i8_v8i32_sext:
; CHECK-BASE: // %bb.0: // %entry
; CHECK-BASE-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-BASE-NEXT: saddlp v0.4s, v0.8h
-; CHECK-BASE-NEXT: addv s0, v0.4s
+; CHECK-BASE-NEXT: saddlv s0, v0.8h
; CHECK-BASE-NEXT: fmov w0, s0
; CHECK-BASE-NEXT: ret
;
@@ -596,8 +592,7 @@ entry:
define i64 @add_v4i32_v4i64_acc_sext(<4 x i32> %x, i64 %a) {
; CHECK-LABEL: add_v4i32_v4i64_acc_sext:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: saddlp v0.2d, v0.4s
-; CHECK-NEXT: addp d0, v0.2d
+; CHECK-NEXT: saddlv d0, v0.4s
; CHECK-NEXT: fmov x8, d0
; CHECK-NEXT: add x0, x8, x0
; CHECK-NEXT: ret
@@ -655,8 +650,7 @@ entry:
define i32 @add_v8i16_v8i32_acc_sext(<8 x i16> %x, i32 %a) {
; CHECK-LABEL: add_v8i16_v8i32_acc_sext:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: saddlp v0.4s, v0.8h
-; CHECK-NEXT: addv s0, v0.4s
+; CHECK-NEXT: saddlv s0, v0.8h
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: add w0, w8, w0
; CHECK-NEXT: ret
@@ -768,8 +762,7 @@ define i64 @add_v4i16_v4i64_acc_sext(<4 x i16> %x, i64 %a) {
; CHECK-LABEL: add_v4i16_v4i64_acc_sext:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-NEXT: saddlp v0.2d, v0.4s
-; CHECK-NEXT: addp d0, v0.2d
+; CHECK-NEXT: saddlv d0, v0.4s
; CHECK-NEXT: fmov x8, d0
; CHECK-NEXT: add x0, x8, x0
; CHECK-NEXT: ret
@@ -901,8 +894,7 @@ define i32 @add_v8i8_v8i32_acc_sext(<8 x i8> %x, i32 %a) {
; CHECK-BASE-LABEL: add_v8i8_v8i32_acc_sext:
; CHECK-BASE: // %bb.0: // %entry
; CHECK-BASE-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-BASE-NEXT: saddlp v0.4s, v0.8h
-; CHECK-BASE-NEXT: addv s0, v0.4s
+; CHECK-BASE-NEXT: saddlv s0, v0.8h
; CHECK-BASE-NEXT: fmov w8, s0
; CHECK-BASE-NEXT: add w0, w8, w0
; CHECK-BASE-NEXT: ret
@@ -974,8 +966,7 @@ entry:
define signext i16 @add_v16i8_v16i16_acc_sext(<16 x i8> %x, i16 %a) {
; CHECK-LABEL: add_v16i8_v16i16_acc_sext:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: saddlp v0.8h, v0.16b
-; CHECK-NEXT: addv h0, v0.8h
+; CHECK-NEXT: saddlv h0, v0.16b
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: add w8, w8, w0
; CHECK-NEXT: sxth w0, w8
@@ -1695,10 +1686,8 @@ entry:
define signext i16 @add_pair_v16i8_v16i16_sext(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: add_pair_v16i8_v16i16_sext:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: saddlp v0.8h, v0.16b
-; CHECK-NEXT: saddlp v1.8h, v1.16b
-; CHECK-NEXT: addv h0, v0.8h
-; CHECK-NEXT: addv h1, v1.8h
+; CHECK-NEXT: saddlv h0, v0.16b
+; CHECK-NEXT: saddlv h1, v1.16b
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: add w8, w8, w9
More information about the llvm-commits mailing list