[llvm] 67fc0d3 - [AArch64] Remove copy instruction between uaddlv and dup
Jingu Kang via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 5 06:42:37 PDT 2023
Author: Jingu Kang
Date: 2023-09-05T14:41:28+01:00
New Revision: 67fc0d3d39db794b8b1280ff60c0193631e3c821
URL: https://github.com/llvm/llvm-project/commit/67fc0d3d39db794b8b1280ff60c0193631e3c821
DIFF: https://github.com/llvm/llvm-project/commit/67fc0d3d39db794b8b1280ff60c0193631e3c821.diff
LOG: [AArch64] Remove copy instruction between uaddlv and dup
If there are copy instructions between uaddlv and dup that transfer the value
from a GPR to an FPR, try to remove them by using duplane instead.
Differential Revision: https://reviews.llvm.org/D159267
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.h
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll
llvm/test/CodeGen/AArch64/dp1.ll
llvm/test/CodeGen/AArch64/neon-addlv.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 1e209839f45e78..2bb8e43243066b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2428,6 +2428,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::FCMLTz)
MAKE_CASE(AArch64ISD::SADDV)
MAKE_CASE(AArch64ISD::UADDV)
+ MAKE_CASE(AArch64ISD::UADDLV)
MAKE_CASE(AArch64ISD::SDOT)
MAKE_CASE(AArch64ISD::UDOT)
MAKE_CASE(AArch64ISD::SMINV)
@@ -5323,6 +5324,20 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(), ID,
Op.getOperand(1), Op.getOperand(2));
}
+ case Intrinsic::aarch64_neon_uaddlv: {
+ EVT OpVT = Op.getOperand(1).getValueType();
+ EVT ResVT = Op.getValueType();
+ if (ResVT == MVT::i32 && (OpVT == MVT::v8i8 || OpVT == MVT::v16i8)) {
+ // In order to avoid insert_subvector, use v4i32 rather than v2i32.
+ SDValue UADDLV =
+ DAG.getNode(AArch64ISD::UADDLV, dl, MVT::v4i32, Op.getOperand(1));
+ SDValue EXTRACT_VEC_ELT =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, UADDLV,
+ DAG.getConstant(0, dl, MVT::i64));
+ return EXTRACT_VEC_ELT;
+ }
+ return SDValue();
+ }
}
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 6e92b2fcab0756..67c344318e0d3e 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -242,6 +242,9 @@ enum NodeType : unsigned {
SADDV,
UADDV,
+ // Unsigned sum Long across Vector
+ UADDLV,
+
// Add Pairwise of two vectors
ADDP,
// Add Long Pairwise
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 5bdb1d9ffc6d9b..4a1f46f2576aec 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -752,6 +752,7 @@ def AArch64sminv : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>;
def AArch64uminv : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>;
def AArch64smaxv : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>;
def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;
+def AArch64uaddlv : SDNode<"AArch64ISD::UADDLV", SDT_AArch64uaddlp>;
def AArch64uabd : PatFrags<(ops node:$lhs, node:$rhs),
[(abdu node:$lhs, node:$rhs),
@@ -6461,6 +6462,12 @@ def : Pat<(i32 (int_aarch64_neon_uaddlv (v8i16 (AArch64uaddlp (v16i8 V128:$op)))
(v8i16 (SUBREG_TO_REG (i64 0), (UADDLVv16i8v V128:$op), hsub)),
ssub))>;
+def : Pat<(v4i32 (AArch64uaddlv (v8i8 V64:$Rn))),
+ (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv8i8v V64:$Rn), hsub))>;
+
+def : Pat<(v4i32 (AArch64uaddlv (v16i8 V128:$Rn))),
+ (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv16i8v V128:$Rn), hsub))>;
+
// Patterns for across-vector intrinsics, that have a node equivalent, that
// returns a vector (with only the low lane defined) instead of a scalar.
// In effect, opNode is the same as (scalar_to_vector (IntNode)).
diff --git a/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll b/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll
index 665e8f90f850b4..bf420700eb575f 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll
@@ -95,8 +95,8 @@ define void @insert_vec_v2i32_uaddlv_from_v16i8(ptr %0) {
; CHECK-NEXT: movi.2d v1, #0000000000000000
; CHECK-NEXT: uaddlv.16b h0, v0
; CHECK-NEXT: mov.s v1[0], v0[0]
-; CHECK-NEXT: ucvtf.2s v1, v1
-; CHECK-NEXT: str d1, [x0]
+; CHECK-NEXT: ucvtf.2s v0, v1
+; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/AArch64/dp1.ll b/llvm/test/CodeGen/AArch64/dp1.ll
index 27b105381aa079..bb5b19e51995a4 100644
--- a/llvm/test/CodeGen/AArch64/dp1.ll
+++ b/llvm/test/CodeGen/AArch64/dp1.ll
@@ -205,8 +205,7 @@ define void @ctpop_i32() {
; CHECK-SDAG-NEXT: fmov d0, x9
; CHECK-SDAG-NEXT: cnt v0.8b, v0.8b
; CHECK-SDAG-NEXT: uaddlv h0, v0.8b
-; CHECK-SDAG-NEXT: fmov w9, s0
-; CHECK-SDAG-NEXT: str w9, [x8]
+; CHECK-SDAG-NEXT: str s0, [x8]
; CHECK-SDAG-NEXT: ret
;
; CHECK-GISEL-LABEL: ctpop_i32:
diff --git a/llvm/test/CodeGen/AArch64/neon-addlv.ll b/llvm/test/CodeGen/AArch64/neon-addlv.ll
index aaa9f9139b0908..0f5a19c7a0f3b8 100644
--- a/llvm/test/CodeGen/AArch64/neon-addlv.ll
+++ b/llvm/test/CodeGen/AArch64/neon-addlv.ll
@@ -177,3 +177,21 @@ entry:
%0 = and i32 %vaddlv.i, 65535
ret i32 %0
}
+
+define dso_local <8 x i8> @bar(<8 x i8> noundef %a) local_unnamed_addr #0 {
+; CHECK-LABEL: bar:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uaddlv h0, v0.8b
+; CHECK-NEXT: dup v0.8h, v0.h[0]
+; CHECK-NEXT: rshrn v0.8b, v0.8h, #3
+; CHECK-NEXT: ret
+entry:
+ %vaddlv.i = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8> %a)
+ %0 = trunc i32 %vaddlv.i to i16
+ %vecinit.i = insertelement <8 x i16> undef, i16 %0, i64 0
+ %vecinit7.i = shufflevector <8 x i16> %vecinit.i, <8 x i16> poison, <8 x i32> zeroinitializer
+ %vrshrn_n2 = tail call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %vecinit7.i, i32 3)
+ ret <8 x i8> %vrshrn_n2
+}
+
+declare <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32)
More information about the llvm-commits
mailing list