[llvm] 5474d49 - [AArch64] Remove copy instruction between uaddlv and urshr

Jingu Kang via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 11 01:07:37 PDT 2023


Author: Jingu Kang
Date: 2023-09-11T09:06:09+01:00
New Revision: 5474d49f1f5c4b5861cf611ac291d777f6bf54d5

URL: https://github.com/llvm/llvm-project/commit/5474d49f1f5c4b5861cf611ac291d777f6bf54d5
DIFF: https://github.com/llvm/llvm-project/commit/5474d49f1f5c4b5861cf611ac291d777f6bf54d5.diff

LOG: [AArch64] Remove copy instruction between uaddlv and urshr

If there are copy instructions between uaddlv and urshr that transfer the value
from a GPR to an FPR, or vice versa, try to remove them.

Differential Revision: https://reviews.llvm.org/D159265
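
For illustration, here is the codegen difference on the new test case added
below. The "after" sequence matches the updated CHECK lines; the "before"
sequence is a reconstruction of the pre-patch output, so the exact register
numbers are an assumption:

    // before: the uaddlv result bounces through the GPR file
    uaddlv  h0, v0.8b
    fmov    w8, s0
    fmov    d0, x8
    urshr   d0, d0, #3
    dup     v0.8b, v0.b[0]

    // after: the value stays in the FP/SIMD register file
    uaddlv  h0, v0.8b
    urshr   d0, d0, #3
    dup     v0.8b, v0.b[0]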

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/test/CodeGen/AArch64/neon-addlv.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index abe100335c23172..fb25dd1d77553fb 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1013,6 +1013,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   setTargetDAGCombine(ISD::VECREDUCE_OR);
   setTargetDAGCombine(ISD::VECREDUCE_XOR);
 
+  setTargetDAGCombine(ISD::SCALAR_TO_VECTOR);
+
   // In case of strict alignment, avoid an excessive number of byte wide stores.
   MaxStoresPerMemsetOptSize = 8;
   MaxStoresPerMemset =
@@ -23121,6 +23123,55 @@ static SDValue performMULLCombine(SDNode *N,
   return SDValue();
 }
 
+static SDValue
+performScalarToVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
+                             SelectionDAG &DAG) {
+  // Perform the following transform:
+  //
+  //         t34: v4i32 = AArch64ISD::UADDLV t2
+  //       t35: i32 = extract_vector_elt t34, Constant:i64<0>
+  //     t7: i64 = zero_extend t35
+  //   t20: v1i64 = scalar_to_vector t7
+  // ==>
+  //      t34: v4i32 = AArch64ISD::UADDLV t2
+  //    t39: v2i32 = extract_subvector t34, Constant:i64<0>
+  //  t40: v1i64 = AArch64ISD::NVCAST t39
+  if (DCI.isBeforeLegalizeOps())
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+  if (VT != MVT::v1i64)
+    return SDValue();
+
+  SDValue ZEXT = N->getOperand(0);
+  if (ZEXT.getOpcode() != ISD::ZERO_EXTEND || ZEXT.getValueType() != MVT::i64)
+    return SDValue();
+
+  SDValue EXTRACT_VEC_ELT = ZEXT.getOperand(0);
+  if (EXTRACT_VEC_ELT.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+      EXTRACT_VEC_ELT.getValueType() != MVT::i32)
+    return SDValue();
+
+  if (!isNullConstant(EXTRACT_VEC_ELT.getOperand(1)))
+    return SDValue();
+
+  SDValue UADDLV = EXTRACT_VEC_ELT.getOperand(0);
+  if (UADDLV.getOpcode() != AArch64ISD::UADDLV ||
+      UADDLV.getValueType() != MVT::v4i32 ||
+      UADDLV.getOperand(0).getValueType() != MVT::v8i8)
+    return SDValue();
+
+  // Generate the new sequence using AArch64ISD::NVCAST.
+  SDLoc DL(N);
+  SDValue EXTRACT_SUBVEC =
+      DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, UADDLV,
+                  DAG.getConstant(0, DL, MVT::i64));
+  SDValue NVCAST =
+      DAG.getNode(AArch64ISD::NVCAST, DL, MVT::v1i64, EXTRACT_SUBVEC);
+
+  return NVCAST;
+}
+
 SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -23436,6 +23487,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
     return performGlobalAddressCombine(N, DAG, Subtarget, getTargetMachine());
   case ISD::CTLZ:
     return performCTLZCombine(N, DAG, Subtarget);
+  case ISD::SCALAR_TO_VECTOR:
+    return performScalarToVectorCombine(N, DCI, DAG);
   }
   return SDValue();
 }
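
For context, a minimal IR reproducer (a sketch, not part of this patch; the
function name is illustrative) that should produce the matched
scalar_to_vector (zero_extend (extract_vector_elt UADDLV, 0)) pattern during
DAG combining:

    declare i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8>)

    ; The zext of the uaddlv result inserted into a <1 x i64> becomes a
    ; scalar_to_vector node of type v1i64 in the SelectionDAG.
    define <1 x i64> @uaddlv_to_v1i64(<8 x i8> %a) {
      %s = call i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8> %a)
      %z = zext i32 %s to i64
      %v = insertelement <1 x i64> poison, i64 %z, i64 0
      ret <1 x i64> %v
    }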

diff --git a/llvm/test/CodeGen/AArch64/neon-addlv.ll b/llvm/test/CodeGen/AArch64/neon-addlv.ll
index 0f5a19c7a0f3b87..1b037c13aa4b546 100644
--- a/llvm/test/CodeGen/AArch64/neon-addlv.ll
+++ b/llvm/test/CodeGen/AArch64/neon-addlv.ll
@@ -178,8 +178,8 @@ entry:
   ret i32 %0
 }
 
-define dso_local <8 x i8> @bar(<8 x i8> noundef %a) local_unnamed_addr #0 {
-; CHECK-LABEL: bar:
+define dso_local <8 x i8> @uaddlv_v8i8_dup(<8 x i8> %a) {
+; CHECK-LABEL: uaddlv_v8i8_dup:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    uaddlv h0, v0.8b
 ; CHECK-NEXT:    dup v0.8h, v0.h[0]
@@ -195,3 +195,23 @@ entry:
 }
 
 declare <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32)
+
+declare i64 @llvm.aarch64.neon.urshl.i64(i64, i64)
+
+define <8 x i8> @uaddlv_v8i8_urshr(<8 x i8> %a) {
+; CHECK-LABEL: uaddlv_v8i8_urshr:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uaddlv h0, v0.8b
+; CHECK-NEXT:    urshr d0, d0, #3
+; CHECK-NEXT:    dup v0.8b, v0.b[0]
+; CHECK-NEXT:    ret
+entry:
+  %vaddlv.i = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8> %a)
+  %0 = and i32 %vaddlv.i, 65535
+  %conv = zext i32 %0 to i64
+  %vrshr_n = tail call i64 @llvm.aarch64.neon.urshl.i64(i64 %conv, i64 -3)
+  %conv1 = trunc i64 %vrshr_n to i8
+  %vecinit.i = insertelement <8 x i8> undef, i8 %conv1, i64 0
+  %vecinit7.i = shufflevector <8 x i8> %vecinit.i, <8 x i8> poison, <8 x i32> zeroinitializer
+  ret <8 x i8> %vecinit7.i
+}
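
To inspect the new codegen locally, something like the following should work
(this invocation is an assumption; the test's actual RUN line may differ):

    llc -mtriple=aarch64 llvm/test/CodeGen/AArch64/neon-addlv.ll -o -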
