[llvm] 5420fc4 - [AArch64][SVE][InstCombine] Unpack of a splat vector -> Scalar extend

Usman Nadeem via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 9 15:00:14 PDT 2021


Author: Usman Nadeem
Date: 2021-08-09T14:58:54-07:00
New Revision: 5420fc4a279bdd07cada4b607846ef0e9d184a13

URL: https://github.com/llvm/llvm-project/commit/5420fc4a279bdd07cada4b607846ef0e9d184a13
DIFF: https://github.com/llvm/llvm-project/commit/5420fc4a279bdd07cada4b607846ef0e9d184a13.diff

LOG: [AArch64][SVE][InstCombine] Unpack of a splat vector -> Scalar extend

Replace vector unpack operation with a scalar extend operation.
  unpack(splat(X)) --> splat(extend(X))

If we have both, unpkhi and unpklo, for the same vector then we may
save a register in some cases, e.g:
  Hi = unpkhi(splat(X))
  Lo = unpklo(splat(X))
   --> Hi = Lo = splat(extend(X))

Differential Revision: https://reviews.llvm.org/D106929

Change-Id: I77c5c201131e3a50de1cdccbdcf84420f5b2244b

Added: 
    llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-unpkhi-unpklo.ll

Modified: 
    llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index c6a833cbb02f..882f1c01664f 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -783,6 +783,28 @@ static Optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
   return None;
 }
 
+static Optional<Instruction *> instCombineSVEUnpack(InstCombiner &IC,
+                                                    IntrinsicInst &II) {
+  IRBuilder<> Builder(II.getContext());
+  Builder.SetInsertPoint(&II);
+  Value *UnpackArg = II.getArgOperand(0);
+  auto *RetTy = cast<ScalableVectorType>(II.getType());
+  bool IsSigned = II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpkhi ||
+                  II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpklo;
+
+  // Hi = uunpkhi(splat(X)) --> Hi = splat(extend(X))
+  // Lo = uunpklo(splat(X)) --> Lo = splat(extend(X))
+  if (auto *ScalarArg = getSplatValue(UnpackArg)) {
+    ScalarArg =
+        Builder.CreateIntCast(ScalarArg, RetTy->getScalarType(), IsSigned);
+    Value *NewVal =
+        Builder.CreateVectorSplat(RetTy->getElementCount(), ScalarArg);
+    NewVal->takeName(&II);
+    return IC.replaceInstUsesWith(II, NewVal);
+  }
+
+  return None;
+}
 static Optional<Instruction *> instCombineSVETBL(InstCombiner &IC,
                                                  IntrinsicInst &II) {
   auto *OpVal = II.getOperand(0);
@@ -848,6 +870,11 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
     return instCombineSVEVectorMul(IC, II);
   case Intrinsic::aarch64_sve_tbl:
     return instCombineSVETBL(IC, II);
+  case Intrinsic::aarch64_sve_uunpkhi:
+  case Intrinsic::aarch64_sve_uunpklo:
+  case Intrinsic::aarch64_sve_sunpkhi:
+  case Intrinsic::aarch64_sve_sunpklo:
+    return instCombineSVEUnpack(IC, II);
   }
 
   return None;

diff  --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-unpkhi-unpklo.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-unpkhi-unpklo.ll
new file mode 100644
index 000000000000..d7c616cb103e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-unpkhi-unpklo.ll
@@ -0,0 +1,63 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+target triple = "aarch64"
+
+define <vscale x 4 x i32> @uunpkhi_splat(i16 %a) #0 {
+; CHECK-LABEL: @uunpkhi_splat(
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP1]], i32 0
+; CHECK-NEXT:    [[UNPACK:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[UNPACK]]
+;
+  %splat_insert = insertelement <vscale x 8 x i16> poison, i16 %a, i32 0
+  %splat = shufflevector <vscale x 8 x i16> %splat_insert, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %unpack = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uunpkhi.nxv4i32(<vscale x 8 x i16> %splat)
+  ret <vscale x 4 x i32> %unpack
+}
+
+define <vscale x 4 x i32> @uunpklo_splat(i16 %a) #0 {
+; CHECK-LABEL: @uunpklo_splat(
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP1]], i32 0
+; CHECK-NEXT:    [[UNPACK:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[UNPACK]]
+;
+  %splat_insert = insertelement <vscale x 8 x i16> poison, i16 %a, i32 0
+  %splat = shufflevector <vscale x 8 x i16> %splat_insert, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %unpack = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uunpklo.nxv4i32(<vscale x 8 x i16> %splat)
+  ret <vscale x 4 x i32> %unpack
+}
+
+define <vscale x 4 x i32> @sunpkhi_splat(i16 %a) #0 {
+; CHECK-LABEL: @sunpkhi_splat(
+; CHECK-NEXT:    [[TMP1:%.*]] = sext i16 [[A:%.*]] to i32
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP1]], i32 0
+; CHECK-NEXT:    [[UNPACK:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[UNPACK]]
+;
+  %splat_insert = insertelement <vscale x 8 x i16> poison, i16 %a, i32 0
+  %splat = shufflevector <vscale x 8 x i16> %splat_insert, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %unpack = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sunpkhi.nxv4i32(<vscale x 8 x i16> %splat)
+  ret <vscale x 4 x i32> %unpack
+}
+
+define <vscale x 4 x i32> @sunpklo_splat(i16 %a) #0 {
+; CHECK-LABEL: @sunpklo_splat(
+; CHECK-NEXT:    [[TMP1:%.*]] = sext i16 [[A:%.*]] to i32
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP1]], i32 0
+; CHECK-NEXT:    [[UNPACK:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[UNPACK]]
+;
+  %splat_insert = insertelement <vscale x 8 x i16> poison, i16 %a, i32 0
+  %splat = shufflevector <vscale x 8 x i16> %splat_insert, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %unpack = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sunpklo.nxv4i32(<vscale x 8 x i16> %splat)
+  ret <vscale x 4 x i32> %unpack
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uunpkhi.nxv4i32(<vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uunpklo.nxv4i32(<vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sunpkhi.nxv4i32(<vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sunpklo.nxv4i32(<vscale x 8 x i16>)
+
+attributes #0 = { "target-features"="+sve" }


        


More information about the llvm-commits mailing list