[llvm] 5420fc4 - [AArch64][SVE][InstCombine] Unpack of a splat vector -> Scalar extend
Usman Nadeem via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 9 15:00:14 PDT 2021
Author: Usman Nadeem
Date: 2021-08-09T14:58:54-07:00
New Revision: 5420fc4a279bdd07cada4b607846ef0e9d184a13
URL: https://github.com/llvm/llvm-project/commit/5420fc4a279bdd07cada4b607846ef0e9d184a13
DIFF: https://github.com/llvm/llvm-project/commit/5420fc4a279bdd07cada4b607846ef0e9d184a13.diff
LOG: [AArch64][SVE][InstCombine] Unpack of a splat vector -> Scalar extend
Replace vector unpack operation with a scalar extend operation.
unpack(splat(X)) --> splat(extend(X))
If we have both unpkhi and unpklo for the same vector, then we may
save a register in some cases, e.g.:
Hi = unpkhi(splat(X))
Lo = unpklo(splat(X))
--> Hi = Lo = splat(extend(X))
Differential Revision: https://reviews.llvm.org/D106929
Change-Id: I77c5c201131e3a50de1cdccbdcf84420f5b2244b
Added:
llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-unpkhi-unpklo.ll
Modified:
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index c6a833cbb02f..882f1c01664f 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -783,6 +783,28 @@ static Optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
return None;
}
+static Optional<Instruction *> instCombineSVEUnpack(InstCombiner &IC,
+ IntrinsicInst &II) {
+ IRBuilder<> Builder(II.getContext());
+ Builder.SetInsertPoint(&II);
+ Value *UnpackArg = II.getArgOperand(0);
+ auto *RetTy = cast<ScalableVectorType>(II.getType());
+ bool IsSigned = II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpkhi ||
+ II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpklo;
+
+ // Hi = uunpkhi(splat(X)) --> Hi = splat(extend(X))
+ // Lo = uunpklo(splat(X)) --> Lo = splat(extend(X))
+ if (auto *ScalarArg = getSplatValue(UnpackArg)) {
+ ScalarArg =
+ Builder.CreateIntCast(ScalarArg, RetTy->getScalarType(), IsSigned);
+ Value *NewVal =
+ Builder.CreateVectorSplat(RetTy->getElementCount(), ScalarArg);
+ NewVal->takeName(&II);
+ return IC.replaceInstUsesWith(II, NewVal);
+ }
+
+ return None;
+}
static Optional<Instruction *> instCombineSVETBL(InstCombiner &IC,
IntrinsicInst &II) {
auto *OpVal = II.getOperand(0);
@@ -848,6 +870,11 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
return instCombineSVEVectorMul(IC, II);
case Intrinsic::aarch64_sve_tbl:
return instCombineSVETBL(IC, II);
+ case Intrinsic::aarch64_sve_uunpkhi:
+ case Intrinsic::aarch64_sve_uunpklo:
+ case Intrinsic::aarch64_sve_sunpkhi:
+ case Intrinsic::aarch64_sve_sunpklo:
+ return instCombineSVEUnpack(IC, II);
}
return None;
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-unpkhi-unpklo.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-unpkhi-unpklo.ll
new file mode 100644
index 000000000000..d7c616cb103e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-unpkhi-unpklo.ll
@@ -0,0 +1,63 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+target triple = "aarch64"
+
+define <vscale x 4 x i32> @uunpkhi_splat(i16 %a) #0 {
+; CHECK-LABEL: @uunpkhi_splat(
+; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP1]], i32 0
+; CHECK-NEXT: [[UNPACK:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: ret <vscale x 4 x i32> [[UNPACK]]
+;
+ %splat_insert = insertelement <vscale x 8 x i16> poison, i16 %a, i32 0
+ %splat = shufflevector <vscale x 8 x i16> %splat_insert, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+ %unpack = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uunpkhi.nxv4i32(<vscale x 8 x i16> %splat)
+ ret <vscale x 4 x i32> %unpack
+}
+
+define <vscale x 4 x i32> @uunpklo_splat(i16 %a) #0 {
+; CHECK-LABEL: @uunpklo_splat(
+; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP1]], i32 0
+; CHECK-NEXT: [[UNPACK:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: ret <vscale x 4 x i32> [[UNPACK]]
+;
+ %splat_insert = insertelement <vscale x 8 x i16> poison, i16 %a, i32 0
+ %splat = shufflevector <vscale x 8 x i16> %splat_insert, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+ %unpack = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uunpklo.nxv4i32(<vscale x 8 x i16> %splat)
+ ret <vscale x 4 x i32> %unpack
+}
+
+define <vscale x 4 x i32> @sunpkhi_splat(i16 %a) #0 {
+; CHECK-LABEL: @sunpkhi_splat(
+; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[A:%.*]] to i32
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP1]], i32 0
+; CHECK-NEXT: [[UNPACK:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: ret <vscale x 4 x i32> [[UNPACK]]
+;
+ %splat_insert = insertelement <vscale x 8 x i16> poison, i16 %a, i32 0
+ %splat = shufflevector <vscale x 8 x i16> %splat_insert, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+ %unpack = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sunpkhi.nxv4i32(<vscale x 8 x i16> %splat)
+ ret <vscale x 4 x i32> %unpack
+}
+
+define <vscale x 4 x i32> @sunpklo_splat(i16 %a) #0 {
+; CHECK-LABEL: @sunpklo_splat(
+; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[A:%.*]] to i32
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP1]], i32 0
+; CHECK-NEXT: [[UNPACK:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: ret <vscale x 4 x i32> [[UNPACK]]
+;
+ %splat_insert = insertelement <vscale x 8 x i16> poison, i16 %a, i32 0
+ %splat = shufflevector <vscale x 8 x i16> %splat_insert, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+ %unpack = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sunpklo.nxv4i32(<vscale x 8 x i16> %splat)
+ ret <vscale x 4 x i32> %unpack
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uunpkhi.nxv4i32(<vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uunpklo.nxv4i32(<vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sunpkhi.nxv4i32(<vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sunpklo.nxv4i32(<vscale x 8 x i16>)
+
+attributes #0 = { "target-features"="+sve" }
More information about the llvm-commits
mailing list