[llvm] b4b369a - [LLVM][InstCombine][AArch64] sve.dup(V, all_active, S) ==> splat(S) (#170292)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 4 02:58:30 PST 2025
Author: Paul Walker
Date: 2025-12-04T10:58:26Z
New Revision: b4b369a6bf2d9103bac619172ee1bdec992d6730
URL: https://github.com/llvm/llvm-project/commit/b4b369a6bf2d9103bac619172ee1bdec992d6730
DIFF: https://github.com/llvm/llvm-project/commit/b4b369a6bf2d9103bac619172ee1bdec992d6730.diff
LOG: [LLVM][InstCombine][AArch64] sve.dup(V, all_active, S) ==> splat(S) (#170292)
Also refactors the rest of instCombineSVEDup to simplify the code.
Added:
Modified:
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-dup.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index e8bbce202b407..0b19471eedb78 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1891,25 +1891,23 @@ static std::optional<Instruction *> instCombineSVESel(InstCombiner &IC,
static std::optional<Instruction *> instCombineSVEDup(InstCombiner &IC,
IntrinsicInst &II) {
- IntrinsicInst *Pg = dyn_cast<IntrinsicInst>(II.getArgOperand(1));
- if (!Pg)
- return std::nullopt;
+ Value *Pg = II.getOperand(1);
- if (Pg->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
- return std::nullopt;
+ // sve.dup(V, all_active, X) ==> splat(X)
+ if (isAllActivePredicate(Pg)) {
+ auto *RetTy = cast<ScalableVectorType>(II.getType());
+ Value *Splat = IC.Builder.CreateVectorSplat(RetTy->getElementCount(),
+ II.getArgOperand(2));
+ return IC.replaceInstUsesWith(II, Splat);
+ }
- const auto PTruePattern =
- cast<ConstantInt>(Pg->getOperand(0))->getZExtValue();
- if (PTruePattern != AArch64SVEPredPattern::vl1)
+ if (!match(Pg, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
+ m_SpecificInt(AArch64SVEPredPattern::vl1))))
return std::nullopt;
- // The intrinsic is inserting into lane zero so use an insert instead.
- auto *IdxTy = Type::getInt64Ty(II.getContext());
- auto *Insert = InsertElementInst::Create(
- II.getArgOperand(0), II.getArgOperand(2), ConstantInt::get(IdxTy, 0));
- Insert->insertBefore(II.getIterator());
- Insert->takeName(&II);
-
+ // sve.dup(V, sve.ptrue(vl1), X) ==> insertelement V, X, 0
+ Value *Insert = IC.Builder.CreateInsertElement(
+ II.getArgOperand(0), II.getArgOperand(2), uint64_t(0));
return IC.replaceInstUsesWith(II, Insert);
}
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-dup.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-dup.ll
index 41e7afcd110c9..0213d05db3eff 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-dup.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-dup.ll
@@ -55,15 +55,40 @@ define <vscale x 8 x i16> @dupx_splat_convert(i16 %s) #0 {
ret <vscale x 8 x i16> %splat
}
-declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16)
+define <vscale x 16 x i8> @dup_all_active_i8(<vscale x 16 x i8> %v, i8 %s) #0 {
+; CHECK-LABEL: @dup_all_active_i8(
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[S:%.*]], i64 0
+; CHECK-NEXT: [[INSERT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+; CHECK-NEXT: ret <vscale x 16 x i8> [[INSERT]]
+;
+ %insert = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8> %v, <vscale x 16 x i1> splat(i1 true), i8 %s)
+ ret <vscale x 16 x i8> %insert
+}
-declare <vscale x 16 x i8> @llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i8)
-declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i16)
+define <vscale x 4 x i32> @dup_all_active_i32(<vscale x 4 x i32> %v) #0 {
+; CHECK-LABEL: @dup_all_active_i32(
+; CHECK-NEXT: ret <vscale x 4 x i32> splat (i32 73)
+;
+ %insert = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32> %v, <vscale x 4 x i1> splat(i1 true), i32 73)
+ ret <vscale x 4 x i32> %insert
+}
-declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
-declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
+define <vscale x 4 x float> @dup_all_active_f32(<vscale x 4 x float> %v, float %s) #0 {
+; CHECK-LABEL: @dup_all_active_f32(
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[S:%.*]], i64 0
+; CHECK-NEXT: [[INSERT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: ret <vscale x 4 x float> [[INSERT]]
+;
+ %insert = tail call <vscale x 4 x float> @llvm.aarch64.sve.dup.nxv4f32(<vscale x 4 x float> %v, <vscale x 4 x i1> splat(i1 true), float %s)
+ ret <vscale x 4 x float> %insert
+}
-declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
-declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
+define <vscale x 2 x double> @dup_all_active_f64(<vscale x 2 x double> %v) #0 {
+; CHECK-LABEL: @dup_all_active_f64(
+; CHECK-NEXT: ret <vscale x 2 x double> splat (double 1.000000e+00)
+;
+ %insert = tail call <vscale x 2 x double> @llvm.aarch64.sve.dup.nxv2f64(<vscale x 2 x double> %v, <vscale x 2 x i1> splat(i1 true), double 1.0)
+ ret <vscale x 2 x double> %insert
+}
attributes #0 = { "target-features"="+sve" }
More information about the llvm-commits mailing list