[clang] 663bcb3 - [SVE] Replace unnecessary Intrinsic::aarch64_sve_ptrue construction. (#203349)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Jun 12 04:02:06 PDT 2026
Author: Paul Walker
Date: 2026-06-12T12:02:01+01:00
New Revision: 663bcb3574d72552b41de6a740e454f2f53e2f4a
URL: https://github.com/llvm/llvm-project/commit/663bcb3574d72552b41de6a740e454f2f53e2f4a
DIFF: https://github.com/llvm/llvm-project/commit/663bcb3574d72552b41de6a740e454f2f53e2f4a.diff
LOG: [SVE] Replace unnecessary Intrinsic::aarch64_sve_ptrue construction. (#203349)
Prefer ConstantInt::getTrue() over sve.ptrue(31) when creating
all-active boolean vectors.
Added:
Modified:
clang/lib/CodeGen/CodeGenFunction.h
clang/lib/CodeGen/TargetBuiltins/ARM.cpp
clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_dupq.c
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index aeace0d789a61..4757cf1411937 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4877,7 +4877,6 @@ class CodeGenFunction : public CodeGenTypeCache {
llvm::Value *EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
llvm::Type *ReturnType,
ArrayRef<llvm::Value *> Ops);
- llvm::Value *EmitSVEAllTruePred(const SVETypeFlags &TypeFlags);
llvm::Value *EmitSVEDupX(llvm::Value *Scalar);
llvm::Value *EmitSVEDupX(llvm::Value *Scalar, llvm::Type *Ty);
llvm::Value *EmitSVEReinterpret(llvm::Value *Val, llvm::Type *Ty);
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index ece8ff21561cf..4c668dabd53dc 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -3390,13 +3390,6 @@ CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
}
}
-llvm::Value *
-CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) {
- Function *Ptrue =
- CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags));
- return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)});
-}
-
constexpr unsigned SVEBitsPerBlock = 128;
static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
@@ -4277,7 +4270,7 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
return DupQLane;
SVETypeFlags TypeFlags(Builtin->TypeModifier);
- Value *Pred = EmitSVEAllTruePred(TypeFlags);
+ Constant *Pred = ConstantInt::getTrue(getSVEPredType(TypeFlags));
// For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_dupq.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_dupq.c
index 7a645c5c38b8c..d0c2883a34c67 100644
--- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_dupq.c
+++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_dupq.c
@@ -607,9 +607,8 @@ svfloat64_t test_svdupq_n_f64(float64_t x0, float64_t x1) MODE_ATTR
// CHECK-NEXT: [[TMP31:%.*]] = insertelement <16 x i8> [[TMP30]], i8 [[TMP15]], i64 15
// CHECK-NEXT: [[TMP32:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> poison, <16 x i8> [[TMP31]], i64 0)
// CHECK-NEXT: [[TMP33:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> [[TMP32]], i64 0)
-// CHECK-NEXT: [[TMP34:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-// CHECK-NEXT: [[TMP35:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.wide.nxv16i8(<vscale x 16 x i1> [[TMP34]], <vscale x 16 x i8> [[TMP33]], <vscale x 2 x i64> zeroinitializer)
-// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP35]]
+// CHECK-NEXT: [[TMP34:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.wide.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[TMP33]], <vscale x 2 x i64> zeroinitializer)
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP34]]
//
// CPP-CHECK-LABEL: @_Z16test_svdupq_n_b8bbbbbbbbbbbbbbbb(
// CPP-CHECK-NEXT: entry:
@@ -679,9 +678,8 @@ svfloat64_t test_svdupq_n_f64(float64_t x0, float64_t x1) MODE_ATTR
// CPP-CHECK-NEXT: [[TMP31:%.*]] = insertelement <16 x i8> [[TMP30]], i8 [[TMP15]], i64 15
// CPP-CHECK-NEXT: [[TMP32:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> poison, <16 x i8> [[TMP31]], i64 0)
// CPP-CHECK-NEXT: [[TMP33:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> [[TMP32]], i64 0)
-// CPP-CHECK-NEXT: [[TMP34:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-// CPP-CHECK-NEXT: [[TMP35:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.wide.nxv16i8(<vscale x 16 x i1> [[TMP34]], <vscale x 16 x i8> [[TMP33]], <vscale x 2 x i64> zeroinitializer)
-// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP35]]
+// CPP-CHECK-NEXT: [[TMP34:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.wide.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[TMP33]], <vscale x 2 x i64> zeroinitializer)
+// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP34]]
//
svbool_t test_svdupq_n_b8(bool x0, bool x1, bool x2, bool x3,
bool x4, bool x5, bool x6, bool x7,
@@ -728,10 +726,9 @@ svbool_t test_svdupq_n_b8(bool x0, bool x1, bool x2, bool x3,
// CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x i16> [[TMP14]], i16 [[TMP7]], i64 7
// CHECK-NEXT: [[TMP16:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> poison, <8 x i16> [[TMP15]], i64 0)
// CHECK-NEXT: [[TMP17:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> [[TMP16]], i64 0)
-// CHECK-NEXT: [[TMP18:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-// CHECK-NEXT: [[TMP19:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.wide.nxv8i16(<vscale x 8 x i1> [[TMP18]], <vscale x 8 x i16> [[TMP17]], <vscale x 2 x i64> zeroinitializer)
-// CHECK-NEXT: [[TMP20:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP19]])
-// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP20]]
+// CHECK-NEXT: [[TMP18:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.wide.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[TMP17]], <vscale x 2 x i64> zeroinitializer)
+// CHECK-NEXT: [[TMP19:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP18]])
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP19]]
//
// CPP-CHECK-LABEL: @_Z17test_svdupq_n_b16bbbbbbbb(
// CPP-CHECK-NEXT: entry:
@@ -769,10 +766,9 @@ svbool_t test_svdupq_n_b8(bool x0, bool x1, bool x2, bool x3,
// CPP-CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x i16> [[TMP14]], i16 [[TMP7]], i64 7
// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> poison, <8 x i16> [[TMP15]], i64 0)
// CPP-CHECK-NEXT: [[TMP17:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> [[TMP16]], i64 0)
-// CPP-CHECK-NEXT: [[TMP18:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-// CPP-CHECK-NEXT: [[TMP19:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.wide.nxv8i16(<vscale x 8 x i1> [[TMP18]], <vscale x 8 x i16> [[TMP17]], <vscale x 2 x i64> zeroinitializer)
-// CPP-CHECK-NEXT: [[TMP20:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP19]])
-// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP20]]
+// CPP-CHECK-NEXT: [[TMP18:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.wide.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[TMP17]], <vscale x 2 x i64> zeroinitializer)
+// CPP-CHECK-NEXT: [[TMP19:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP18]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP19]]
//
svbool_t test_svdupq_n_b16(bool x0, bool x1, bool x2, bool x3,
bool x4, bool x5, bool x6, bool x7) MODE_ATTR
@@ -801,10 +797,9 @@ svbool_t test_svdupq_n_b16(bool x0, bool x1, bool x2, bool x3,
// CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP3]], i64 3
// CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> [[TMP7]], i64 0)
// CHECK-NEXT: [[TMP9:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> [[TMP8]], i64 0)
-// CHECK-NEXT: [[TMP10:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-// CHECK-NEXT: [[TMP11:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> [[TMP10]], <vscale x 4 x i32> [[TMP9]], <vscale x 2 x i64> zeroinitializer)
-// CHECK-NEXT: [[TMP12:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP11]])
-// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP12]]
+// CHECK-NEXT: [[TMP10:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP9]], <vscale x 2 x i64> zeroinitializer)
+// CHECK-NEXT: [[TMP11:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP10]])
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP11]]
//
// CPP-CHECK-LABEL: @_Z17test_svdupq_n_b32bbbb(
// CPP-CHECK-NEXT: entry:
@@ -826,10 +821,9 @@ svbool_t test_svdupq_n_b16(bool x0, bool x1, bool x2, bool x3,
// CPP-CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP3]], i64 3
// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> [[TMP7]], i64 0)
// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> [[TMP8]], i64 0)
-// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> [[TMP10]], <vscale x 4 x i32> [[TMP9]], <vscale x 2 x i64> zeroinitializer)
-// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP11]])
-// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP12]]
+// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP9]], <vscale x 2 x i64> zeroinitializer)
+// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP10]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP11]]
//
svbool_t test_svdupq_n_b32(bool x0, bool x1, bool x2, bool x3) MODE_ATTR
{
@@ -849,10 +843,9 @@ svbool_t test_svdupq_n_b32(bool x0, bool x1, bool x2, bool x3) MODE_ATTR
// CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[TMP1]], i64 1
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> [[TMP3]], i64 0)
// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> [[TMP4]], i64 0)
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-// CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> [[TMP6]], <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> zeroinitializer)
-// CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP7]])
-// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP8]]
+// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> zeroinitializer)
+// CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP6]])
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP7]]
//
// CPP-CHECK-LABEL: @_Z17test_svdupq_n_b64bb(
// CPP-CHECK-NEXT: entry:
@@ -866,10 +859,9 @@ svbool_t test_svdupq_n_b32(bool x0, bool x1, bool x2, bool x3) MODE_ATTR
// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[TMP1]], i64 1
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> [[TMP3]], i64 0)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> [[TMP4]], i64 0)
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> [[TMP6]], <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> zeroinitializer)
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP7]])
-// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP8]]
+// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> zeroinitializer)
+// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP6]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP7]]
//
svbool_t test_svdupq_n_b64(bool x0, bool x1) MODE_ATTR
{
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c8f16f1503aa9..b5e82333e0801 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -18728,17 +18728,13 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
Value *PTrue = nullptr;
if (UseScalable) {
- std::optional<unsigned> PgPattern =
- getSVEPredPatternFromNumElements(FVTy->getNumElements());
- if (Subtarget->getMinSVEVectorSizeInBits() ==
- Subtarget->getMaxSVEVectorSizeInBits() &&
- Subtarget->getMinSVEVectorSizeInBits() == DL.getTypeSizeInBits(FVTy))
- PgPattern = AArch64SVEPredPattern::all;
-
- auto *PTruePat =
- ConstantInt::get(Type::getInt32Ty(LDVTy->getContext()), *PgPattern);
- PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy},
- {PTruePat});
+ if (DL.getTypeSizeInBits(FVTy) != Subtarget->getSVEVectorSizeInBits()) {
+ std::optional<unsigned> PgPattern =
+ getSVEPredPatternFromNumElements(FVTy->getNumElements());
+ PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, PredTy,
+ Builder.getInt32(*PgPattern));
+ } else
+ PTrue = ConstantInt::getTrue(PredTy);
}
for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
@@ -18950,18 +18946,13 @@ bool AArch64TargetLowering::lowerInterleavedStore(Instruction *Store,
Value *PTrue = nullptr;
if (UseScalable) {
- std::optional<unsigned> PgPattern =
- getSVEPredPatternFromNumElements(SubVecTy->getNumElements());
- if (Subtarget->getMinSVEVectorSizeInBits() ==
- Subtarget->getMaxSVEVectorSizeInBits() &&
- Subtarget->getMinSVEVectorSizeInBits() ==
- DL.getTypeSizeInBits(SubVecTy))
- PgPattern = AArch64SVEPredPattern::all;
-
- auto *PTruePat =
- ConstantInt::get(Type::getInt32Ty(STVTy->getContext()), *PgPattern);
- PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy},
- {PTruePat});
+ if (DL.getTypeSizeInBits(SubVecTy) != Subtarget->getSVEVectorSizeInBits()) {
+ std::optional<unsigned> PgPattern =
+ getSVEPredPatternFromNumElements(SubVecTy->getNumElements());
+ PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, PredTy,
+ Builder.getInt32(*PgPattern));
+ } else
+ PTrue = ConstantInt::getTrue(PredTy);
}
for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index b19d967900641..71077846088a2 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2155,15 +2155,12 @@ static std::optional<Instruction *> instCombineSVECmpNE(InstCombiner &IC,
if ((PredicateBits & (1 << I)) == 0)
return std::nullopt;
- auto *PTruePat =
- ConstantInt::get(Type::getInt32Ty(Ctx), AArch64SVEPredPattern::all);
- auto *PTrue = IC.Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue,
- {PredType}, {PTruePat});
- auto *ConvertToSVBool = IC.Builder.CreateIntrinsic(
- Intrinsic::aarch64_sve_convert_to_svbool, {PredType}, {PTrue});
+ auto *ConvertToSVBool =
+ IC.Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
+ PredType, ConstantInt::getTrue(PredType));
auto *ConvertFromSVBool =
IC.Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool,
- {II.getType()}, {ConvertToSVBool});
+ II.getType(), ConvertToSVBool);
ConvertFromSVBool->takeName(&II);
return IC.replaceInstUsesWith(II, ConvertFromSVBool);
@@ -2287,15 +2284,10 @@ static std::optional<Instruction *> instCombineSVECondLast(InstCombiner &IC,
static std::optional<Instruction *> instCombineRDFFR(InstCombiner &IC,
IntrinsicInst &II) {
- LLVMContext &Ctx = II.getContext();
// Replace rdffr with predicated rdffr.z intrinsic, so that optimizePTestInstr
// can work with RDFFR_PP for ptest elimination.
- auto *AllPat =
- ConstantInt::get(Type::getInt32Ty(Ctx), AArch64SVEPredPattern::all);
- auto *PTrue = IC.Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue,
- {II.getType()}, {AllPat});
- auto *RDFFR =
- IC.Builder.CreateIntrinsic(Intrinsic::aarch64_sve_rdffr_z, {PTrue});
+ auto *RDFFR = IC.Builder.CreateIntrinsic(Intrinsic::aarch64_sve_rdffr_z,
+ ConstantInt::getTrue(II.getType()));
RDFFR->takeName(&II);
return IC.replaceInstUsesWith(II, RDFFR);
}
diff --git a/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll
index f25f1b89164f7..7c676875acf92 100644
--- a/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll
+++ b/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll
@@ -7,8 +7,7 @@ target triple = "aarch64-linux-gnu"
define void @load_factor2(ptr %ptr) #0 {
; CHECK-LABEL: define void @load_factor2(
; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2.sret.nxv8i16.p0(<vscale x 8 x i1> [[TMP1]], ptr [[PTR]])
+; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2.sret.nxv8i16.p0(<vscale x 8 x i1> splat (i1 true), ptr [[PTR]])
; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[LDN]], 1
; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv8i16(<vscale x 8 x i16> [[TMP2]], i64 0)
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[LDN]], 0
@@ -26,8 +25,7 @@ define void @load_factor2(ptr %ptr) #0 {
define void @load_factor3(ptr %ptr) #0 {
; CHECK-LABEL: define void @load_factor3(
; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3.sret.nxv4i32.p0(<vscale x 4 x i1> [[TMP1]], ptr [[PTR]])
+; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3.sret.nxv4i32.p0(<vscale x 4 x i1> splat (i1 true), ptr [[PTR]])
; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 2
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv4i32(<vscale x 4 x i32> [[TMP2]], i64 0)
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 1
@@ -46,8 +44,7 @@ define void @load_factor3(ptr %ptr) #0 {
define void @load_factor4(ptr %ptr) #0 {
; CHECK-LABEL: define void @load_factor4(
; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4.sret.nxv2i64.p0(<vscale x 2 x i1> [[TMP1]], ptr [[PTR]])
+; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4.sret.nxv2i64.p0(<vscale x 2 x i1> splat (i1 true), ptr [[PTR]])
; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 3
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP2]], i64 0)
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 2
@@ -69,12 +66,11 @@ define void @load_factor4(ptr %ptr) #0 {
define void @store_factor2(ptr %ptr, <16 x i16> %v0, <16 x i16> %v1) #0 {
; CHECK-LABEL: define void @store_factor2(
; CHECK-SAME: ptr [[PTR:%.*]], <16 x i16> [[V0:%.*]], <16 x i16> [[V1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[V0]], <16 x i16> [[V1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v16i16(<vscale x 8 x i16> poison, <16 x i16> [[TMP2]], i64 0)
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[V0]], <16 x i16> [[V1]], <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v16i16(<vscale x 8 x i16> poison, <16 x i16> [[TMP4]], i64 0)
-; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv8i16.p0(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP5]], <vscale x 8 x i1> [[TMP1]], ptr [[PTR]])
+; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv8i16.p0(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP5]], <vscale x 8 x i1> splat (i1 true), ptr [[PTR]])
; CHECK-NEXT: ret void
;
%interleaved.vec = shufflevector <16 x i16> %v0, <16 x i16> %v1, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23,
@@ -88,14 +84,13 @@ define void @store_factor3(ptr %ptr, <8 x i32> %v0, <8 x i32> %v1, <8 x i32> %v2
; CHECK-SAME: ptr [[PTR:%.*]], <8 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]], <8 x i32> [[V2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[S0:%.*]] = shufflevector <8 x i32> [[V0]], <8 x i32> [[V1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[V2]], <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v8i32(<vscale x 4 x i32> poison, <8 x i32> [[TMP2]], i64 0)
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v8i32(<vscale x 4 x i32> poison, <8 x i32> [[TMP4]], i64 0)
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v8i32(<vscale x 4 x i32> poison, <8 x i32> [[TMP6]], i64 0)
-; CHECK-NEXT: call void @llvm.aarch64.sve.st3.nxv4i32.p0(<vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP7]], <vscale x 4 x i1> [[TMP1]], ptr [[PTR]])
+; CHECK-NEXT: call void @llvm.aarch64.sve.st3.nxv4i32.p0(<vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP7]], <vscale x 4 x i1> splat (i1 true), ptr [[PTR]])
; CHECK-NEXT: ret void
;
%s0 = shufflevector <8 x i32> %v0, <8 x i32> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
@@ -113,7 +108,6 @@ define void @store_factor4(ptr %ptr, <4 x i64> %v0, <4 x i64> %v1, <4 x i64> %v2
; CHECK-SAME: ptr [[PTR:%.*]], <4 x i64> [[V0:%.*]], <4 x i64> [[V1:%.*]], <4 x i64> [[V2:%.*]], <4 x i64> [[V3:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[S0:%.*]] = shufflevector <4 x i64> [[V0]], <4 x i64> [[V1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x i64> [[V2]], <4 x i64> [[V3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i64> [[S0]], <8 x i64> [[S1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP2]], i64 0)
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i64> [[S0]], <8 x i64> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -122,7 +116,7 @@ define void @store_factor4(ptr %ptr, <4 x i64> %v0, <4 x i64> %v1, <4 x i64> %v2
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP6]], i64 0)
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i64> [[S0]], <8 x i64> [[S1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP8]], i64 0)
-; CHECK-NEXT: call void @llvm.aarch64.sve.st4.nxv2i64.p0(<vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP9]], <vscale x 2 x i1> [[TMP1]], ptr [[PTR]])
+; CHECK-NEXT: call void @llvm.aarch64.sve.st4.nxv2i64.p0(<vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP9]], <vscale x 2 x i1> splat (i1 true), ptr [[PTR]])
; CHECK-NEXT: ret void
;
%s0 = shufflevector <4 x i64> %v0, <4 x i64> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -135,8 +129,7 @@ define void @store_factor4(ptr %ptr, <4 x i64> %v0, <4 x i64> %v1, <4 x i64> %v2
define void @load_ptrvec_factor2(ptr %ptr) #0 {
; CHECK-LABEL: define void @load_ptrvec_factor2(
; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64.p0(<vscale x 2 x i1> [[TMP1]], ptr [[PTR]])
+; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64.p0(<vscale x 2 x i1> splat (i1 true), ptr [[PTR]])
; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 1
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP2]], i64 0)
; CHECK-NEXT: [[TMP4:%.*]] = inttoptr <4 x i64> [[TMP3]] to <4 x ptr>
@@ -154,8 +147,7 @@ define void @load_ptrvec_factor2(ptr %ptr) #0 {
define void @load_ptrvec_factor3(ptr %ptr) #0 {
; CHECK-LABEL: define void @load_ptrvec_factor3(
; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3.sret.nxv2i64.p0(<vscale x 2 x i1> [[TMP1]], ptr [[PTR]])
+; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3.sret.nxv2i64.p0(<vscale x 2 x i1> splat (i1 true), ptr [[PTR]])
; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 2
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP2]], i64 0)
; CHECK-NEXT: [[TMP4:%.*]] = inttoptr <4 x i64> [[TMP3]] to <4 x ptr>
@@ -177,8 +169,7 @@ define void @load_ptrvec_factor3(ptr %ptr) #0 {
define void @load_ptrvec_factor4(ptr %ptr) #0 {
; CHECK-LABEL: define void @load_ptrvec_factor4(
; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4.sret.nxv2i64.p0(<vscale x 2 x i1> [[TMP1]], ptr [[PTR]])
+; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4.sret.nxv2i64.p0(<vscale x 2 x i1> splat (i1 true), ptr [[PTR]])
; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 3
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP2]], i64 0)
; CHECK-NEXT: [[TMP4:%.*]] = inttoptr <4 x i64> [[TMP3]] to <4 x ptr>
@@ -206,12 +197,11 @@ define void @store_ptrvec_factor2(ptr %ptr, <4 x ptr> %v0, <4 x ptr> %v1) #0 {
; CHECK-SAME: ptr [[PTR:%.*]], <4 x ptr> [[V0:%.*]], <4 x ptr> [[V1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint <4 x ptr> [[V0]] to <4 x i64>
; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint <4 x ptr> [[V1]] to <4 x i64>
-; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP4]], i64 0)
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP6]], i64 0)
-; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv2i64.p0(<vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP7]], <vscale x 2 x i1> [[TMP3]], ptr [[PTR]])
+; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv2i64.p0(<vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP7]], <vscale x 2 x i1> splat (i1 true), ptr [[PTR]])
; CHECK-NEXT: ret void
;
%interleaved.vec = shufflevector <4 x ptr> %v0, <4 x ptr> %v1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
@@ -226,14 +216,13 @@ define void @store_ptrvec_factor3(ptr %ptr, <4 x ptr> %v0, <4 x ptr> %v1, <4 x p
; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x ptr> [[V2]], <4 x ptr> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint <8 x ptr> [[S0]] to <8 x i64>
; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint <8 x ptr> [[S1]] to <8 x i64>
-; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP4]], i64 0)
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP6]], i64 0)
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP8]], i64 0)
-; CHECK-NEXT: call void @llvm.aarch64.sve.st3.nxv2i64.p0(<vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP9]], <vscale x 2 x i1> [[TMP3]], ptr [[PTR]])
+; CHECK-NEXT: call void @llvm.aarch64.sve.st3.nxv2i64.p0(<vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP9]], <vscale x 2 x i1> splat (i1 true), ptr [[PTR]])
; CHECK-NEXT: ret void
;
%s0 = shufflevector <4 x ptr> %v0, <4 x ptr> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -250,7 +239,6 @@ define void @store_ptrvec_factor4(ptr %ptr, <4 x ptr> %v0, <4 x ptr> %v1, <4 x p
; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x ptr> [[V2]], <4 x ptr> [[V3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint <8 x ptr> [[S0]] to <8 x i64>
; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint <8 x ptr> [[S1]] to <8 x i64>
-; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP4]], i64 0)
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -259,7 +247,7 @@ define void @store_ptrvec_factor4(ptr %ptr, <4 x ptr> %v0, <4 x ptr> %v1, <4 x p
; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP8]], i64 0)
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <4 x i32> <i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[TMP11:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP10]], i64 0)
-; CHECK-NEXT: call void @llvm.aarch64.sve.st4.nxv2i64.p0(<vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP9]], <vscale x 2 x i64> [[TMP11]], <vscale x 2 x i1> [[TMP3]], ptr [[PTR]])
+; CHECK-NEXT: call void @llvm.aarch64.sve.st4.nxv2i64.p0(<vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP9]], <vscale x 2 x i64> [[TMP11]], <vscale x 2 x i1> splat (i1 true), ptr [[PTR]])
; CHECK-NEXT: ret void
;
%s0 = shufflevector <4 x ptr> %v0, <4 x ptr> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -273,14 +261,13 @@ define void @store_ptrvec_factor4(ptr %ptr, <4 x ptr> %v0, <4 x ptr> %v1, <4 x p
define void @load_factor2_wide(ptr %ptr) #0 {
; CHECK-LABEL: define void @load_factor2_wide(
; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64.p0(<vscale x 2 x i1> [[TMP1]], ptr [[PTR]])
+; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64.p0(<vscale x 2 x i1> splat (i1 true), ptr [[PTR]])
; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 1
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP2]], i64 0)
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 0
; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP4]], i64 0)
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[PTR]], i32 8
-; CHECK-NEXT: [[LDN1:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64.p0(<vscale x 2 x i1> [[TMP1]], ptr [[TMP6]])
+; CHECK-NEXT: [[LDN1:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64.p0(<vscale x 2 x i1> splat (i1 true), ptr [[TMP6]])
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN1]], 1
; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP7]], i64 0)
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN1]], 0
@@ -298,18 +285,17 @@ define void @load_factor2_wide(ptr %ptr) #0 {
define void @store_factor2_wide(ptr %ptr, <8 x i64> %v0, <8 x i64> %v1) #0 {
; CHECK-LABEL: define void @store_factor2_wide(
; CHECK-SAME: ptr [[PTR:%.*]], <8 x i64> [[V0:%.*]], <8 x i64> [[V1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i64> [[V0]], <8 x i64> [[V1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP2]], i64 0)
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i64> [[V0]], <8 x i64> [[V1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP4]], i64 0)
-; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv2i64.p0(<vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i1> [[TMP1]], ptr [[PTR]])
+; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv2i64.p0(<vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i1> splat (i1 true), ptr [[PTR]])
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i64> [[V0]], <8 x i64> [[V1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP6]], i64 0)
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i64> [[V0]], <8 x i64> [[V1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP8]], i64 0)
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[PTR]], i32 8
-; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv2i64.p0(<vscale x 2 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP9]], <vscale x 2 x i1> [[TMP1]], ptr [[TMP10]])
+; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv2i64.p0(<vscale x 2 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP9]], <vscale x 2 x i1> splat (i1 true), ptr [[TMP10]])
; CHECK-NEXT: ret void
;
%interleaved.vec = shufflevector <8 x i64> %v0, <8 x i64> %v1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
@@ -432,8 +418,7 @@ define void @store_min_ge_type(ptr %ptr, <4 x i64> %v0, <4 x i64> %v1) #2 {
define void @load_double_factor4(ptr %ptr) #0 {
; CHECK-LABEL: define void @load_double_factor4(
; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld4.sret.nxv2f64.p0(<vscale x 2 x i1> [[TMP1]], ptr [[PTR]])
+; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld4.sret.nxv2f64.p0(<vscale x 2 x i1> splat (i1 true), ptr [[PTR]])
; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[LDN]], 3
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv2f64(<vscale x 2 x double> [[TMP2]], i64 0)
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[LDN]], 2
@@ -455,8 +440,7 @@ define void @load_double_factor4(ptr %ptr) #0 {
define void @load_float_factor3(ptr %ptr) #0 {
; CHECK-LABEL: define void @load_float_factor3(
; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld3.sret.nxv4f32.p0(<vscale x 4 x i1> [[TMP1]], ptr [[PTR]])
+; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld3.sret.nxv4f32.p0(<vscale x 4 x i1> splat (i1 true), ptr [[PTR]])
; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[LDN]], 2
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv4f32(<vscale x 4 x float> [[TMP2]], i64 0)
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[LDN]], 1
@@ -475,8 +459,7 @@ define void @load_float_factor3(ptr %ptr) #0 {
define void @load_half_factor2(ptr %ptr) #0 {
; CHECK-LABEL: define void @load_half_factor2(
; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld2.sret.nxv8f16.p0(<vscale x 8 x i1> [[TMP1]], ptr [[PTR]])
+; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld2.sret.nxv8f16.p0(<vscale x 8 x i1> splat (i1 true), ptr [[PTR]])
; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[LDN]], 1
; CHECK-NEXT: [[TMP3:%.*]] = call <16 x half> @llvm.vector.extract.v16f16.nxv8f16(<vscale x 8 x half> [[TMP2]], i64 0)
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[LDN]], 0
@@ -492,8 +475,7 @@ define void @load_half_factor2(ptr %ptr) #0 {
define void @load_bfloat_factor2(ptr %ptr) #0 {
; CHECK-LABEL: define void @load_bfloat_factor2(
; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld2.sret.nxv8bf16.p0(<vscale x 8 x i1> [[TMP1]], ptr [[PTR]])
+; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld2.sret.nxv8bf16.p0(<vscale x 8 x i1> splat (i1 true), ptr [[PTR]])
; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[LDN]], 1
; CHECK-NEXT: [[TMP3:%.*]] = call <16 x bfloat> @llvm.vector.extract.v16bf16.nxv8bf16(<vscale x 8 x bfloat> [[TMP2]], i64 0)
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[LDN]], 0
@@ -511,7 +493,6 @@ define void @store_double_factor4(ptr %ptr, <4 x double> %v0, <4 x double> %v1,
; CHECK-SAME: ptr [[PTR:%.*]], <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <4 x double> [[V2:%.*]], <4 x double> [[V3:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[S0:%.*]] = shufflevector <4 x double> [[V0]], <4 x double> [[V1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x double> [[V2]], <4 x double> [[V3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v4f64(<vscale x 2 x double> poison, <4 x double> [[TMP2]], i64 0)
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -520,7 +501,7 @@ define void @store_double_factor4(ptr %ptr, <4 x double> %v0, <4 x double> %v1,
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v4f64(<vscale x 2 x double> poison, <4 x double> [[TMP6]], i64 0)
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v4f64(<vscale x 2 x double> poison, <4 x double> [[TMP8]], i64 0)
-; CHECK-NEXT: call void @llvm.aarch64.sve.st4.nxv2f64.p0(<vscale x 2 x double> [[TMP3]], <vscale x 2 x double> [[TMP5]], <vscale x 2 x double> [[TMP7]], <vscale x 2 x double> [[TMP9]], <vscale x 2 x i1> [[TMP1]], ptr [[PTR]])
+; CHECK-NEXT: call void @llvm.aarch64.sve.st4.nxv2f64.p0(<vscale x 2 x double> [[TMP3]], <vscale x 2 x double> [[TMP5]], <vscale x 2 x double> [[TMP7]], <vscale x 2 x double> [[TMP9]], <vscale x 2 x i1> splat (i1 true), ptr [[PTR]])
; CHECK-NEXT: ret void
;
%s0 = shufflevector <4 x double> %v0, <4 x double> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -535,14 +516,13 @@ define void @store_float_factor3(ptr %ptr, <8 x float> %v0, <8 x float> %v1, <8
; CHECK-SAME: ptr [[PTR:%.*]], <8 x float> [[V0:%.*]], <8 x float> [[V1:%.*]], <8 x float> [[V2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[S0:%.*]] = shufflevector <8 x float> [[V0]], <8 x float> [[V1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x float> [[V2]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x float> [[S0]], <16 x float> [[S1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v8f32(<vscale x 4 x float> poison, <8 x float> [[TMP2]], i64 0)
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x float> [[S0]], <16 x float> [[S1]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v8f32(<vscale x 4 x float> poison, <8 x float> [[TMP4]], i64 0)
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x float> [[S0]], <16 x float> [[S1]], <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v8f32(<vscale x 4 x float> poison, <8 x float> [[TMP6]], i64 0)
-; CHECK-NEXT: call void @llvm.aarch64.sve.st3.nxv4f32.p0(<vscale x 4 x float> [[TMP3]], <vscale x 4 x float> [[TMP5]], <vscale x 4 x float> [[TMP7]], <vscale x 4 x i1> [[TMP1]], ptr [[PTR]])
+; CHECK-NEXT: call void @llvm.aarch64.sve.st3.nxv4f32.p0(<vscale x 4 x float> [[TMP3]], <vscale x 4 x float> [[TMP5]], <vscale x 4 x float> [[TMP7]], <vscale x 4 x i1> splat (i1 true), ptr [[PTR]])
; CHECK-NEXT: ret void
;
%s0 = shufflevector <8 x float> %v0, <8 x float> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
@@ -558,12 +538,11 @@ define void @store_float_factor3(ptr %ptr, <8 x float> %v0, <8 x float> %v1, <8
define void @store_half_factor2(ptr %ptr, <16 x half> %v0, <16 x half> %v1) #0 {
; CHECK-LABEL: define void @store_half_factor2(
; CHECK-SAME: ptr [[PTR:%.*]], <16 x half> [[V0:%.*]], <16 x half> [[V1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x half> [[V0]], <16 x half> [[V1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v16f16(<vscale x 8 x half> poison, <16 x half> [[TMP2]], i64 0)
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x half> [[V0]], <16 x half> [[V1]], <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v16f16(<vscale x 8 x half> poison, <16 x half> [[TMP4]], i64 0)
-; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv8f16.p0(<vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[TMP5]], <vscale x 8 x i1> [[TMP1]], ptr [[PTR]])
+; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv8f16.p0(<vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[TMP5]], <vscale x 8 x i1> splat (i1 true), ptr [[PTR]])
; CHECK-NEXT: ret void
;
%interleaved.vec = shufflevector <16 x half> %v0, <16 x half> %v1, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23,
@@ -576,12 +555,11 @@ define void @store_half_factor2(ptr %ptr, <16 x half> %v0, <16 x half> %v1) #0 {
define void @store_bfloat_factor2(ptr %ptr, <16 x bfloat> %v0, <16 x bfloat> %v1) #0 {
; CHECK-LABEL: define void @store_bfloat_factor2(
; CHECK-SAME: ptr [[PTR:%.*]], <16 x bfloat> [[V0:%.*]], <16 x bfloat> [[V1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x bfloat> [[V0]], <16 x bfloat> [[V1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v16bf16(<vscale x 8 x bfloat> poison, <16 x bfloat> [[TMP2]], i64 0)
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x bfloat> [[V0]], <16 x bfloat> [[V1]], <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v16bf16(<vscale x 8 x bfloat> poison, <16 x bfloat> [[TMP4]], i64 0)
-; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv8bf16.p0(<vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[TMP5]], <vscale x 8 x i1> [[TMP1]], ptr [[PTR]])
+; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv8bf16.p0(<vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[TMP5]], <vscale x 8 x i1> splat (i1 true), ptr [[PTR]])
; CHECK-NEXT: ret void
;
%interleaved.vec = shufflevector <16 x bfloat> %v0, <16 x bfloat> %v1, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23,
More information about the cfe-commits
mailing list