[clang] [llvm] [AArch64][SVE] Instcombine ptrue(all) to splat(i1) (PR #135016)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Apr 9 06:59:29 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
Author: Matthew Devereau (MDevereau)
<details>
<summary>Changes</summary>
SVE Operations such as predicated loads become canonicalized to LLVM masked loads, and doing the same for ptrue(all) to splat(1) creates further optimization opportunities from generic LLVM IR passes.
---
Patch is 214.78 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/135016.diff
11 Files Affected:
- (modified) clang/test/CodeGen/AArch64/sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c (+2-4)
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rdffr.c (+2-4)
- (modified) llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp (+29-12)
- (modified) llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll (+1-2)
- (modified) llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll (+35-70)
- (modified) llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-loadstore.ll (+10-12)
- (modified) llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-cmpne.ll (+25-47)
- (modified) llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-strictfp.ll (+6-12)
- (modified) llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-combine-to-u-forms.ll (+242-363)
- (modified) llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-ptest.ll (+1-2)
- (modified) llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-rdffr-predication.ll (+1-2)
``````````diff
diff --git a/clang/test/CodeGen/AArch64/sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c b/clang/test/CodeGen/AArch64/sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
index bbed683ac1fd7..c3d0541229fac 100644
--- a/clang/test/CodeGen/AArch64/sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
+++ b/clang/test/CodeGen/AArch64/sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
@@ -52,9 +52,8 @@ vec2048 x2048 = {0, 1, 2, 3, 3 , 2 , 1, 0, 0, 1, 2, 3, 3 , 2 , 1, 0,
typedef int8_t vec_int8 __attribute__((vector_size(N / 8)));
// CHECK128-LABEL: define{{.*}} <16 x i8> @f2(<16 x i8> noundef %x)
// CHECK128-NEXT: entry:
-// CHECK128-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
// CHECK128-NEXT: [[CASTSCALABLESVE:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> poison, <16 x i8> [[X:%.*]], i64 0)
-// CHECK128-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> [[TMP0]], <vscale x 16 x i8> [[CASTSCALABLESVE]], i32 1)
+// CHECK128-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[CASTSCALABLESVE]], i32 1)
// CHECK128-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> [[TMP1]], i64 0)
// CHECK128-NEXT: ret <16 x i8> [[CASTFIXEDSVE]]
@@ -62,9 +61,8 @@ typedef int8_t vec_int8 __attribute__((vector_size(N / 8)));
// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<[[#div(VBITS,8)]] x i8>) align 16 captures(none) initializes((0, [[#div(VBITS,8)]])) %agg.result, ptr noundef readonly captures(none) %0)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[X:%.*]] = load <[[#div(VBITS,8)]] x i8>, ptr [[TMP0:%.*]], align 16, [[TBAA6:!tbaa !.*]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v[[#div(VBITS,8)]]i8(<vscale x 16 x i8> poison, <[[#div(VBITS,8)]] x i8> [[X]], i64 0)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[CASTSCALABLESVE]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[CASTSCALABLESVE]], i32 1)
// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <[[#div(VBITS,8)]] x i8> @llvm.vector.extract.v[[#div(VBITS,8)]]i8.nxv16i8(<vscale x 16 x i8> [[TMP2]], i64 0)
// CHECK-NEXT: store <[[#div(VBITS,8)]] x i8> [[CASTFIXEDSVE]], ptr [[AGG_RESULT:%.*]], align 16, [[TBAA6]]
// CHECK-NEXT: ret void
diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rdffr.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rdffr.c
index 6bf56bdea505c..ca3480d62725a 100644
--- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rdffr.c
+++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rdffr.c
@@ -7,14 +7,12 @@
// CHECK-LABEL: @test_svrdffr(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> [[TMP0]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> splat (i1 true))
// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z12test_svrdffrv(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> [[TMP0]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> splat (i1 true))
// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
//
svbool_t test_svrdffr()
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index b1d8277182add..ae5792cc8c1f1 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1492,9 +1492,17 @@ static bool isAllActivePredicate(Value *Pred) {
if (cast<ScalableVectorType>(Pred->getType())->getMinNumElements() <=
cast<ScalableVectorType>(UncastedPred->getType())->getMinNumElements())
Pred = UncastedPred;
+ if (match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
+ m_ConstantInt<AArch64SVEPredPattern::all>())))
+ return true;
+
+ if (Value *Splat = getSplatValue(Pred)) {
+ auto ConstIdx = dyn_cast<ConstantInt>(Splat);
+ if (ConstIdx->getZExtValue() == 1)
+ return true;
+ }
- return match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
- m_ConstantInt<AArch64SVEPredPattern::all>()));
+ return false;
}
// Use SVE intrinsic info to eliminate redundant operands and/or canonicalise
@@ -1701,14 +1709,7 @@ static std::optional<Instruction *> instCombineSVECmpNE(InstCombiner &IC,
IntrinsicInst &II) {
LLVMContext &Ctx = II.getContext();
- // Check that the predicate is all active
- auto *Pg = dyn_cast<IntrinsicInst>(II.getArgOperand(0));
- if (!Pg || Pg->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
- return std::nullopt;
-
- const auto PTruePattern =
- cast<ConstantInt>(Pg->getOperand(0))->getZExtValue();
- if (PTruePattern != AArch64SVEPredPattern::all)
+ if (!isAllActivePredicate(II.getArgOperand(0)))
return std::nullopt;
// Check that we have a compare of zero..
@@ -2118,8 +2119,7 @@ instCombineSVEVectorBinOp(InstCombiner &IC, IntrinsicInst &II) {
auto *OpPredicate = II.getOperand(0);
auto BinOpCode = intrinsicIDToBinOpCode(II.getIntrinsicID());
if (BinOpCode == Instruction::BinaryOpsEnd ||
- !match(OpPredicate, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
- m_ConstantInt<AArch64SVEPredPattern::all>())))
+ !isAllActivePredicate(OpPredicate))
return std::nullopt;
auto BinOp = IC.Builder.CreateBinOpFMF(
BinOpCode, II.getOperand(1), II.getOperand(2), II.getFastMathFlags());
@@ -2641,6 +2641,21 @@ static std::optional<Instruction *> instCombineDMB(InstCombiner &IC,
return std::nullopt;
}
+static std::optional<Instruction *> instCombinePTrue(InstCombiner &IC,
+ IntrinsicInst &II) {
+ IRBuilder<> Builder(&II);
+ auto Type = cast<VectorType>(II.getType());
+ ConstantInt *Pattern;
+ if (match(II.getOperand(0), m_ConstantInt(Pattern)) &&
+ Pattern->getZExtValue() == AArch64SVEPredPattern::all) {
+ Value *One = ConstantInt::get(Builder.getInt1Ty(), APInt(1, 1));
+ Value *SplatOne =
+ Builder.CreateVectorSplat(Type->getElementCount(), One);
+ return IC.replaceInstUsesWith(II, SplatOne);
+ }
+ return std::nullopt;
+}
+
std::optional<Instruction *>
AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) const {
@@ -2744,6 +2759,8 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
return instCombineSVEDupqLane(IC, II);
case Intrinsic::aarch64_sve_insr:
return instCombineSVEInsr(IC, II);
+ case Intrinsic::aarch64_sve_ptrue:
+ return instCombinePTrue(IC, II);
}
return std::nullopt;
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll
index 7fb0fbdda0b5d..f71aaa289b89c 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll
@@ -42,8 +42,7 @@ define <vscale x 8 x i16> @srshl_abs_positive_merge(<vscale x 8 x i16> %a, <vsca
define <vscale x 8 x i16> @srshl_abs_all_active_pred(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> %pg2) #0 {
; CHECK-LABEL: @srshl_abs_all_active_pred(
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i1> [[PG]], <vscale x 8 x i16> [[A:%.*]])
+; CHECK-NEXT: [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A:%.*]])
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> splat (i16 2))
; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
;
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll
index b8ea4de3d2382..1c5f7464d858a 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll
@@ -5,8 +5,7 @@ target triple = "aarch64-unknown-linux-gnu"
define <vscale x 8 x bfloat> @test_fcvt_bf16_f32_poison(<vscale x 8 x bfloat> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: define <vscale x 8 x bfloat> @test_fcvt_bf16_f32_poison(
; CHECK-SAME: <vscale x 8 x bfloat> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> poison, <vscale x 4 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> poison, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[B]])
; CHECK-NEXT: ret <vscale x 8 x bfloat> [[OUT]]
;
%pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -17,8 +16,7 @@ define <vscale x 8 x bfloat> @test_fcvt_bf16_f32_poison(<vscale x 8 x bfloat> %a
define <vscale x 8 x bfloat> @test_fcvt_bf16_f32(<vscale x 8 x bfloat> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: define <vscale x 8 x bfloat> @test_fcvt_bf16_f32(
; CHECK-SAME: <vscale x 8 x bfloat> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> undef, <vscale x 4 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> undef, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[B]])
; CHECK-NEXT: ret <vscale x 8 x bfloat> [[OUT]]
;
%pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -29,8 +27,7 @@ define <vscale x 8 x bfloat> @test_fcvt_bf16_f32(<vscale x 8 x bfloat> %a, <vsca
define <vscale x 8 x half> @test_fcvt_f16_f32(<vscale x 8 x half> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: define <vscale x 8 x half> @test_fcvt_f16_f32(
; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f32(<vscale x 8 x half> undef, <vscale x 4 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f32(<vscale x 8 x half> undef, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[B]])
; CHECK-NEXT: ret <vscale x 8 x half> [[OUT]]
;
%pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -41,8 +38,7 @@ define <vscale x 8 x half> @test_fcvt_f16_f32(<vscale x 8 x half> %a, <vscale x
define <vscale x 8 x half> @test_fcvt_f16_f64(<vscale x 8 x half> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: define <vscale x 8 x half> @test_fcvt_f16_f64(
; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f64(<vscale x 8 x half> undef, <vscale x 2 x i1> [[PG]], <vscale x 2 x double> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f64(<vscale x 8 x half> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[B]])
; CHECK-NEXT: ret <vscale x 8 x half> [[OUT]]
;
%pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -53,8 +49,7 @@ define <vscale x 8 x half> @test_fcvt_f16_f64(<vscale x 8 x half> %a, <vscale x
define <vscale x 4 x float> @test_fcvt_f32_f16(<vscale x 4 x float> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: define <vscale x 4 x float> @test_fcvt_f32_f16(
; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f16(<vscale x 4 x float> undef, <vscale x 4 x i1> [[PG]], <vscale x 8 x half> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f16(<vscale x 4 x float> undef, <vscale x 4 x i1> splat (i1 true), <vscale x 8 x half> [[B]])
; CHECK-NEXT: ret <vscale x 4 x float> [[OUT]]
;
%pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -65,8 +60,7 @@ define <vscale x 4 x float> @test_fcvt_f32_f16(<vscale x 4 x float> %a, <vscale
define <vscale x 4 x float> @test_fcvt_f32_f64(<vscale x 4 x float> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: define <vscale x 4 x float> @test_fcvt_f32_f64(
; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f64(<vscale x 4 x float> undef, <vscale x 2 x i1> [[PG]], <vscale x 2 x double> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f64(<vscale x 4 x float> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[B]])
; CHECK-NEXT: ret <vscale x 4 x float> [[OUT]]
;
%pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -77,8 +71,7 @@ define <vscale x 4 x float> @test_fcvt_f32_f64(<vscale x 4 x float> %a, <vscale
define <vscale x 2 x double> @test_fcvt_f64_f16(<vscale x 2 x double> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: define <vscale x 2 x double> @test_fcvt_f64_f16(
; CHECK-SAME: <vscale x 2 x double> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f16(<vscale x 2 x double> undef, <vscale x 2 x i1> [[PG]], <vscale x 8 x half> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f16(<vscale x 2 x double> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 8 x half> [[B]])
; CHECK-NEXT: ret <vscale x 2 x double> [[OUT]]
;
%pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -89,8 +82,7 @@ define <vscale x 2 x double> @test_fcvt_f64_f16(<vscale x 2 x double> %a, <vscal
define <vscale x 2 x double> @test_fcvt_f64_f32(<vscale x 2 x double> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: define <vscale x 2 x double> @test_fcvt_f64_f32(
; CHECK-SAME: <vscale x 2 x double> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f32(<vscale x 2 x double> undef, <vscale x 2 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f32(<vscale x 2 x double> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 4 x float> [[B]])
; CHECK-NEXT: ret <vscale x 2 x double> [[OUT]]
;
%pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -101,8 +93,7 @@ define <vscale x 2 x double> @test_fcvt_f64_f32(<vscale x 2 x double> %a, <vscal
define <vscale x 4 x float> @test_fcvtlt_f32_f16(<vscale x 4 x float> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: define <vscale x 4 x float> @test_fcvtlt_f32_f16(
; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float> undef, <vscale x 4 x i1> [[PG]], <vscale x 8 x half> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float> undef, <vscale x 4 x i1> splat (i1 true), <vscale x 8 x half> [[B]])
; CHECK-NEXT: ret <vscale x 4 x float> [[OUT]]
;
%pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -113,8 +104,7 @@ define <vscale x 4 x float> @test_fcvtlt_f32_f16(<vscale x 4 x float> %a, <vscal
define <vscale x 2 x double> @test_fcvtlt_f64_f32(<vscale x 2 x double> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: define <vscale x 2 x double> @test_fcvtlt_f64_f32(
; CHECK-SAME: <vscale x 2 x double> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double> undef, <vscale x 2 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 4 x float> [[B]])
; CHECK-NEXT: ret <vscale x 2 x double> [[OUT]]
;
%pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -125,8 +115,7 @@ define <vscale x 2 x double> @test_fcvtlt_f64_f32(<vscale x 2 x double> %a, <vsc
define <vscale x 8 x bfloat> @test_fcvtnt_bf16_f32(<vscale x 8 x bfloat> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: define <vscale x 8 x bfloat> @test_fcvtnt_bf16_f32(
; CHECK-SAME: <vscale x 8 x bfloat> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32.v2(<vscale x 8 x bfloat> [[A]], <vscale x 4 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32.v2(<vscale x 8 x bfloat> [[A]], <vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[B]])
; CHECK-NEXT: ret <vscale x 8 x bfloat> [[OUT]]
;
%pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -137,8 +126,7 @@ define <vscale x 8 x bfloat> @test_fcvtnt_bf16_f32(<vscale x 8 x bfloat> %a, <vs
define <vscale x 8 x half> @test_fcvtnt_f16_f32(<vscale x 8 x half> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: define <vscale x 8 x half> @test_fcvtnt_f16_f32(
; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcvtnt.f16f32(<vscale x 8 x half> [[A]], <vscale x 4 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcvtnt.f16f32(<vscale x 8 x half> [[A]], <vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[B]])
; CHECK-NEXT: ret <vscale x 8 x half> [[OUT]]
;
%pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -149,8 +137,7 @@ define <vs...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/135016
More information about the cfe-commits
mailing list