[llvm] [RISCV][TTI] Add vp.fneg intrinsic cost with functionalOP (PR #114378)
LiqinWeng via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 31 02:23:09 PDT 2024
https://github.com/LiqinWeng created https://github.com/llvm/llvm-project/pull/114378
(No description was provided for this pull request.)
>From 1ed3a71a59f9e6ed08273c361399bc1de9284d87 Mon Sep 17 00:00:00 2001
From: "Liqin.Weng" <liqin.weng at spacemit.com>
Date: Thu, 31 Oct 2024 17:21:52 +0800
Subject: [PATCH] [RISCV][TTI] Add vp.fneg intrinsic cost with functionalOP
---
.../Target/RISCV/RISCVTargetTransformInfo.cpp | 3 +-
.../CostModel/RISCV/fp-min-max-abs.ll | 166 ++---
.../Analysis/CostModel/RISCV/fp-sqrt-pow.ll | 72 +--
.../CostModel/RISCV/fp-trig-log-exp.ll | 252 ++++----
llvm/test/Analysis/CostModel/RISCV/fround.ll | 608 +++++++++---------
.../Analysis/CostModel/RISCV/int-bit-manip.ll | 568 ++++++++--------
.../Analysis/CostModel/RISCV/int-min-max.ll | 304 ++++-----
.../Analysis/CostModel/RISCV/int-sat-math.ll | 360 +++++------
.../CostModel/RISCV/rvv-intrinsics.ll | 108 ++++
9 files changed, 1275 insertions(+), 1166 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 395baa5f1aab99..2c8b71c01c2e30 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1111,7 +1111,8 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
case Intrinsic::vp_fsub:
case Intrinsic::vp_fmul:
case Intrinsic::vp_fdiv:
- case Intrinsic::vp_frem: {
+ case Intrinsic::vp_frem:
+ case Intrinsic::vp_fneg: {
std::optional<unsigned> FOp =
VPIntrinsic::getFunctionalOpcodeForVP(ICA.getID());
assert(FOp.has_value());
diff --git a/llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll b/llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll
index 6e4061a42bf9b8..0b2c8da4438da2 100644
--- a/llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll
@@ -30,20 +30,20 @@ define void @fabs() {
call <4 x float> @llvm.fabs.v4f32(<4 x float> undef)
call <8 x float> @llvm.fabs.v8f32(<8 x float> undef)
call <16 x float> @llvm.fabs.v16f32(<16 x float> undef)
- call <vscale x 1 x float> @llvm.fabs.nvx1f32(<vscale x 1 x float> undef)
- call <vscale x 2 x float> @llvm.fabs.nvx2f32(<vscale x 2 x float> undef)
- call <vscale x 4 x float> @llvm.fabs.nvx4f32(<vscale x 4 x float> undef)
- call <vscale x 8 x float> @llvm.fabs.nvx8f32(<vscale x 8 x float> undef)
- call <vscale x 16 x float> @llvm.fabs.nvx16f32(<vscale x 16 x float> undef)
+ call <vscale x 1 x float> @llvm.fabs.nxv1f32(<vscale x 1 x float> undef)
+ call <vscale x 2 x float> @llvm.fabs.nxv2f32(<vscale x 2 x float> undef)
+ call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> undef)
+ call <vscale x 8 x float> @llvm.fabs.nxv8f32(<vscale x 8 x float> undef)
+ call <vscale x 16 x float> @llvm.fabs.nxv16f32(<vscale x 16 x float> undef)
call double @llvm.fabs.f64(double undef)
call <2 x double> @llvm.fabs.v2f64(<2 x double> undef)
call <4 x double> @llvm.fabs.v4f64(<4 x double> undef)
call <8 x double> @llvm.fabs.v8f64(<8 x double> undef)
call <16 x double> @llvm.fabs.v16f64(<16 x double> undef)
- call <vscale x 1 x double> @llvm.fabs.nvx1f64(<vscale x 1 x double> undef)
- call <vscale x 2 x double> @llvm.fabs.nvx2f64(<vscale x 2 x double> undef)
- call <vscale x 4 x double> @llvm.fabs.nvx4f64(<vscale x 4 x double> undef)
- call <vscale x 8 x double> @llvm.fabs.nvx8f64(<vscale x 8 x double> undef)
+ call <vscale x 1 x double> @llvm.fabs.nxv1f64(<vscale x 1 x double> undef)
+ call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> undef)
+ call <vscale x 4 x double> @llvm.fabs.nxv4f64(<vscale x 4 x double> undef)
+ call <vscale x 8 x double> @llvm.fabs.nxv8f64(<vscale x 8 x double> undef)
ret void
}
@@ -65,10 +65,10 @@ define void @fabs_f16() {
call <4 x half> @llvm.fabs.v4f16(<4 x half> undef)
call <8 x half> @llvm.fabs.v8f16(<8 x half> undef)
call <16 x half> @llvm.fabs.v16f16(<16 x half> undef)
- call <vscale x 2 x half> @llvm.fabs.nvx2f16(<vscale x 2 x half> undef)
- call <vscale x 4 x half> @llvm.fabs.nvx4f16(<vscale x 4 x half> undef)
- call <vscale x 8 x half> @llvm.fabs.nvx8f16(<vscale x 8 x half> undef)
- call <vscale x 16 x half> @llvm.fabs.nvx16f16(<vscale x 16 x half> undef)
+ call <vscale x 2 x half> @llvm.fabs.nxv2f16(<vscale x 2 x half> undef)
+ call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> undef)
+ call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> undef)
+ call <vscale x 16 x half> @llvm.fabs.nxv16f16(<vscale x 16 x half> undef)
ret void
}
@@ -100,20 +100,20 @@ define void @minnum() {
call <4 x float> @llvm.minnum.v4f32(<4 x float> undef, <4 x float> undef)
call <8 x float> @llvm.minnum.v8f32(<8 x float> undef, <8 x float> undef)
call <16 x float> @llvm.minnum.v16f32(<16 x float> undef, <16 x float> undef)
- call <vscale x 1 x float> @llvm.minnum.nvx1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef)
- call <vscale x 2 x float> @llvm.minnum.nvx2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef)
- call <vscale x 4 x float> @llvm.minnum.nvx4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef)
- call <vscale x 8 x float> @llvm.minnum.nvx8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef)
- call <vscale x 16 x float> @llvm.minnum.nvx16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef)
+ call <vscale x 1 x float> @llvm.minnum.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef)
+ call <vscale x 2 x float> @llvm.minnum.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef)
+ call <vscale x 4 x float> @llvm.minnum.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef)
+ call <vscale x 8 x float> @llvm.minnum.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef)
+ call <vscale x 16 x float> @llvm.minnum.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef)
call double @llvm.minnum.f64(double undef, double undef)
call <2 x double> @llvm.minnum.v2f64(<2 x double> undef, <2 x double> undef)
call <4 x double> @llvm.minnum.v4f64(<4 x double> undef, <4 x double> undef)
call <8 x double> @llvm.minnum.v8f64(<8 x double> undef, <8 x double> undef)
call <16 x double> @llvm.minnum.v16f64(<16 x double> undef, <16 x double> undef)
- call <vscale x 1 x double> @llvm.minnum.nvx1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef)
- call <vscale x 2 x double> @llvm.minnum.nvx2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef)
- call <vscale x 4 x double> @llvm.minnum.nvx4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef)
- call <vscale x 8 x double> @llvm.minnum.nvx8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef)
+ call <vscale x 1 x double> @llvm.minnum.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef)
+ call <vscale x 2 x double> @llvm.minnum.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef)
+ call <vscale x 4 x double> @llvm.minnum.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef)
+ call <vscale x 8 x double> @llvm.minnum.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef)
ret void
}
@@ -149,11 +149,11 @@ define void @minnum_f16() {
call <4 x half> @llvm.minnum.v4f16(<4 x half> undef, <4 x half> undef)
call <8 x half> @llvm.minnum.v8f16(<8 x half> undef, <8 x half> undef)
call <16 x half> @llvm.minnum.v16f16(<16 x half> undef, <16 x half> undef)
- call <vscale x 1 x half> @llvm.minnum.nvx1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
- call <vscale x 2 x half> @llvm.minnum.nvx2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
- call <vscale x 4 x half> @llvm.minnum.nvx4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
- call <vscale x 8 x half> @llvm.minnum.nvx8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
- call <vscale x 16 x half> @llvm.minnum.nvx16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
+ call <vscale x 1 x half> @llvm.minnum.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
+ call <vscale x 2 x half> @llvm.minnum.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
+ call <vscale x 4 x half> @llvm.minnum.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
+ call <vscale x 8 x half> @llvm.minnum.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
+ call <vscale x 16 x half> @llvm.minnum.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
ret void
}
@@ -185,20 +185,20 @@ define void @maxnum() {
call <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef)
call <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef)
call <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef)
- call <vscale x 1 x float> @llvm.maxnum.nvx1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef)
- call <vscale x 2 x float> @llvm.maxnum.nvx2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef)
- call <vscale x 4 x float> @llvm.maxnum.nvx4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef)
- call <vscale x 8 x float> @llvm.maxnum.nvx8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef)
- call <vscale x 16 x float> @llvm.maxnum.nvx16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef)
+ call <vscale x 1 x float> @llvm.maxnum.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef)
+ call <vscale x 2 x float> @llvm.maxnum.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef)
+ call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef)
+ call <vscale x 8 x float> @llvm.maxnum.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef)
+ call <vscale x 16 x float> @llvm.maxnum.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef)
call double @llvm.maxnum.f64(double undef, double undef)
call <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef)
call <4 x double> @llvm.maxnum.v4f64(<4 x double> undef, <4 x double> undef)
call <8 x double> @llvm.maxnum.v8f64(<8 x double> undef, <8 x double> undef)
call <16 x double> @llvm.maxnum.v16f64(<16 x double> undef, <16 x double> undef)
- call <vscale x 1 x double> @llvm.maxnum.nvx1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef)
- call <vscale x 2 x double> @llvm.maxnum.nvx2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef)
- call <vscale x 4 x double> @llvm.maxnum.nvx4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef)
- call <vscale x 8 x double> @llvm.maxnum.nvx8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef)
+ call <vscale x 1 x double> @llvm.maxnum.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef)
+ call <vscale x 2 x double> @llvm.maxnum.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef)
+ call <vscale x 4 x double> @llvm.maxnum.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef)
+ call <vscale x 8 x double> @llvm.maxnum.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef)
ret void
}
@@ -234,11 +234,11 @@ define void @maxnum_f16() {
call <4 x half> @llvm.maxnum.v4f16(<4 x half> undef, <4 x half> undef)
call <8 x half> @llvm.maxnum.v8f16(<8 x half> undef, <8 x half> undef)
call <16 x half> @llvm.maxnum.v16f16(<16 x half> undef, <16 x half> undef)
- call <vscale x 1 x half> @llvm.maxnum.nvx1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
- call <vscale x 2 x half> @llvm.maxnum.nvx2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
- call <vscale x 4 x half> @llvm.maxnum.nvx4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
- call <vscale x 8 x half> @llvm.maxnum.nvx8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
- call <vscale x 16 x half> @llvm.maxnum.nvx16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
+ call <vscale x 1 x half> @llvm.maxnum.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
+ call <vscale x 2 x half> @llvm.maxnum.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
+ call <vscale x 4 x half> @llvm.maxnum.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
+ call <vscale x 8 x half> @llvm.maxnum.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
+ call <vscale x 16 x half> @llvm.maxnum.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
ret void
}
@@ -270,20 +270,20 @@ define void @minimum() {
call <4 x float> @llvm.minimum.v4f32(<4 x float> undef, <4 x float> undef)
call <8 x float> @llvm.minimum.v8f32(<8 x float> undef, <8 x float> undef)
call <16 x float> @llvm.minimum.v16f32(<16 x float> undef, <16 x float> undef)
- call <vscale x 1 x float> @llvm.minimum.nvx1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef)
- call <vscale x 2 x float> @llvm.minimum.nvx2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef)
- call <vscale x 4 x float> @llvm.minimum.nvx4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef)
- call <vscale x 8 x float> @llvm.minimum.nvx8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef)
- call <vscale x 16 x float> @llvm.minimum.nvx16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef)
+ call <vscale x 1 x float> @llvm.minimum.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef)
+ call <vscale x 2 x float> @llvm.minimum.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef)
+ call <vscale x 4 x float> @llvm.minimum.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef)
+ call <vscale x 8 x float> @llvm.minimum.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef)
+ call <vscale x 16 x float> @llvm.minimum.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef)
call double @llvm.minimum.f64(double undef, double undef)
call <2 x double> @llvm.minimum.v2f64(<2 x double> undef, <2 x double> undef)
call <4 x double> @llvm.minimum.v4f64(<4 x double> undef, <4 x double> undef)
call <8 x double> @llvm.minimum.v8f64(<8 x double> undef, <8 x double> undef)
call <16 x double> @llvm.minimum.v16f64(<16 x double> undef, <16 x double> undef)
- call <vscale x 1 x double> @llvm.minimum.nvx1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef)
- call <vscale x 2 x double> @llvm.minimum.nvx2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef)
- call <vscale x 4 x double> @llvm.minimum.nvx4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef)
- call <vscale x 8 x double> @llvm.minimum.nvx8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef)
+ call <vscale x 1 x double> @llvm.minimum.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef)
+ call <vscale x 2 x double> @llvm.minimum.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef)
+ call <vscale x 4 x double> @llvm.minimum.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef)
+ call <vscale x 8 x double> @llvm.minimum.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef)
ret void
}
@@ -319,11 +319,11 @@ define void @minimum_f16() {
call <4 x half> @llvm.minimum.v4f16(<4 x half> undef, <4 x half> undef)
call <8 x half> @llvm.minimum.v8f16(<8 x half> undef, <8 x half> undef)
call <16 x half> @llvm.minimum.v16f16(<16 x half> undef, <16 x half> undef)
- call <vscale x 1 x half> @llvm.minimum.nvx1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
- call <vscale x 2 x half> @llvm.minimum.nvx2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
- call <vscale x 4 x half> @llvm.minimum.nvx4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
- call <vscale x 8 x half> @llvm.minimum.nvx8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
- call <vscale x 16 x half> @llvm.minimum.nvx16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
+ call <vscale x 1 x half> @llvm.minimum.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
+ call <vscale x 2 x half> @llvm.minimum.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
+ call <vscale x 4 x half> @llvm.minimum.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
+ call <vscale x 8 x half> @llvm.minimum.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
+ call <vscale x 16 x half> @llvm.minimum.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
ret void
}
@@ -355,20 +355,20 @@ define void @maximum() {
call <4 x float> @llvm.maximum.v4f32(<4 x float> undef, <4 x float> undef)
call <8 x float> @llvm.maximum.v8f32(<8 x float> undef, <8 x float> undef)
call <16 x float> @llvm.maximum.v16f32(<16 x float> undef, <16 x float> undef)
- call <vscale x 1 x float> @llvm.maximum.nvx1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef)
- call <vscale x 2 x float> @llvm.maximum.nvx2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef)
- call <vscale x 4 x float> @llvm.maximum.nvx4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef)
- call <vscale x 8 x float> @llvm.maximum.nvx8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef)
- call <vscale x 16 x float> @llvm.maximum.nvx16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef)
+ call <vscale x 1 x float> @llvm.maximum.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef)
+ call <vscale x 2 x float> @llvm.maximum.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef)
+ call <vscale x 4 x float> @llvm.maximum.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef)
+ call <vscale x 8 x float> @llvm.maximum.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef)
+ call <vscale x 16 x float> @llvm.maximum.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef)
call double @llvm.maximum.f64(double undef, double undef)
call <2 x double> @llvm.maximum.v2f64(<2 x double> undef, <2 x double> undef)
call <4 x double> @llvm.maximum.v4f64(<4 x double> undef, <4 x double> undef)
call <8 x double> @llvm.maximum.v8f64(<8 x double> undef, <8 x double> undef)
call <16 x double> @llvm.maximum.v16f64(<16 x double> undef, <16 x double> undef)
- call <vscale x 1 x double> @llvm.maximum.nvx1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef)
- call <vscale x 2 x double> @llvm.maximum.nvx2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef)
- call <vscale x 4 x double> @llvm.maximum.nvx4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef)
- call <vscale x 8 x double> @llvm.maximum.nvx8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef)
+ call <vscale x 1 x double> @llvm.maximum.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef)
+ call <vscale x 2 x double> @llvm.maximum.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef)
+ call <vscale x 4 x double> @llvm.maximum.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef)
+ call <vscale x 8 x double> @llvm.maximum.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef)
ret void
}
@@ -404,11 +404,11 @@ define void @maximum_f16() {
call <4 x half> @llvm.maximum.v4f16(<4 x half> undef, <4 x half> undef)
call <8 x half> @llvm.maximum.v8f16(<8 x half> undef, <8 x half> undef)
call <16 x half> @llvm.maximum.v16f16(<16 x half> undef, <16 x half> undef)
- call <vscale x 1 x half> @llvm.maximum.nvx1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
- call <vscale x 2 x half> @llvm.maximum.nvx2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
- call <vscale x 4 x half> @llvm.maximum.nvx4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
- call <vscale x 8 x half> @llvm.maximum.nvx8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
- call <vscale x 16 x half> @llvm.maximum.nvx16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
+ call <vscale x 1 x half> @llvm.maximum.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
+ call <vscale x 2 x half> @llvm.maximum.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
+ call <vscale x 4 x half> @llvm.maximum.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
+ call <vscale x 8 x half> @llvm.maximum.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
+ call <vscale x 16 x half> @llvm.maximum.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
ret void
}
@@ -440,20 +440,20 @@ define void @copysign() {
call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef)
call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef)
call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef)
- call <vscale x 1 x float> @llvm.copysign.nvx1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef)
- call <vscale x 2 x float> @llvm.copysign.nvx2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef)
- call <vscale x 4 x float> @llvm.copysign.nvx4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef)
- call <vscale x 8 x float> @llvm.copysign.nvx8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef)
- call <vscale x 16 x float> @llvm.copysign.nvx16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef)
+ call <vscale x 1 x float> @llvm.copysign.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef)
+ call <vscale x 2 x float> @llvm.copysign.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef)
+ call <vscale x 4 x float> @llvm.copysign.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef)
+ call <vscale x 8 x float> @llvm.copysign.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef)
+ call <vscale x 16 x float> @llvm.copysign.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef)
call double @llvm.copysign.f64(double undef, double undef)
call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
call <16 x double> @llvm.copysign.v16f64(<16 x double> undef, <16 x double> undef)
- call <vscale x 1 x double> @llvm.copysign.nvx1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef)
- call <vscale x 2 x double> @llvm.copysign.nvx2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef)
- call <vscale x 4 x double> @llvm.copysign.nvx4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef)
- call <vscale x 8 x double> @llvm.copysign.nvx8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef)
+ call <vscale x 1 x double> @llvm.copysign.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef)
+ call <vscale x 2 x double> @llvm.copysign.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef)
+ call <vscale x 4 x double> @llvm.copysign.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef)
+ call <vscale x 8 x double> @llvm.copysign.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef)
ret void
}
@@ -489,10 +489,10 @@ define void @copysign_f16() {
call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef)
call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef)
call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef)
- call <vscale x 1 x half> @llvm.copysign.nvx1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
- call <vscale x 2 x half> @llvm.copysign.nvx2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
- call <vscale x 4 x half> @llvm.copysign.nvx4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
- call <vscale x 8 x half> @llvm.copysign.nvx8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
- call <vscale x 16 x half> @llvm.copysign.nvx16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
+ call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
+ call <vscale x 2 x half> @llvm.copysign.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
+ call <vscale x 4 x half> @llvm.copysign.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
+ call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
+ call <vscale x 16 x half> @llvm.copysign.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
ret void
}
diff --git a/llvm/test/Analysis/CostModel/RISCV/fp-sqrt-pow.ll b/llvm/test/Analysis/CostModel/RISCV/fp-sqrt-pow.ll
index efe17f2b76a70e..be9c19dc59a852 100644
--- a/llvm/test/Analysis/CostModel/RISCV/fp-sqrt-pow.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/fp-sqrt-pow.ll
@@ -39,29 +39,29 @@ define void @sqrt() {
call <4 x bfloat> @llvm.sqrt.v4bf16(<4 x bfloat> undef)
call <8 x bfloat> @llvm.sqrt.v8bf16(<8 x bfloat> undef)
call <16 x bfloat> @llvm.sqrt.v16bf16(<16 x bfloat> undef)
- call <vscale x 2 x bfloat> @llvm.sqrt.nvx2bf16(<vscale x 2 x bfloat> undef)
- call <vscale x 4 x bfloat> @llvm.sqrt.nvx4bf16(<vscale x 4 x bfloat> undef)
- call <vscale x 8 x bfloat> @llvm.sqrt.nvx8bf16(<vscale x 8 x bfloat> undef)
- call <vscale x 16 x bfloat> @llvm.sqrt.nvx16bf16(<vscale x 16 x bfloat> undef)
+ call <vscale x 2 x bfloat> @llvm.sqrt.nxv2bf16(<vscale x 2 x bfloat> undef)
+ call <vscale x 4 x bfloat> @llvm.sqrt.nxv4bf16(<vscale x 4 x bfloat> undef)
+ call <vscale x 8 x bfloat> @llvm.sqrt.nxv8bf16(<vscale x 8 x bfloat> undef)
+ call <vscale x 16 x bfloat> @llvm.sqrt.nxv16bf16(<vscale x 16 x bfloat> undef)
call float @llvm.sqrt.f32(float undef)
call <2 x float> @llvm.sqrt.v2f32(<2 x float> undef)
call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
- call <vscale x 1 x float> @llvm.sqrt.nvx1f32(<vscale x 1 x float> undef)
- call <vscale x 2 x float> @llvm.sqrt.nvx2f32(<vscale x 2 x float> undef)
- call <vscale x 4 x float> @llvm.sqrt.nvx4f32(<vscale x 4 x float> undef)
- call <vscale x 8 x float> @llvm.sqrt.nvx8f32(<vscale x 8 x float> undef)
- call <vscale x 16 x float> @llvm.sqrt.nvx16f32(<vscale x 16 x float> undef)
+ call <vscale x 1 x float> @llvm.sqrt.nxv1f32(<vscale x 1 x float> undef)
+ call <vscale x 2 x float> @llvm.sqrt.nxv2f32(<vscale x 2 x float> undef)
+ call <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float> undef)
+ call <vscale x 8 x float> @llvm.sqrt.nxv8f32(<vscale x 8 x float> undef)
+ call <vscale x 16 x float> @llvm.sqrt.nxv16f32(<vscale x 16 x float> undef)
call double @llvm.sqrt.f64(double undef)
call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
call <16 x double> @llvm.sqrt.v16f64(<16 x double> undef)
- call <vscale x 1 x double> @llvm.sqrt.nvx1f64(<vscale x 1 x double> undef)
- call <vscale x 2 x double> @llvm.sqrt.nvx2f64(<vscale x 2 x double> undef)
- call <vscale x 4 x double> @llvm.sqrt.nvx4f64(<vscale x 4 x double> undef)
- call <vscale x 8 x double> @llvm.sqrt.nvx8f64(<vscale x 8 x double> undef)
+ call <vscale x 1 x double> @llvm.sqrt.nxv1f64(<vscale x 1 x double> undef)
+ call <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double> undef)
+ call <vscale x 4 x double> @llvm.sqrt.nxv4f64(<vscale x 4 x double> undef)
+ call <vscale x 8 x double> @llvm.sqrt.nxv8f64(<vscale x 8 x double> undef)
ret void
}
@@ -83,10 +83,10 @@ define void @sqrt_f16() {
call <4 x half> @llvm.sqrt.v4f16(<4 x half> undef)
call <8 x half> @llvm.sqrt.v8f16(<8 x half> undef)
call <16 x half> @llvm.sqrt.v16f16(<16 x half> undef)
- call <vscale x 2 x half> @llvm.sqrt.nvx2f16(<vscale x 2 x half> undef)
- call <vscale x 4 x half> @llvm.sqrt.nvx4f16(<vscale x 4 x half> undef)
- call <vscale x 8 x half> @llvm.sqrt.nvx8f16(<vscale x 8 x half> undef)
- call <vscale x 16 x half> @llvm.sqrt.nvx16f16(<vscale x 16 x half> undef)
+ call <vscale x 2 x half> @llvm.sqrt.nxv2f16(<vscale x 2 x half> undef)
+ call <vscale x 4 x half> @llvm.sqrt.nxv4f16(<vscale x 4 x half> undef)
+ call <vscale x 8 x half> @llvm.sqrt.nxv8f16(<vscale x 8 x half> undef)
+ call <vscale x 16 x half> @llvm.sqrt.nxv16f16(<vscale x 16 x half> undef)
ret void
}
@@ -128,30 +128,30 @@ define void @pow() {
call <4 x bfloat> @llvm.pow.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef)
call <8 x bfloat> @llvm.pow.v8bf16(<8 x bfloat> undef, <8 x bfloat> undef)
call <16 x bfloat> @llvm.pow.v16bf16(<16 x bfloat> undef, <16 x bfloat> undef)
- call <vscale x 1 x bfloat> @llvm.pow.nvx1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef)
- call <vscale x 2 x bfloat> @llvm.pow.nvx2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef)
- call <vscale x 4 x bfloat> @llvm.pow.nvx4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef)
- call <vscale x 8 x bfloat> @llvm.pow.nvx8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef)
- call <vscale x 16 x bfloat> @llvm.pow.nvx16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef)
+ call <vscale x 1 x bfloat> @llvm.pow.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef)
+ call <vscale x 2 x bfloat> @llvm.pow.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef)
+ call <vscale x 4 x bfloat> @llvm.pow.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef)
+ call <vscale x 8 x bfloat> @llvm.pow.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef)
+ call <vscale x 16 x bfloat> @llvm.pow.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef)
call float @llvm.pow.f32(float undef, float undef)
call <2 x float> @llvm.pow.v2f32(<2 x float> undef, <2 x float> undef)
call <4 x float> @llvm.pow.v4f32(<4 x float> undef, <4 x float> undef)
call <8 x float> @llvm.pow.v8f32(<8 x float> undef, <8 x float> undef)
call <16 x float> @llvm.pow.v16f32(<16 x float> undef, <16 x float> undef)
- call <vscale x 1 x float> @llvm.pow.nvx1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef)
- call <vscale x 2 x float> @llvm.pow.nvx2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef)
- call <vscale x 4 x float> @llvm.pow.nvx4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef)
- call <vscale x 8 x float> @llvm.pow.nvx8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef)
- call <vscale x 16 x float> @llvm.pow.nvx16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef)
+ call <vscale x 1 x float> @llvm.pow.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef)
+ call <vscale x 2 x float> @llvm.pow.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef)
+ call <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef)
+ call <vscale x 8 x float> @llvm.pow.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef)
+ call <vscale x 16 x float> @llvm.pow.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef)
call double @llvm.pow.f64(double undef, double undef)
call <2 x double> @llvm.pow.v2f64(<2 x double> undef, <2 x double> undef)
call <4 x double> @llvm.pow.v4f64(<4 x double> undef, <4 x double> undef)
call <8 x double> @llvm.pow.v8f64(<8 x double> undef, <8 x double> undef)
call <16 x double> @llvm.pow.v16f64(<16 x double> undef, <16 x double> undef)
- call <vscale x 1 x double> @llvm.pow.nvx1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef)
- call <vscale x 2 x double> @llvm.pow.nvx2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef)
- call <vscale x 4 x double> @llvm.pow.nvx4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef)
- call <vscale x 8 x double> @llvm.pow.nvx8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef)
+ call <vscale x 1 x double> @llvm.pow.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef)
+ call <vscale x 2 x double> @llvm.pow.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef)
+ call <vscale x 4 x double> @llvm.pow.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef)
+ call <vscale x 8 x double> @llvm.pow.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef)
ret void
}
@@ -187,10 +187,10 @@ define void @pow_f16() {
call <4 x half> @llvm.pow.v4f16(<4 x half> undef, <4 x half> undef)
call <8 x half> @llvm.pow.v8f16(<8 x half> undef, <8 x half> undef)
call <16 x half> @llvm.pow.v16f16(<16 x half> undef, <16 x half> undef)
- call <vscale x 1 x half> @llvm.pow.nvx1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
- call <vscale x 2 x half> @llvm.pow.nvx2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
- call <vscale x 4 x half> @llvm.pow.nvx4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
- call <vscale x 8 x half> @llvm.pow.nvx8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
- call <vscale x 16 x half> @llvm.pow.nvx16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
+ call <vscale x 1 x half> @llvm.pow.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
+ call <vscale x 2 x half> @llvm.pow.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
+ call <vscale x 4 x half> @llvm.pow.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
+ call <vscale x 8 x half> @llvm.pow.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
+ call <vscale x 16 x half> @llvm.pow.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
ret void
}
diff --git a/llvm/test/Analysis/CostModel/RISCV/fp-trig-log-exp.ll b/llvm/test/Analysis/CostModel/RISCV/fp-trig-log-exp.ll
index 34d6c93f4577a7..ba5e40ca03b88a 100644
--- a/llvm/test/Analysis/CostModel/RISCV/fp-trig-log-exp.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/fp-trig-log-exp.ll
@@ -40,30 +40,30 @@ define void @sin() {
call <4 x bfloat> @llvm.sin.v4bf16(<4 x bfloat> undef)
call <8 x bfloat> @llvm.sin.v8bf16(<8 x bfloat> undef)
call <16 x bfloat> @llvm.sin.v16bf16(<16 x bfloat> undef)
- call <vscale x 1 x bfloat> @llvm.sin.nvx1bf16(<vscale x 1 x bfloat> undef)
- call <vscale x 2 x bfloat> @llvm.sin.nvx2bf16(<vscale x 2 x bfloat> undef)
- call <vscale x 4 x bfloat> @llvm.sin.nvx4bf16(<vscale x 4 x bfloat> undef)
- call <vscale x 8 x bfloat> @llvm.sin.nvx8bf16(<vscale x 8 x bfloat> undef)
- call <vscale x 16 x bfloat> @llvm.sin.nvx16bf16(<vscale x 16 x bfloat> undef)
+ call <vscale x 1 x bfloat> @llvm.sin.nxv1bf16(<vscale x 1 x bfloat> undef)
+ call <vscale x 2 x bfloat> @llvm.sin.nxv2bf16(<vscale x 2 x bfloat> undef)
+ call <vscale x 4 x bfloat> @llvm.sin.nxv4bf16(<vscale x 4 x bfloat> undef)
+ call <vscale x 8 x bfloat> @llvm.sin.nxv8bf16(<vscale x 8 x bfloat> undef)
+ call <vscale x 16 x bfloat> @llvm.sin.nxv16bf16(<vscale x 16 x bfloat> undef)
call float @llvm.sin.f32(float undef)
call <2 x float> @llvm.sin.v2f32(<2 x float> undef)
call <4 x float> @llvm.sin.v4f32(<4 x float> undef)
call <8 x float> @llvm.sin.v8f32(<8 x float> undef)
call <16 x float> @llvm.sin.v16f32(<16 x float> undef)
- call <vscale x 1 x float> @llvm.sin.nvx1f32(<vscale x 1 x float> undef)
- call <vscale x 2 x float> @llvm.sin.nvx2f32(<vscale x 2 x float> undef)
- call <vscale x 4 x float> @llvm.sin.nvx4f32(<vscale x 4 x float> undef)
- call <vscale x 8 x float> @llvm.sin.nvx8f32(<vscale x 8 x float> undef)
- call <vscale x 16 x float> @llvm.sin.nvx16f32(<vscale x 16 x float> undef)
+ call <vscale x 1 x float> @llvm.sin.nxv1f32(<vscale x 1 x float> undef)
+ call <vscale x 2 x float> @llvm.sin.nxv2f32(<vscale x 2 x float> undef)
+ call <vscale x 4 x float> @llvm.sin.nxv4f32(<vscale x 4 x float> undef)
+ call <vscale x 8 x float> @llvm.sin.nxv8f32(<vscale x 8 x float> undef)
+ call <vscale x 16 x float> @llvm.sin.nxv16f32(<vscale x 16 x float> undef)
call double @llvm.sin.f64(double undef)
call <2 x double> @llvm.sin.v2f64(<2 x double> undef)
call <4 x double> @llvm.sin.v4f64(<4 x double> undef)
call <8 x double> @llvm.sin.v8f64(<8 x double> undef)
call <16 x double> @llvm.sin.v16f64(<16 x double> undef)
- call <vscale x 1 x double> @llvm.sin.nvx1f64(<vscale x 1 x double> undef)
- call <vscale x 2 x double> @llvm.sin.nvx2f64(<vscale x 2 x double> undef)
- call <vscale x 4 x double> @llvm.sin.nvx4f64(<vscale x 4 x double> undef)
- call <vscale x 8 x double> @llvm.sin.nvx8f64(<vscale x 8 x double> undef)
+ call <vscale x 1 x double> @llvm.sin.nxv1f64(<vscale x 1 x double> undef)
+ call <vscale x 2 x double> @llvm.sin.nxv2f64(<vscale x 2 x double> undef)
+ call <vscale x 4 x double> @llvm.sin.nxv4f64(<vscale x 4 x double> undef)
+ call <vscale x 8 x double> @llvm.sin.nxv8f64(<vscale x 8 x double> undef)
ret void
}
@@ -97,10 +97,10 @@ define void @sin_f16() {
call <4 x half> @llvm.sin.v4f16(<4 x half> undef)
call <8 x half> @llvm.sin.v8f16(<8 x half> undef)
call <16 x half> @llvm.sin.v16f16(<16 x half> undef)
- call <vscale x 1 x half> @llvm.sin.nvx1f16(<vscale x 1 x half> undef)
- call <vscale x 2 x half> @llvm.sin.nvx2f16(<vscale x 2 x half> undef)
- call <vscale x 4 x half> @llvm.sin.nvx4f16(<vscale x 4 x half> undef)
- call <vscale x 8 x half> @llvm.sin.nvx8f16(<vscale x 8 x half> undef)
+ call <vscale x 1 x half> @llvm.sin.nxv1f16(<vscale x 1 x half> undef)
+ call <vscale x 2 x half> @llvm.sin.nxv2f16(<vscale x 2 x half> undef)
+ call <vscale x 4 x half> @llvm.sin.nxv4f16(<vscale x 4 x half> undef)
+ call <vscale x 8 x half> @llvm.sin.nxv8f16(<vscale x 8 x half> undef)
ret void
}
@@ -142,30 +142,30 @@ define void @cos() {
call <4 x bfloat> @llvm.cos.v4bf16(<4 x bfloat> undef)
call <8 x bfloat> @llvm.cos.v8bf16(<8 x bfloat> undef)
call <16 x bfloat> @llvm.cos.v16bf16(<16 x bfloat> undef)
- call <vscale x 1 x bfloat> @llvm.cos.nvx1bf16(<vscale x 1 x bfloat> undef)
- call <vscale x 2 x bfloat> @llvm.cos.nvx2bf16(<vscale x 2 x bfloat> undef)
- call <vscale x 4 x bfloat> @llvm.cos.nvx4bf16(<vscale x 4 x bfloat> undef)
- call <vscale x 8 x bfloat> @llvm.cos.nvx8bf16(<vscale x 8 x bfloat> undef)
- call <vscale x 16 x bfloat> @llvm.cos.nvx16bf16(<vscale x 16 x bfloat> undef)
+ call <vscale x 1 x bfloat> @llvm.cos.nxv1bf16(<vscale x 1 x bfloat> undef)
+ call <vscale x 2 x bfloat> @llvm.cos.nxv2bf16(<vscale x 2 x bfloat> undef)
+ call <vscale x 4 x bfloat> @llvm.cos.nxv4bf16(<vscale x 4 x bfloat> undef)
+ call <vscale x 8 x bfloat> @llvm.cos.nxv8bf16(<vscale x 8 x bfloat> undef)
+ call <vscale x 16 x bfloat> @llvm.cos.nxv16bf16(<vscale x 16 x bfloat> undef)
call float @llvm.cos.f32(float undef)
call <2 x float> @llvm.cos.v2f32(<2 x float> undef)
call <4 x float> @llvm.cos.v4f32(<4 x float> undef)
call <8 x float> @llvm.cos.v8f32(<8 x float> undef)
call <16 x float> @llvm.cos.v16f32(<16 x float> undef)
- call <vscale x 1 x float> @llvm.cos.nvx1f32(<vscale x 1 x float> undef)
- call <vscale x 2 x float> @llvm.cos.nvx2f32(<vscale x 2 x float> undef)
- call <vscale x 4 x float> @llvm.cos.nvx4f32(<vscale x 4 x float> undef)
- call <vscale x 8 x float> @llvm.cos.nvx8f32(<vscale x 8 x float> undef)
- call <vscale x 16 x float> @llvm.cos.nvx16f32(<vscale x 16 x float> undef)
+ call <vscale x 1 x float> @llvm.cos.nxv1f32(<vscale x 1 x float> undef)
+ call <vscale x 2 x float> @llvm.cos.nxv2f32(<vscale x 2 x float> undef)
+ call <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float> undef)
+ call <vscale x 8 x float> @llvm.cos.nxv8f32(<vscale x 8 x float> undef)
+ call <vscale x 16 x float> @llvm.cos.nxv16f32(<vscale x 16 x float> undef)
call double @llvm.cos.f64(double undef)
call <2 x double> @llvm.cos.v2f64(<2 x double> undef)
call <4 x double> @llvm.cos.v4f64(<4 x double> undef)
call <8 x double> @llvm.cos.v8f64(<8 x double> undef)
call <16 x double> @llvm.cos.v16f64(<16 x double> undef)
- call <vscale x 1 x double> @llvm.cos.nvx1f64(<vscale x 1 x double> undef)
- call <vscale x 2 x double> @llvm.cos.nvx2f64(<vscale x 2 x double> undef)
- call <vscale x 4 x double> @llvm.cos.nvx4f64(<vscale x 4 x double> undef)
- call <vscale x 8 x double> @llvm.cos.nvx8f64(<vscale x 8 x double> undef)
+ call <vscale x 1 x double> @llvm.cos.nxv1f64(<vscale x 1 x double> undef)
+ call <vscale x 2 x double> @llvm.cos.nxv2f64(<vscale x 2 x double> undef)
+ call <vscale x 4 x double> @llvm.cos.nxv4f64(<vscale x 4 x double> undef)
+ call <vscale x 8 x double> @llvm.cos.nxv8f64(<vscale x 8 x double> undef)
ret void
}
@@ -199,10 +199,10 @@ define void @cos_f16() {
call <4 x half> @llvm.cos.v4f16(<4 x half> undef)
call <8 x half> @llvm.cos.v8f16(<8 x half> undef)
call <16 x half> @llvm.cos.v16f16(<16 x half> undef)
- call <vscale x 1 x half> @llvm.cos.nvx1f16(<vscale x 1 x half> undef)
- call <vscale x 2 x half> @llvm.cos.nvx2f16(<vscale x 2 x half> undef)
- call <vscale x 4 x half> @llvm.cos.nvx4f16(<vscale x 4 x half> undef)
- call <vscale x 8 x half> @llvm.cos.nvx8f16(<vscale x 8 x half> undef)
+ call <vscale x 1 x half> @llvm.cos.nxv1f16(<vscale x 1 x half> undef)
+ call <vscale x 2 x half> @llvm.cos.nxv2f16(<vscale x 2 x half> undef)
+ call <vscale x 4 x half> @llvm.cos.nxv4f16(<vscale x 4 x half> undef)
+ call <vscale x 8 x half> @llvm.cos.nxv8f16(<vscale x 8 x half> undef)
ret void
}
@@ -244,30 +244,30 @@ define void @exp() {
call <4 x bfloat> @llvm.exp.v4bf16(<4 x bfloat> undef)
call <8 x bfloat> @llvm.exp.v8bf16(<8 x bfloat> undef)
call <16 x bfloat> @llvm.exp.v16bf16(<16 x bfloat> undef)
- call <vscale x 1 x bfloat> @llvm.exp.nvx1bf16(<vscale x 1 x bfloat> undef)
- call <vscale x 2 x bfloat> @llvm.exp.nvx2bf16(<vscale x 2 x bfloat> undef)
- call <vscale x 4 x bfloat> @llvm.exp.nvx4bf16(<vscale x 4 x bfloat> undef)
- call <vscale x 8 x bfloat> @llvm.exp.nvx8bf16(<vscale x 8 x bfloat> undef)
- call <vscale x 16 x bfloat> @llvm.exp.nvx16bf16(<vscale x 16 x bfloat> undef)
+ call <vscale x 1 x bfloat> @llvm.exp.nxv1bf16(<vscale x 1 x bfloat> undef)
+ call <vscale x 2 x bfloat> @llvm.exp.nxv2bf16(<vscale x 2 x bfloat> undef)
+ call <vscale x 4 x bfloat> @llvm.exp.nxv4bf16(<vscale x 4 x bfloat> undef)
+ call <vscale x 8 x bfloat> @llvm.exp.nxv8bf16(<vscale x 8 x bfloat> undef)
+ call <vscale x 16 x bfloat> @llvm.exp.nxv16bf16(<vscale x 16 x bfloat> undef)
call float @llvm.exp.f32(float undef)
call <2 x float> @llvm.exp.v2f32(<2 x float> undef)
call <4 x float> @llvm.exp.v4f32(<4 x float> undef)
call <8 x float> @llvm.exp.v8f32(<8 x float> undef)
call <16 x float> @llvm.exp.v16f32(<16 x float> undef)
- call <vscale x 1 x float> @llvm.exp.nvx1f32(<vscale x 1 x float> undef)
- call <vscale x 2 x float> @llvm.exp.nvx2f32(<vscale x 2 x float> undef)
- call <vscale x 4 x float> @llvm.exp.nvx4f32(<vscale x 4 x float> undef)
- call <vscale x 8 x float> @llvm.exp.nvx8f32(<vscale x 8 x float> undef)
- call <vscale x 16 x float> @llvm.exp.nvx16f32(<vscale x 16 x float> undef)
+ call <vscale x 1 x float> @llvm.exp.nxv1f32(<vscale x 1 x float> undef)
+ call <vscale x 2 x float> @llvm.exp.nxv2f32(<vscale x 2 x float> undef)
+ call <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float> undef)
+ call <vscale x 8 x float> @llvm.exp.nxv8f32(<vscale x 8 x float> undef)
+ call <vscale x 16 x float> @llvm.exp.nxv16f32(<vscale x 16 x float> undef)
call double @llvm.exp.f64(double undef)
call <2 x double> @llvm.exp.v2f64(<2 x double> undef)
call <4 x double> @llvm.exp.v4f64(<4 x double> undef)
call <8 x double> @llvm.exp.v8f64(<8 x double> undef)
call <16 x double> @llvm.exp.v16f64(<16 x double> undef)
- call <vscale x 1 x double> @llvm.exp.nvx1f64(<vscale x 1 x double> undef)
- call <vscale x 2 x double> @llvm.exp.nvx2f64(<vscale x 2 x double> undef)
- call <vscale x 4 x double> @llvm.exp.nvx4f64(<vscale x 4 x double> undef)
- call <vscale x 8 x double> @llvm.exp.nvx8f64(<vscale x 8 x double> undef)
+ call <vscale x 1 x double> @llvm.exp.nxv1f64(<vscale x 1 x double> undef)
+ call <vscale x 2 x double> @llvm.exp.nxv2f64(<vscale x 2 x double> undef)
+ call <vscale x 4 x double> @llvm.exp.nxv4f64(<vscale x 4 x double> undef)
+ call <vscale x 8 x double> @llvm.exp.nxv8f64(<vscale x 8 x double> undef)
ret void
}
@@ -301,10 +301,10 @@ define void @exp_f16() {
call <4 x half> @llvm.exp.v4f16(<4 x half> undef)
call <8 x half> @llvm.exp.v8f16(<8 x half> undef)
call <16 x half> @llvm.exp.v16f16(<16 x half> undef)
- call <vscale x 1 x half> @llvm.exp.nvx1f16(<vscale x 1 x half> undef)
- call <vscale x 2 x half> @llvm.exp.nvx2f16(<vscale x 2 x half> undef)
- call <vscale x 4 x half> @llvm.exp.nvx4f16(<vscale x 4 x half> undef)
- call <vscale x 8 x half> @llvm.exp.nvx8f16(<vscale x 8 x half> undef)
+ call <vscale x 1 x half> @llvm.exp.nxv1f16(<vscale x 1 x half> undef)
+ call <vscale x 2 x half> @llvm.exp.nxv2f16(<vscale x 2 x half> undef)
+ call <vscale x 4 x half> @llvm.exp.nxv4f16(<vscale x 4 x half> undef)
+ call <vscale x 8 x half> @llvm.exp.nxv8f16(<vscale x 8 x half> undef)
ret void
}
@@ -346,30 +346,30 @@ define void @exp2() {
call <4 x bfloat> @llvm.exp2.v4bf16(<4 x bfloat> undef)
call <8 x bfloat> @llvm.exp2.v8bf16(<8 x bfloat> undef)
call <16 x bfloat> @llvm.exp2.v16bf16(<16 x bfloat> undef)
- call <vscale x 1 x bfloat> @llvm.exp2.nvx1bf16(<vscale x 1 x bfloat> undef)
- call <vscale x 2 x bfloat> @llvm.exp2.nvx2bf16(<vscale x 2 x bfloat> undef)
- call <vscale x 4 x bfloat> @llvm.exp2.nvx4bf16(<vscale x 4 x bfloat> undef)
- call <vscale x 8 x bfloat> @llvm.exp2.nvx8bf16(<vscale x 8 x bfloat> undef)
- call <vscale x 16 x bfloat> @llvm.exp2.nvx16bf16(<vscale x 16 x bfloat> undef)
+ call <vscale x 1 x bfloat> @llvm.exp2.nxv1bf16(<vscale x 1 x bfloat> undef)
+ call <vscale x 2 x bfloat> @llvm.exp2.nxv2bf16(<vscale x 2 x bfloat> undef)
+ call <vscale x 4 x bfloat> @llvm.exp2.nxv4bf16(<vscale x 4 x bfloat> undef)
+ call <vscale x 8 x bfloat> @llvm.exp2.nxv8bf16(<vscale x 8 x bfloat> undef)
+ call <vscale x 16 x bfloat> @llvm.exp2.nxv16bf16(<vscale x 16 x bfloat> undef)
call float @llvm.exp2.f32(float undef)
call <2 x float> @llvm.exp2.v2f32(<2 x float> undef)
call <4 x float> @llvm.exp2.v4f32(<4 x float> undef)
call <8 x float> @llvm.exp2.v8f32(<8 x float> undef)
call <16 x float> @llvm.exp2.v16f32(<16 x float> undef)
- call <vscale x 1 x float> @llvm.exp2.nvx1f32(<vscale x 1 x float> undef)
- call <vscale x 2 x float> @llvm.exp2.nvx2f32(<vscale x 2 x float> undef)
- call <vscale x 4 x float> @llvm.exp2.nvx4f32(<vscale x 4 x float> undef)
- call <vscale x 8 x float> @llvm.exp2.nvx8f32(<vscale x 8 x float> undef)
- call <vscale x 16 x float> @llvm.exp2.nvx16f32(<vscale x 16 x float> undef)
+ call <vscale x 1 x float> @llvm.exp2.nxv1f32(<vscale x 1 x float> undef)
+ call <vscale x 2 x float> @llvm.exp2.nxv2f32(<vscale x 2 x float> undef)
+ call <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float> undef)
+ call <vscale x 8 x float> @llvm.exp2.nxv8f32(<vscale x 8 x float> undef)
+ call <vscale x 16 x float> @llvm.exp2.nxv16f32(<vscale x 16 x float> undef)
call double @llvm.exp2.f64(double undef)
call <2 x double> @llvm.exp2.v2f64(<2 x double> undef)
call <4 x double> @llvm.exp2.v4f64(<4 x double> undef)
call <8 x double> @llvm.exp2.v8f64(<8 x double> undef)
call <16 x double> @llvm.exp2.v16f64(<16 x double> undef)
- call <vscale x 1 x double> @llvm.exp2.nvx1f64(<vscale x 1 x double> undef)
- call <vscale x 2 x double> @llvm.exp2.nvx2f64(<vscale x 2 x double> undef)
- call <vscale x 4 x double> @llvm.exp2.nvx4f64(<vscale x 4 x double> undef)
- call <vscale x 8 x double> @llvm.exp2.nvx8f64(<vscale x 8 x double> undef)
+ call <vscale x 1 x double> @llvm.exp2.nxv1f64(<vscale x 1 x double> undef)
+ call <vscale x 2 x double> @llvm.exp2.nxv2f64(<vscale x 2 x double> undef)
+ call <vscale x 4 x double> @llvm.exp2.nxv4f64(<vscale x 4 x double> undef)
+ call <vscale x 8 x double> @llvm.exp2.nxv8f64(<vscale x 8 x double> undef)
ret void
}
@@ -403,10 +403,10 @@ define void @exp2_f16() {
call <4 x half> @llvm.exp2.v4f16(<4 x half> undef)
call <8 x half> @llvm.exp2.v8f16(<8 x half> undef)
call <16 x half> @llvm.exp2.v16f16(<16 x half> undef)
- call <vscale x 1 x half> @llvm.exp2.nvx1f16(<vscale x 1 x half> undef)
- call <vscale x 2 x half> @llvm.exp2.nvx2f16(<vscale x 2 x half> undef)
- call <vscale x 4 x half> @llvm.exp2.nvx4f16(<vscale x 4 x half> undef)
- call <vscale x 8 x half> @llvm.exp2.nvx8f16(<vscale x 8 x half> undef)
+ call <vscale x 1 x half> @llvm.exp2.nxv1f16(<vscale x 1 x half> undef)
+ call <vscale x 2 x half> @llvm.exp2.nxv2f16(<vscale x 2 x half> undef)
+ call <vscale x 4 x half> @llvm.exp2.nxv4f16(<vscale x 4 x half> undef)
+ call <vscale x 8 x half> @llvm.exp2.nxv8f16(<vscale x 8 x half> undef)
ret void
}
@@ -448,30 +448,30 @@ define void @log() {
call <4 x bfloat> @llvm.log.v4bf16(<4 x bfloat> undef)
call <8 x bfloat> @llvm.log.v8bf16(<8 x bfloat> undef)
call <16 x bfloat> @llvm.log.v16bf16(<16 x bfloat> undef)
- call <vscale x 1 x bfloat> @llvm.log.nvx1bf16(<vscale x 1 x bfloat> undef)
- call <vscale x 2 x bfloat> @llvm.log.nvx2bf16(<vscale x 2 x bfloat> undef)
- call <vscale x 4 x bfloat> @llvm.log.nvx4bf16(<vscale x 4 x bfloat> undef)
- call <vscale x 8 x bfloat> @llvm.log.nvx8bf16(<vscale x 8 x bfloat> undef)
- call <vscale x 16 x bfloat> @llvm.log.nvx16bf16(<vscale x 16 x bfloat> undef)
+ call <vscale x 1 x bfloat> @llvm.log.nxv1bf16(<vscale x 1 x bfloat> undef)
+ call <vscale x 2 x bfloat> @llvm.log.nxv2bf16(<vscale x 2 x bfloat> undef)
+ call <vscale x 4 x bfloat> @llvm.log.nxv4bf16(<vscale x 4 x bfloat> undef)
+ call <vscale x 8 x bfloat> @llvm.log.nxv8bf16(<vscale x 8 x bfloat> undef)
+ call <vscale x 16 x bfloat> @llvm.log.nxv16bf16(<vscale x 16 x bfloat> undef)
call float @llvm.log.f32(float undef)
call <2 x float> @llvm.log.v2f32(<2 x float> undef)
call <4 x float> @llvm.log.v4f32(<4 x float> undef)
call <8 x float> @llvm.log.v8f32(<8 x float> undef)
call <16 x float> @llvm.log.v16f32(<16 x float> undef)
- call <vscale x 1 x float> @llvm.log.nvx1f32(<vscale x 1 x float> undef)
- call <vscale x 2 x float> @llvm.log.nvx2f32(<vscale x 2 x float> undef)
- call <vscale x 4 x float> @llvm.log.nvx4f32(<vscale x 4 x float> undef)
- call <vscale x 8 x float> @llvm.log.nvx8f32(<vscale x 8 x float> undef)
- call <vscale x 16 x float> @llvm.log.nvx16f32(<vscale x 16 x float> undef)
+ call <vscale x 1 x float> @llvm.log.nxv1f32(<vscale x 1 x float> undef)
+ call <vscale x 2 x float> @llvm.log.nxv2f32(<vscale x 2 x float> undef)
+ call <vscale x 4 x float> @llvm.log.nxv4f32(<vscale x 4 x float> undef)
+ call <vscale x 8 x float> @llvm.log.nxv8f32(<vscale x 8 x float> undef)
+ call <vscale x 16 x float> @llvm.log.nxv16f32(<vscale x 16 x float> undef)
call double @llvm.log.f64(double undef)
call <2 x double> @llvm.log.v2f64(<2 x double> undef)
call <4 x double> @llvm.log.v4f64(<4 x double> undef)
call <8 x double> @llvm.log.v8f64(<8 x double> undef)
call <16 x double> @llvm.log.v16f64(<16 x double> undef)
- call <vscale x 1 x double> @llvm.log.nvx1f64(<vscale x 1 x double> undef)
- call <vscale x 2 x double> @llvm.log.nvx2f64(<vscale x 2 x double> undef)
- call <vscale x 4 x double> @llvm.log.nvx4f64(<vscale x 4 x double> undef)
- call <vscale x 8 x double> @llvm.log.nvx8f64(<vscale x 8 x double> undef)
+ call <vscale x 1 x double> @llvm.log.nxv1f64(<vscale x 1 x double> undef)
+ call <vscale x 2 x double> @llvm.log.nxv2f64(<vscale x 2 x double> undef)
+ call <vscale x 4 x double> @llvm.log.nxv4f64(<vscale x 4 x double> undef)
+ call <vscale x 8 x double> @llvm.log.nxv8f64(<vscale x 8 x double> undef)
ret void
}
@@ -505,10 +505,10 @@ define void @log_f16() {
call <4 x half> @llvm.log.v4f16(<4 x half> undef)
call <8 x half> @llvm.log.v8f16(<8 x half> undef)
call <16 x half> @llvm.log.v16f16(<16 x half> undef)
- call <vscale x 1 x half> @llvm.log.nvx1f16(<vscale x 1 x half> undef)
- call <vscale x 2 x half> @llvm.log.nvx2f16(<vscale x 2 x half> undef)
- call <vscale x 4 x half> @llvm.log.nvx4f16(<vscale x 4 x half> undef)
- call <vscale x 8 x half> @llvm.log.nvx8f16(<vscale x 8 x half> undef)
+ call <vscale x 1 x half> @llvm.log.nxv1f16(<vscale x 1 x half> undef)
+ call <vscale x 2 x half> @llvm.log.nxv2f16(<vscale x 2 x half> undef)
+ call <vscale x 4 x half> @llvm.log.nxv4f16(<vscale x 4 x half> undef)
+ call <vscale x 8 x half> @llvm.log.nxv8f16(<vscale x 8 x half> undef)
ret void
}
@@ -550,30 +550,30 @@ define void @log10() {
call <4 x bfloat> @llvm.log10.v4bf16(<4 x bfloat> undef)
call <8 x bfloat> @llvm.log10.v8bf16(<8 x bfloat> undef)
call <16 x bfloat> @llvm.log10.v16bf16(<16 x bfloat> undef)
- call <vscale x 1 x bfloat> @llvm.log10.nvx1bf16(<vscale x 1 x bfloat> undef)
- call <vscale x 2 x bfloat> @llvm.log10.nvx2bf16(<vscale x 2 x bfloat> undef)
- call <vscale x 4 x bfloat> @llvm.log10.nvx4bf16(<vscale x 4 x bfloat> undef)
- call <vscale x 8 x bfloat> @llvm.log10.nvx8bf16(<vscale x 8 x bfloat> undef)
- call <vscale x 16 x bfloat> @llvm.log10.nvx16bf16(<vscale x 16 x bfloat> undef)
+ call <vscale x 1 x bfloat> @llvm.log10.nxv1bf16(<vscale x 1 x bfloat> undef)
+ call <vscale x 2 x bfloat> @llvm.log10.nxv2bf16(<vscale x 2 x bfloat> undef)
+ call <vscale x 4 x bfloat> @llvm.log10.nxv4bf16(<vscale x 4 x bfloat> undef)
+ call <vscale x 8 x bfloat> @llvm.log10.nxv8bf16(<vscale x 8 x bfloat> undef)
+ call <vscale x 16 x bfloat> @llvm.log10.nxv16bf16(<vscale x 16 x bfloat> undef)
call float @llvm.log10.f32(float undef)
call <2 x float> @llvm.log10.v2f32(<2 x float> undef)
call <4 x float> @llvm.log10.v4f32(<4 x float> undef)
call <8 x float> @llvm.log10.v8f32(<8 x float> undef)
call <16 x float> @llvm.log10.v16f32(<16 x float> undef)
- call <vscale x 1 x float> @llvm.log10.nvx1f32(<vscale x 1 x float> undef)
- call <vscale x 2 x float> @llvm.log10.nvx2f32(<vscale x 2 x float> undef)
- call <vscale x 4 x float> @llvm.log10.nvx4f32(<vscale x 4 x float> undef)
- call <vscale x 8 x float> @llvm.log10.nvx8f32(<vscale x 8 x float> undef)
- call <vscale x 16 x float> @llvm.log10.nvx16f32(<vscale x 16 x float> undef)
+ call <vscale x 1 x float> @llvm.log10.nxv1f32(<vscale x 1 x float> undef)
+ call <vscale x 2 x float> @llvm.log10.nxv2f32(<vscale x 2 x float> undef)
+ call <vscale x 4 x float> @llvm.log10.nxv4f32(<vscale x 4 x float> undef)
+ call <vscale x 8 x float> @llvm.log10.nxv8f32(<vscale x 8 x float> undef)
+ call <vscale x 16 x float> @llvm.log10.nxv16f32(<vscale x 16 x float> undef)
call double @llvm.log10.f64(double undef)
call <2 x double> @llvm.log10.v2f64(<2 x double> undef)
call <4 x double> @llvm.log10.v4f64(<4 x double> undef)
call <8 x double> @llvm.log10.v8f64(<8 x double> undef)
call <16 x double> @llvm.log10.v16f64(<16 x double> undef)
- call <vscale x 1 x double> @llvm.log10.nvx1f64(<vscale x 1 x double> undef)
- call <vscale x 2 x double> @llvm.log10.nvx2f64(<vscale x 2 x double> undef)
- call <vscale x 4 x double> @llvm.log10.nvx4f64(<vscale x 4 x double> undef)
- call <vscale x 8 x double> @llvm.log10.nvx8f64(<vscale x 8 x double> undef)
+ call <vscale x 1 x double> @llvm.log10.nxv1f64(<vscale x 1 x double> undef)
+ call <vscale x 2 x double> @llvm.log10.nxv2f64(<vscale x 2 x double> undef)
+ call <vscale x 4 x double> @llvm.log10.nxv4f64(<vscale x 4 x double> undef)
+ call <vscale x 8 x double> @llvm.log10.nxv8f64(<vscale x 8 x double> undef)
ret void
}
@@ -607,10 +607,10 @@ define void @log10_f16() {
call <4 x half> @llvm.log10.v4f16(<4 x half> undef)
call <8 x half> @llvm.log10.v8f16(<8 x half> undef)
call <16 x half> @llvm.log10.v16f16(<16 x half> undef)
- call <vscale x 1 x half> @llvm.log10.nvx1f16(<vscale x 1 x half> undef)
- call <vscale x 2 x half> @llvm.log10.nvx2f16(<vscale x 2 x half> undef)
- call <vscale x 4 x half> @llvm.log10.nvx4f16(<vscale x 4 x half> undef)
- call <vscale x 8 x half> @llvm.log10.nvx8f16(<vscale x 8 x half> undef)
+ call <vscale x 1 x half> @llvm.log10.nxv1f16(<vscale x 1 x half> undef)
+ call <vscale x 2 x half> @llvm.log10.nxv2f16(<vscale x 2 x half> undef)
+ call <vscale x 4 x half> @llvm.log10.nxv4f16(<vscale x 4 x half> undef)
+ call <vscale x 8 x half> @llvm.log10.nxv8f16(<vscale x 8 x half> undef)
ret void
}
@@ -652,30 +652,30 @@ define void @log2() {
call <4 x bfloat> @llvm.log2.v4bf16(<4 x bfloat> undef)
call <8 x bfloat> @llvm.log2.v8bf16(<8 x bfloat> undef)
call <16 x bfloat> @llvm.log2.v16bf16(<16 x bfloat> undef)
- call <vscale x 1 x bfloat> @llvm.log2.nvx1bf16(<vscale x 1 x bfloat> undef)
- call <vscale x 2 x bfloat> @llvm.log2.nvx2bf16(<vscale x 2 x bfloat> undef)
- call <vscale x 4 x bfloat> @llvm.log2.nvx4bf16(<vscale x 4 x bfloat> undef)
- call <vscale x 8 x bfloat> @llvm.log2.nvx8bf16(<vscale x 8 x bfloat> undef)
- call <vscale x 16 x bfloat> @llvm.log2.nvx16bf16(<vscale x 16 x bfloat> undef)
+ call <vscale x 1 x bfloat> @llvm.log2.nxv1bf16(<vscale x 1 x bfloat> undef)
+ call <vscale x 2 x bfloat> @llvm.log2.nxv2bf16(<vscale x 2 x bfloat> undef)
+ call <vscale x 4 x bfloat> @llvm.log2.nxv4bf16(<vscale x 4 x bfloat> undef)
+ call <vscale x 8 x bfloat> @llvm.log2.nxv8bf16(<vscale x 8 x bfloat> undef)
+ call <vscale x 16 x bfloat> @llvm.log2.nxv16bf16(<vscale x 16 x bfloat> undef)
call float @llvm.log2.f32(float undef)
call <2 x float> @llvm.log2.v2f32(<2 x float> undef)
call <4 x float> @llvm.log2.v4f32(<4 x float> undef)
call <8 x float> @llvm.log2.v8f32(<8 x float> undef)
call <16 x float> @llvm.log2.v16f32(<16 x float> undef)
- call <vscale x 1 x float> @llvm.log2.nvx1f32(<vscale x 1 x float> undef)
- call <vscale x 2 x float> @llvm.log2.nvx2f32(<vscale x 2 x float> undef)
- call <vscale x 4 x float> @llvm.log2.nvx4f32(<vscale x 4 x float> undef)
- call <vscale x 8 x float> @llvm.log2.nvx8f32(<vscale x 8 x float> undef)
- call <vscale x 16 x float> @llvm.log2.nvx16f32(<vscale x 16 x float> undef)
+ call <vscale x 1 x float> @llvm.log2.nxv1f32(<vscale x 1 x float> undef)
+ call <vscale x 2 x float> @llvm.log2.nxv2f32(<vscale x 2 x float> undef)
+ call <vscale x 4 x float> @llvm.log2.nxv4f32(<vscale x 4 x float> undef)
+ call <vscale x 8 x float> @llvm.log2.nxv8f32(<vscale x 8 x float> undef)
+ call <vscale x 16 x float> @llvm.log2.nxv16f32(<vscale x 16 x float> undef)
call double @llvm.log2.f64(double undef)
call <2 x double> @llvm.log2.v2f64(<2 x double> undef)
call <4 x double> @llvm.log2.v4f64(<4 x double> undef)
call <8 x double> @llvm.log2.v8f64(<8 x double> undef)
call <16 x double> @llvm.log2.v16f64(<16 x double> undef)
- call <vscale x 1 x double> @llvm.log2.nvx1f64(<vscale x 1 x double> undef)
- call <vscale x 2 x double> @llvm.log2.nvx2f64(<vscale x 2 x double> undef)
- call <vscale x 4 x double> @llvm.log2.nvx4f64(<vscale x 4 x double> undef)
- call <vscale x 8 x double> @llvm.log2.nvx8f64(<vscale x 8 x double> undef)
+ call <vscale x 1 x double> @llvm.log2.nxv1f64(<vscale x 1 x double> undef)
+ call <vscale x 2 x double> @llvm.log2.nxv2f64(<vscale x 2 x double> undef)
+ call <vscale x 4 x double> @llvm.log2.nxv4f64(<vscale x 4 x double> undef)
+ call <vscale x 8 x double> @llvm.log2.nxv8f64(<vscale x 8 x double> undef)
ret void
}
@@ -709,10 +709,10 @@ define void @log2_f16() {
call <4 x half> @llvm.log2.v4f16(<4 x half> undef)
call <8 x half> @llvm.log2.v8f16(<8 x half> undef)
call <16 x half> @llvm.log2.v16f16(<16 x half> undef)
- call <vscale x 1 x half> @llvm.log2.nvx1f16(<vscale x 1 x half> undef)
- call <vscale x 2 x half> @llvm.log2.nvx2f16(<vscale x 2 x half> undef)
- call <vscale x 4 x half> @llvm.log2.nvx4f16(<vscale x 4 x half> undef)
- call <vscale x 8 x half> @llvm.log2.nvx8f16(<vscale x 8 x half> undef)
+ call <vscale x 1 x half> @llvm.log2.nxv1f16(<vscale x 1 x half> undef)
+ call <vscale x 2 x half> @llvm.log2.nxv2f16(<vscale x 2 x half> undef)
+ call <vscale x 4 x half> @llvm.log2.nxv4f16(<vscale x 4 x half> undef)
+ call <vscale x 8 x half> @llvm.log2.nxv8f16(<vscale x 8 x half> undef)
ret void
}
diff --git a/llvm/test/Analysis/CostModel/RISCV/fround.ll b/llvm/test/Analysis/CostModel/RISCV/fround.ll
index c6826760a45bee..cc2ff2f5153b3f 100644
--- a/llvm/test/Analysis/CostModel/RISCV/fround.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/fround.ll
@@ -40,30 +40,30 @@ define void @floor() {
call <4 x bfloat> @llvm.floor.v4bf16(<4 x bfloat> undef)
call <8 x bfloat> @llvm.floor.v8bf16(<8 x bfloat> undef)
call <16 x bfloat> @llvm.floor.v16bf16(<16 x bfloat> undef)
- call <vscale x 1 x bfloat> @llvm.floor.nvx1bf16(<vscale x 1 x bfloat> undef)
- call <vscale x 2 x bfloat> @llvm.floor.nvx2bf16(<vscale x 2 x bfloat> undef)
- call <vscale x 4 x bfloat> @llvm.floor.nvx4bf16(<vscale x 4 x bfloat> undef)
- call <vscale x 8 x bfloat> @llvm.floor.nvx8bf16(<vscale x 8 x bfloat> undef)
- call <vscale x 16 x bfloat> @llvm.floor.nvx16bf16(<vscale x 16 x bfloat> undef)
+ call <vscale x 1 x bfloat> @llvm.floor.nxv1bf16(<vscale x 1 x bfloat> undef)
+ call <vscale x 2 x bfloat> @llvm.floor.nxv2bf16(<vscale x 2 x bfloat> undef)
+ call <vscale x 4 x bfloat> @llvm.floor.nxv4bf16(<vscale x 4 x bfloat> undef)
+ call <vscale x 8 x bfloat> @llvm.floor.nxv8bf16(<vscale x 8 x bfloat> undef)
+ call <vscale x 16 x bfloat> @llvm.floor.nxv16bf16(<vscale x 16 x bfloat> undef)
call float @llvm.floor.f32(float undef)
call <2 x float> @llvm.floor.v2f32(<2 x float> undef)
call <4 x float> @llvm.floor.v4f32(<4 x float> undef)
call <8 x float> @llvm.floor.v8f32(<8 x float> undef)
call <16 x float> @llvm.floor.v16f32(<16 x float> undef)
- call <vscale x 1 x float> @llvm.floor.nvx1f32(<vscale x 1 x float> undef)
- call <vscale x 2 x float> @llvm.floor.nvx2f32(<vscale x 2 x float> undef)
- call <vscale x 4 x float> @llvm.floor.nvx4f32(<vscale x 4 x float> undef)
- call <vscale x 8 x float> @llvm.floor.nvx8f32(<vscale x 8 x float> undef)
- call <vscale x 16 x float> @llvm.floor.nvx16f32(<vscale x 16 x float> undef)
+ call <vscale x 1 x float> @llvm.floor.nxv1f32(<vscale x 1 x float> undef)
+ call <vscale x 2 x float> @llvm.floor.nxv2f32(<vscale x 2 x float> undef)
+ call <vscale x 4 x float> @llvm.floor.nxv4f32(<vscale x 4 x float> undef)
+ call <vscale x 8 x float> @llvm.floor.nxv8f32(<vscale x 8 x float> undef)
+ call <vscale x 16 x float> @llvm.floor.nxv16f32(<vscale x 16 x float> undef)
call double @llvm.floor.f64(double undef)
call <2 x double> @llvm.floor.v2f64(<2 x double> undef)
call <4 x double> @llvm.floor.v4f64(<4 x double> undef)
call <8 x double> @llvm.floor.v8f64(<8 x double> undef)
call <16 x double> @llvm.floor.v16f64(<16 x double> undef)
- call <vscale x 1 x double> @llvm.floor.nvx1f64(<vscale x 1 x double> undef)
- call <vscale x 2 x double> @llvm.floor.nvx2f64(<vscale x 2 x double> undef)
- call <vscale x 4 x double> @llvm.floor.nvx4f64(<vscale x 4 x double> undef)
- call <vscale x 8 x double> @llvm.floor.nvx8f64(<vscale x 8 x double> undef)
+ call <vscale x 1 x double> @llvm.floor.nxv1f64(<vscale x 1 x double> undef)
+ call <vscale x 2 x double> @llvm.floor.nxv2f64(<vscale x 2 x double> undef)
+ call <vscale x 4 x double> @llvm.floor.nxv4f64(<vscale x 4 x double> undef)
+ call <vscale x 8 x double> @llvm.floor.nxv8f64(<vscale x 8 x double> undef)
ret void
}
@@ -99,11 +99,11 @@ define void @floor_fp16() {
call <4 x half> @llvm.floor.v4f16(<4 x half> undef)
call <8 x half> @llvm.floor.v8f16(<8 x half> undef)
call <16 x half> @llvm.floor.v16f16(<16 x half> undef)
- call <vscale x 1 x half> @llvm.floor.nvx1f16(<vscale x 1 x half> undef)
- call <vscale x 2 x half> @llvm.floor.nvx2f16(<vscale x 2 x half> undef)
- call <vscale x 4 x half> @llvm.floor.nvx4f16(<vscale x 4 x half> undef)
- call <vscale x 8 x half> @llvm.floor.nvx8f16(<vscale x 8 x half> undef)
- call <vscale x 16 x half> @llvm.floor.nvx16f16(<vscale x 16 x half> undef)
+ call <vscale x 1 x half> @llvm.floor.nxv1f16(<vscale x 1 x half> undef)
+ call <vscale x 2 x half> @llvm.floor.nxv2f16(<vscale x 2 x half> undef)
+ call <vscale x 4 x half> @llvm.floor.nxv4f16(<vscale x 4 x half> undef)
+ call <vscale x 8 x half> @llvm.floor.nxv8f16(<vscale x 8 x half> undef)
+ call <vscale x 16 x half> @llvm.floor.nxv16f16(<vscale x 16 x half> undef)
ret void
}
@@ -145,30 +145,30 @@ define void @ceil() {
call <4 x bfloat> @llvm.ceil.v4bf16(<4 x bfloat> undef)
call <8 x bfloat> @llvm.ceil.v8bf16(<8 x bfloat> undef)
call <16 x bfloat> @llvm.ceil.v16bf16(<16 x bfloat> undef)
- call <vscale x 1 x bfloat> @llvm.ceil.nvx1bf16(<vscale x 1 x bfloat> undef)
- call <vscale x 2 x bfloat> @llvm.ceil.nvx2bf16(<vscale x 2 x bfloat> undef)
- call <vscale x 4 x bfloat> @llvm.ceil.nvx4bf16(<vscale x 4 x bfloat> undef)
- call <vscale x 8 x bfloat> @llvm.ceil.nvx8bf16(<vscale x 8 x bfloat> undef)
- call <vscale x 16 x bfloat> @llvm.ceil.nvx16bf16(<vscale x 16 x bfloat> undef)
+ call <vscale x 1 x bfloat> @llvm.ceil.nxv1bf16(<vscale x 1 x bfloat> undef)
+ call <vscale x 2 x bfloat> @llvm.ceil.nxv2bf16(<vscale x 2 x bfloat> undef)
+ call <vscale x 4 x bfloat> @llvm.ceil.nxv4bf16(<vscale x 4 x bfloat> undef)
+ call <vscale x 8 x bfloat> @llvm.ceil.nxv8bf16(<vscale x 8 x bfloat> undef)
+ call <vscale x 16 x bfloat> @llvm.ceil.nxv16bf16(<vscale x 16 x bfloat> undef)
call float @llvm.ceil.f32(float undef)
call <2 x float> @llvm.ceil.v2f32(<2 x float> undef)
call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)
call <8 x float> @llvm.ceil.v8f32(<8 x float> undef)
call <16 x float> @llvm.ceil.v16f32(<16 x float> undef)
- call <vscale x 1 x float> @llvm.ceil.nvx1f32(<vscale x 1 x float> undef)
- call <vscale x 2 x float> @llvm.ceil.nvx2f32(<vscale x 2 x float> undef)
- call <vscale x 4 x float> @llvm.ceil.nvx4f32(<vscale x 4 x float> undef)
- call <vscale x 8 x float> @llvm.ceil.nvx8f32(<vscale x 8 x float> undef)
- call <vscale x 16 x float> @llvm.ceil.nvx16f32(<vscale x 16 x float> undef)
+ call <vscale x 1 x float> @llvm.ceil.nxv1f32(<vscale x 1 x float> undef)
+ call <vscale x 2 x float> @llvm.ceil.nxv2f32(<vscale x 2 x float> undef)
+ call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> undef)
+ call <vscale x 8 x float> @llvm.ceil.nxv8f32(<vscale x 8 x float> undef)
+ call <vscale x 16 x float> @llvm.ceil.nxv16f32(<vscale x 16 x float> undef)
call double @llvm.ceil.f64(double undef)
call <2 x double> @llvm.ceil.v2f64(<2 x double> undef)
call <4 x double> @llvm.ceil.v4f64(<4 x double> undef)
call <8 x double> @llvm.ceil.v8f64(<8 x double> undef)
call <16 x double> @llvm.ceil.v16f64(<16 x double> undef)
- call <vscale x 1 x double> @llvm.ceil.nvx1f64(<vscale x 1 x double> undef)
- call <vscale x 2 x double> @llvm.ceil.nvx2f64(<vscale x 2 x double> undef)
- call <vscale x 4 x double> @llvm.ceil.nvx4f64(<vscale x 4 x double> undef)
- call <vscale x 8 x double> @llvm.ceil.nvx8f64(<vscale x 8 x double> undef)
+ call <vscale x 1 x double> @llvm.ceil.nxv1f64(<vscale x 1 x double> undef)
+ call <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double> undef)
+ call <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double> undef)
+ call <vscale x 8 x double> @llvm.ceil.nxv8f64(<vscale x 8 x double> undef)
ret void
}
@@ -204,11 +204,11 @@ define void @ceil_fp16() {
call <4 x half> @llvm.ceil.v4f16(<4 x half> undef)
call <8 x half> @llvm.ceil.v8f16(<8 x half> undef)
call <16 x half> @llvm.ceil.v16f16(<16 x half> undef)
- call <vscale x 1 x half> @llvm.ceil.nvx1f16(<vscale x 1 x half> undef)
- call <vscale x 2 x half> @llvm.ceil.nvx2f16(<vscale x 2 x half> undef)
- call <vscale x 4 x half> @llvm.ceil.nvx4f16(<vscale x 4 x half> undef)
- call <vscale x 8 x half> @llvm.ceil.nvx8f16(<vscale x 8 x half> undef)
- call <vscale x 16 x half> @llvm.ceil.nvx16f16(<vscale x 16 x half> undef)
+ call <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half> undef)
+ call <vscale x 2 x half> @llvm.ceil.nxv2f16(<vscale x 2 x half> undef)
+ call <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half> undef)
+ call <vscale x 8 x half> @llvm.ceil.nxv8f16(<vscale x 8 x half> undef)
+ call <vscale x 16 x half> @llvm.ceil.nxv16f16(<vscale x 16 x half> undef)
ret void
}
@@ -250,30 +250,30 @@ define void @trunc() {
call <4 x bfloat> @llvm.trunc.v4bf16(<4 x bfloat> undef)
call <8 x bfloat> @llvm.trunc.v8bf16(<8 x bfloat> undef)
call <16 x bfloat> @llvm.trunc.v16bf16(<16 x bfloat> undef)
- call <vscale x 1 x bfloat> @llvm.trunc.nvx1bf16(<vscale x 1 x bfloat> undef)
- call <vscale x 2 x bfloat> @llvm.trunc.nvx2bf16(<vscale x 2 x bfloat> undef)
- call <vscale x 4 x bfloat> @llvm.trunc.nvx4bf16(<vscale x 4 x bfloat> undef)
- call <vscale x 8 x bfloat> @llvm.trunc.nvx8bf16(<vscale x 8 x bfloat> undef)
- call <vscale x 16 x bfloat> @llvm.trunc.nvx16bf16(<vscale x 16 x bfloat> undef)
+ call <vscale x 1 x bfloat> @llvm.trunc.nxv1bf16(<vscale x 1 x bfloat> undef)
+ call <vscale x 2 x bfloat> @llvm.trunc.nxv2bf16(<vscale x 2 x bfloat> undef)
+ call <vscale x 4 x bfloat> @llvm.trunc.nxv4bf16(<vscale x 4 x bfloat> undef)
+ call <vscale x 8 x bfloat> @llvm.trunc.nxv8bf16(<vscale x 8 x bfloat> undef)
+ call <vscale x 16 x bfloat> @llvm.trunc.nxv16bf16(<vscale x 16 x bfloat> undef)
call float @llvm.trunc.f32(float undef)
call <2 x float> @llvm.trunc.v2f32(<2 x float> undef)
call <4 x float> @llvm.trunc.v4f32(<4 x float> undef)
call <8 x float> @llvm.trunc.v8f32(<8 x float> undef)
call <16 x float> @llvm.trunc.v16f32(<16 x float> undef)
- call <vscale x 1 x float> @llvm.trunc.nvx1f32(<vscale x 1 x float> undef)
- call <vscale x 2 x float> @llvm.trunc.nvx2f32(<vscale x 2 x float> undef)
- call <vscale x 4 x float> @llvm.trunc.nvx4f32(<vscale x 4 x float> undef)
- call <vscale x 8 x float> @llvm.trunc.nvx8f32(<vscale x 8 x float> undef)
- call <vscale x 16 x float> @llvm.trunc.nvx16f32(<vscale x 16 x float> undef)
+ call <vscale x 1 x float> @llvm.trunc.nxv1f32(<vscale x 1 x float> undef)
+ call <vscale x 2 x float> @llvm.trunc.nxv2f32(<vscale x 2 x float> undef)
+ call <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> undef)
+ call <vscale x 8 x float> @llvm.trunc.nxv8f32(<vscale x 8 x float> undef)
+ call <vscale x 16 x float> @llvm.trunc.nxv16f32(<vscale x 16 x float> undef)
call double @llvm.trunc.f64(double undef)
call <2 x double> @llvm.trunc.v2f64(<2 x double> undef)
call <4 x double> @llvm.trunc.v4f64(<4 x double> undef)
call <8 x double> @llvm.trunc.v8f64(<8 x double> undef)
call <16 x double> @llvm.trunc.v16f64(<16 x double> undef)
- call <vscale x 1 x double> @llvm.trunc.nvx1f64(<vscale x 1 x double> undef)
- call <vscale x 2 x double> @llvm.trunc.nvx2f64(<vscale x 2 x double> undef)
- call <vscale x 4 x double> @llvm.trunc.nvx4f64(<vscale x 4 x double> undef)
- call <vscale x 8 x double> @llvm.trunc.nvx8f64(<vscale x 8 x double> undef)
+ call <vscale x 1 x double> @llvm.trunc.nxv1f64(<vscale x 1 x double> undef)
+ call <vscale x 2 x double> @llvm.trunc.nxv2f64(<vscale x 2 x double> undef)
+ call <vscale x 4 x double> @llvm.trunc.nxv4f64(<vscale x 4 x double> undef)
+ call <vscale x 8 x double> @llvm.trunc.nxv8f64(<vscale x 8 x double> undef)
ret void
}
@@ -309,11 +309,11 @@ define void @trunc_fp16() {
call <4 x half> @llvm.trunc.v4f16(<4 x half> undef)
call <8 x half> @llvm.trunc.v8f16(<8 x half> undef)
call <16 x half> @llvm.trunc.v16f16(<16 x half> undef)
- call <vscale x 1 x half> @llvm.trunc.nvx1f16(<vscale x 1 x half> undef)
- call <vscale x 2 x half> @llvm.trunc.nvx2f16(<vscale x 2 x half> undef)
- call <vscale x 4 x half> @llvm.trunc.nvx4f16(<vscale x 4 x half> undef)
- call <vscale x 8 x half> @llvm.trunc.nvx8f16(<vscale x 8 x half> undef)
- call <vscale x 16 x half> @llvm.trunc.nvx16f16(<vscale x 16 x half> undef)
+ call <vscale x 1 x half> @llvm.trunc.nxv1f16(<vscale x 1 x half> undef)
+ call <vscale x 2 x half> @llvm.trunc.nxv2f16(<vscale x 2 x half> undef)
+ call <vscale x 4 x half> @llvm.trunc.nxv4f16(<vscale x 4 x half> undef)
+ call <vscale x 8 x half> @llvm.trunc.nxv8f16(<vscale x 8 x half> undef)
+ call <vscale x 16 x half> @llvm.trunc.nxv16f16(<vscale x 16 x half> undef)
ret void
}
@@ -355,30 +355,30 @@ define void @rint() {
call <4 x bfloat> @llvm.rint.v4bf16(<4 x bfloat> undef)
call <8 x bfloat> @llvm.rint.v8bf16(<8 x bfloat> undef)
call <16 x bfloat> @llvm.rint.v16bf16(<16 x bfloat> undef)
- call <vscale x 1 x bfloat> @llvm.rint.nvx1bf16(<vscale x 1 x bfloat> undef)
- call <vscale x 2 x bfloat> @llvm.rint.nvx2bf16(<vscale x 2 x bfloat> undef)
- call <vscale x 4 x bfloat> @llvm.rint.nvx4bf16(<vscale x 4 x bfloat> undef)
- call <vscale x 8 x bfloat> @llvm.rint.nvx8bf16(<vscale x 8 x bfloat> undef)
- call <vscale x 16 x bfloat> @llvm.rint.nvx16bf16(<vscale x 16 x bfloat> undef)
+ call <vscale x 1 x bfloat> @llvm.rint.nxv1bf16(<vscale x 1 x bfloat> undef)
+ call <vscale x 2 x bfloat> @llvm.rint.nxv2bf16(<vscale x 2 x bfloat> undef)
+ call <vscale x 4 x bfloat> @llvm.rint.nxv4bf16(<vscale x 4 x bfloat> undef)
+ call <vscale x 8 x bfloat> @llvm.rint.nxv8bf16(<vscale x 8 x bfloat> undef)
+ call <vscale x 16 x bfloat> @llvm.rint.nxv16bf16(<vscale x 16 x bfloat> undef)
call float @llvm.rint.f32(float undef)
call <2 x float> @llvm.rint.v2f32(<2 x float> undef)
call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
- call <vscale x 1 x float> @llvm.rint.nvx1f32(<vscale x 1 x float> undef)
- call <vscale x 2 x float> @llvm.rint.nvx2f32(<vscale x 2 x float> undef)
- call <vscale x 4 x float> @llvm.rint.nvx4f32(<vscale x 4 x float> undef)
- call <vscale x 8 x float> @llvm.rint.nvx8f32(<vscale x 8 x float> undef)
- call <vscale x 16 x float> @llvm.rint.nvx16f32(<vscale x 16 x float> undef)
+ call <vscale x 1 x float> @llvm.rint.nxv1f32(<vscale x 1 x float> undef)
+ call <vscale x 2 x float> @llvm.rint.nxv2f32(<vscale x 2 x float> undef)
+ call <vscale x 4 x float> @llvm.rint.nxv4f32(<vscale x 4 x float> undef)
+ call <vscale x 8 x float> @llvm.rint.nxv8f32(<vscale x 8 x float> undef)
+ call <vscale x 16 x float> @llvm.rint.nxv16f32(<vscale x 16 x float> undef)
call double @llvm.rint.f64(double undef)
call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
call <16 x double> @llvm.rint.v16f64(<16 x double> undef)
- call <vscale x 1 x double> @llvm.rint.nvx1f64(<vscale x 1 x double> undef)
- call <vscale x 2 x double> @llvm.rint.nvx2f64(<vscale x 2 x double> undef)
- call <vscale x 4 x double> @llvm.rint.nvx4f64(<vscale x 4 x double> undef)
- call <vscale x 8 x double> @llvm.rint.nvx8f64(<vscale x 8 x double> undef)
+ call <vscale x 1 x double> @llvm.rint.nxv1f64(<vscale x 1 x double> undef)
+ call <vscale x 2 x double> @llvm.rint.nxv2f64(<vscale x 2 x double> undef)
+ call <vscale x 4 x double> @llvm.rint.nxv4f64(<vscale x 4 x double> undef)
+ call <vscale x 8 x double> @llvm.rint.nxv8f64(<vscale x 8 x double> undef)
ret void
}
@@ -414,11 +414,11 @@ define void @rint_fp16() {
call <4 x half> @llvm.rint.v4f16(<4 x half> undef)
call <8 x half> @llvm.rint.v8f16(<8 x half> undef)
call <16 x half> @llvm.rint.v16f16(<16 x half> undef)
- call <vscale x 1 x half> @llvm.rint.nvx1f16(<vscale x 1 x half> undef)
- call <vscale x 2 x half> @llvm.rint.nvx2f16(<vscale x 2 x half> undef)
- call <vscale x 4 x half> @llvm.rint.nvx4f16(<vscale x 4 x half> undef)
- call <vscale x 8 x half> @llvm.rint.nvx8f16(<vscale x 8 x half> undef)
- call <vscale x 16 x half> @llvm.rint.nvx16f16(<vscale x 16 x half> undef)
+ call <vscale x 1 x half> @llvm.rint.nxv1f16(<vscale x 1 x half> undef)
+ call <vscale x 2 x half> @llvm.rint.nxv2f16(<vscale x 2 x half> undef)
+ call <vscale x 4 x half> @llvm.rint.nxv4f16(<vscale x 4 x half> undef)
+ call <vscale x 8 x half> @llvm.rint.nxv8f16(<vscale x 8 x half> undef)
+ call <vscale x 16 x half> @llvm.rint.nxv16f16(<vscale x 16 x half> undef)
ret void
}
@@ -460,30 +460,30 @@ define void @lrint() {
call <4 x i64> @llvm.lrint.v4i64.v4bf16(<4 x bfloat> undef)
call <8 x i64> @llvm.lrint.v8i64.v8bf16(<8 x bfloat> undef)
call <16 x i64> @llvm.lrint.v16i64.v16bf16(<16 x bfloat> undef)
- call <vscale x 1 x i64> @llvm.lrint.nvx1i64.nvx1bf16(<vscale x 1 x bfloat> undef)
- call <vscale x 2 x i64> @llvm.lrint.nvx2i64.nvx2bf16(<vscale x 2 x bfloat> undef)
- call <vscale x 4 x i64> @llvm.lrint.nvx4i64.nvx4bf16(<vscale x 4 x bfloat> undef)
- call <vscale x 8 x i64> @llvm.lrint.nvx8i64.nvx8bf16(<vscale x 8 x bfloat> undef)
- call <vscale x 16 x i64> @llvm.lrint.nvx16i64.nvx16bf16(<vscale x 16 x bfloat> undef)
+ call <vscale x 1 x i64> @llvm.lrint.nxv1i64.nxv1bf16(<vscale x 1 x bfloat> undef)
+ call <vscale x 2 x i64> @llvm.lrint.nxv2i64.nxv2bf16(<vscale x 2 x bfloat> undef)
+ call <vscale x 4 x i64> @llvm.lrint.nxv4i64.nxv4bf16(<vscale x 4 x bfloat> undef)
+ call <vscale x 8 x i64> @llvm.lrint.nxv8i64.nxv8bf16(<vscale x 8 x bfloat> undef)
+ call <vscale x 16 x i64> @llvm.lrint.nxv16i64.nxv16bf16(<vscale x 16 x bfloat> undef)
call i64 @llvm.lrint.i64.f32(float undef)
call <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float> undef)
call <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float> undef)
call <8 x i64> @llvm.lrint.v8i64.v8f32(<8 x float> undef)
call <16 x i64> @llvm.lrint.v16i64.v16f32(<16 x float> undef)
- call <vscale x 1 x i64> @llvm.lrint.nvx1i64.nvx1f32(<vscale x 1 x float> undef)
- call <vscale x 2 x i64> @llvm.lrint.nvx2i64.nvx2f32(<vscale x 2 x float> undef)
- call <vscale x 4 x i64> @llvm.lrint.nvx4i64.nvx4f32(<vscale x 4 x float> undef)
- call <vscale x 8 x i64> @llvm.lrint.nvx8i64.nvx8f32(<vscale x 8 x float> undef)
- call <vscale x 16 x i64> @llvm.lrint.nvx16i64.nvx16f32(<vscale x 16 x float> undef)
+ call <vscale x 1 x i64> @llvm.lrint.nxv1i64.nxv1f32(<vscale x 1 x float> undef)
+ call <vscale x 2 x i64> @llvm.lrint.nxv2i64.nxv2f32(<vscale x 2 x float> undef)
+ call <vscale x 4 x i64> @llvm.lrint.nxv4i64.nxv4f32(<vscale x 4 x float> undef)
+ call <vscale x 8 x i64> @llvm.lrint.nxv8i64.nxv8f32(<vscale x 8 x float> undef)
+ call <vscale x 16 x i64> @llvm.lrint.nxv16i64.nxv16f32(<vscale x 16 x float> undef)
call i64 @llvm.lrint.i64.f64(double undef)
call <2 x i64> @llvm.lrint.v2i64.v2f64(<2 x double> undef)
call <4 x i64> @llvm.lrint.v4i64.v4f64(<4 x double> undef)
call <8 x i64> @llvm.lrint.v8i64.v8f64(<8 x double> undef)
call <16 x i64> @llvm.lrint.v16i64.v16f64(<16 x double> undef)
- call <vscale x 1 x i64> @llvm.lrint.nvx1i64.nvx1f64(<vscale x 1 x double> undef)
- call <vscale x 2 x i64> @llvm.lrint.nvx2i64.nvx2f64(<vscale x 2 x double> undef)
- call <vscale x 4 x i64> @llvm.lrint.nvx4i64.nvx4f64(<vscale x 4 x double> undef)
- call <vscale x 8 x i64> @llvm.lrint.nvx8i64.nvx8f64(<vscale x 8 x double> undef)
+ call <vscale x 1 x i64> @llvm.lrint.nxv1i64.nxv1f64(<vscale x 1 x double> undef)
+ call <vscale x 2 x i64> @llvm.lrint.nxv2i64.nxv2f64(<vscale x 2 x double> undef)
+ call <vscale x 4 x i64> @llvm.lrint.nxv4i64.nxv4f64(<vscale x 4 x double> undef)
+ call <vscale x 8 x i64> @llvm.lrint.nxv8i64.nxv8f64(<vscale x 8 x double> undef)
ret void
}
@@ -506,11 +506,11 @@ define void @lrint_fp16() {
call <4 x i64> @llvm.lrint.v4f16(<4 x half> undef)
call <8 x i64> @llvm.lrint.v8f16(<8 x half> undef)
call <16 x i64> @llvm.lrint.v16f16(<16 x half> undef)
- call <vscale x 1 x i64> @llvm.lrint.nvx1f16(<vscale x 1 x half> undef)
- call <vscale x 2 x i64> @llvm.lrint.nvx2f16(<vscale x 2 x half> undef)
- call <vscale x 4 x i64> @llvm.lrint.nvx4f16(<vscale x 4 x half> undef)
- call <vscale x 8 x i64> @llvm.lrint.nvx8f16(<vscale x 8 x half> undef)
- call <vscale x 16 x i64> @llvm.lrint.nvx16f16(<vscale x 16 x half> undef)
+ call <vscale x 1 x i64> @llvm.lrint.nxv1f16(<vscale x 1 x half> undef)
+ call <vscale x 2 x i64> @llvm.lrint.nxv2f16(<vscale x 2 x half> undef)
+ call <vscale x 4 x i64> @llvm.lrint.nxv4f16(<vscale x 4 x half> undef)
+ call <vscale x 8 x i64> @llvm.lrint.nxv8f16(<vscale x 8 x half> undef)
+ call <vscale x 16 x i64> @llvm.lrint.nxv16f16(<vscale x 16 x half> undef)
ret void
}
@@ -552,30 +552,30 @@ define void @llrint() {
call <4 x i64> @llvm.llrint.v4i64.v4bf16(<4 x bfloat> undef)
call <8 x i64> @llvm.llrint.v8i64.v8bf16(<8 x bfloat> undef)
call <16 x i64> @llvm.llrint.v16i64.v16bf16(<16 x bfloat> undef)
- call <vscale x 1 x i64> @llvm.llrint.nvx1i64.nvx1bf16(<vscale x 1 x bfloat> undef)
- call <vscale x 2 x i64> @llvm.llrint.nvx2i64.nvx2bf16(<vscale x 2 x bfloat> undef)
- call <vscale x 4 x i64> @llvm.llrint.nvx4i64.nvx4bf16(<vscale x 4 x bfloat> undef)
- call <vscale x 8 x i64> @llvm.llrint.nvx8i64.nvx8bf16(<vscale x 8 x bfloat> undef)
- call <vscale x 16 x i64> @llvm.llrint.nvx16i64.nvx16bf16(<vscale x 16 x bfloat> undef)
+ call <vscale x 1 x i64> @llvm.llrint.nxv1i64.nxv1bf16(<vscale x 1 x bfloat> undef)
+ call <vscale x 2 x i64> @llvm.llrint.nxv2i64.nxv2bf16(<vscale x 2 x bfloat> undef)
+ call <vscale x 4 x i64> @llvm.llrint.nxv4i64.nxv4bf16(<vscale x 4 x bfloat> undef)
+ call <vscale x 8 x i64> @llvm.llrint.nxv8i64.nxv8bf16(<vscale x 8 x bfloat> undef)
+ call <vscale x 16 x i64> @llvm.llrint.nxv16i64.nxv16bf16(<vscale x 16 x bfloat> undef)
call i64 @llvm.llrint.i64.f32(float undef)
call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> undef)
call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
- call <vscale x 1 x i64> @llvm.llrint.nvx1i64.nvx1f32(<vscale x 1 x float> undef)
- call <vscale x 2 x i64> @llvm.llrint.nvx2i64.nvx2f32(<vscale x 2 x float> undef)
- call <vscale x 4 x i64> @llvm.llrint.nvx4i64.nvx4f32(<vscale x 4 x float> undef)
- call <vscale x 8 x i64> @llvm.llrint.nvx8i64.nvx8f32(<vscale x 8 x float> undef)
- call <vscale x 16 x i64> @llvm.llrint.nvx16i64.nvx16f32(<vscale x 16 x float> undef)
+ call <vscale x 1 x i64> @llvm.llrint.nxv1i64.nxv1f32(<vscale x 1 x float> undef)
+ call <vscale x 2 x i64> @llvm.llrint.nxv2i64.nxv2f32(<vscale x 2 x float> undef)
+ call <vscale x 4 x i64> @llvm.llrint.nxv4i64.nxv4f32(<vscale x 4 x float> undef)
+ call <vscale x 8 x i64> @llvm.llrint.nxv8i64.nxv8f32(<vscale x 8 x float> undef)
+ call <vscale x 16 x i64> @llvm.llrint.nxv16i64.nxv16f32(<vscale x 16 x float> undef)
call i64 @llvm.llrint.i64.f64(double undef)
call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
call <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double> undef)
- call <vscale x 1 x i64> @llvm.llrint.nvx1i64.nvx1f64(<vscale x 1 x double> undef)
- call <vscale x 2 x i64> @llvm.llrint.nvx2i64.nvx2f64(<vscale x 2 x double> undef)
- call <vscale x 4 x i64> @llvm.llrint.nvx4i64.nvx4f64(<vscale x 4 x double> undef)
- call <vscale x 8 x i64> @llvm.llrint.nvx8i64.nvx8f64(<vscale x 8 x double> undef)
+ call <vscale x 1 x i64> @llvm.llrint.nxv1i64.nxv1f64(<vscale x 1 x double> undef)
+ call <vscale x 2 x i64> @llvm.llrint.nxv2i64.nxv2f64(<vscale x 2 x double> undef)
+ call <vscale x 4 x i64> @llvm.llrint.nxv4i64.nxv4f64(<vscale x 4 x double> undef)
+ call <vscale x 8 x i64> @llvm.llrint.nxv8i64.nxv8f64(<vscale x 8 x double> undef)
ret void
}
@@ -598,11 +598,11 @@ define void @llrint_fp16() {
call <4 x i64> @llvm.llrint.v4f16(<4 x half> undef)
call <8 x i64> @llvm.llrint.v8f16(<8 x half> undef)
call <16 x i64> @llvm.llrint.v16f16(<16 x half> undef)
- call <vscale x 1 x i64> @llvm.llrint.nvx1f16(<vscale x 1 x half> undef)
- call <vscale x 2 x i64> @llvm.llrint.nvx2f16(<vscale x 2 x half> undef)
- call <vscale x 4 x i64> @llvm.llrint.nvx4f16(<vscale x 4 x half> undef)
- call <vscale x 8 x i64> @llvm.llrint.nvx8f16(<vscale x 8 x half> undef)
- call <vscale x 16 x i64> @llvm.llrint.nvx16f16(<vscale x 16 x half> undef)
+ call <vscale x 1 x i64> @llvm.llrint.nxv1f16(<vscale x 1 x half> undef)
+ call <vscale x 2 x i64> @llvm.llrint.nxv2f16(<vscale x 2 x half> undef)
+ call <vscale x 4 x i64> @llvm.llrint.nxv4f16(<vscale x 4 x half> undef)
+ call <vscale x 8 x i64> @llvm.llrint.nxv8f16(<vscale x 8 x half> undef)
+ call <vscale x 16 x i64> @llvm.llrint.nxv16f16(<vscale x 16 x half> undef)
ret void
}
@@ -644,30 +644,30 @@ define void @nearbyint() {
call <4 x bfloat> @llvm.nearbyint.v4bf16(<4 x bfloat> undef)
call <8 x bfloat> @llvm.nearbyint.v8bf16(<8 x bfloat> undef)
call <16 x bfloat> @llvm.nearbyint.v16bf16(<16 x bfloat> undef)
- call <vscale x 1 x bfloat> @llvm.nearbyint.nvx1bf16(<vscale x 1 x bfloat> undef)
- call <vscale x 2 x bfloat> @llvm.nearbyint.nvx2bf16(<vscale x 2 x bfloat> undef)
- call <vscale x 4 x bfloat> @llvm.nearbyint.nvx4bf16(<vscale x 4 x bfloat> undef)
- call <vscale x 8 x bfloat> @llvm.nearbyint.nvx8bf16(<vscale x 8 x bfloat> undef)
- call <vscale x 16 x bfloat> @llvm.nearbyint.nvx16bf16(<vscale x 16 x bfloat> undef)
+ call <vscale x 1 x bfloat> @llvm.nearbyint.nxv1bf16(<vscale x 1 x bfloat> undef)
+ call <vscale x 2 x bfloat> @llvm.nearbyint.nxv2bf16(<vscale x 2 x bfloat> undef)
+ call <vscale x 4 x bfloat> @llvm.nearbyint.nxv4bf16(<vscale x 4 x bfloat> undef)
+ call <vscale x 8 x bfloat> @llvm.nearbyint.nxv8bf16(<vscale x 8 x bfloat> undef)
+ call <vscale x 16 x bfloat> @llvm.nearbyint.nxv16bf16(<vscale x 16 x bfloat> undef)
call float @llvm.nearbyint.f32(float undef)
call <2 x float> @llvm.nearbyint.v2f32(<2 x float> undef)
call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef)
call <8 x float> @llvm.nearbyint.v8f32(<8 x float> undef)
call <16 x float> @llvm.nearbyint.v16f32(<16 x float> undef)
- call <vscale x 1 x float> @llvm.nearbyint.nvx1f32(<vscale x 1 x float> undef)
- call <vscale x 2 x float> @llvm.nearbyint.nvx2f32(<vscale x 2 x float> undef)
- call <vscale x 4 x float> @llvm.nearbyint.nvx4f32(<vscale x 4 x float> undef)
- call <vscale x 8 x float> @llvm.nearbyint.nvx8f32(<vscale x 8 x float> undef)
- call <vscale x 16 x float> @llvm.nearbyint.nvx16f32(<vscale x 16 x float> undef)
+ call <vscale x 1 x float> @llvm.nearbyint.nxv1f32(<vscale x 1 x float> undef)
+ call <vscale x 2 x float> @llvm.nearbyint.nxv2f32(<vscale x 2 x float> undef)
+ call <vscale x 4 x float> @llvm.nearbyint.nxv4f32(<vscale x 4 x float> undef)
+ call <vscale x 8 x float> @llvm.nearbyint.nxv8f32(<vscale x 8 x float> undef)
+ call <vscale x 16 x float> @llvm.nearbyint.nxv16f32(<vscale x 16 x float> undef)
call double @llvm.nearbyint.f64(double undef)
call <2 x double> @llvm.nearbyint.v2f64(<2 x double> undef)
call <4 x double> @llvm.nearbyint.v4f64(<4 x double> undef)
call <8 x double> @llvm.nearbyint.v8f64(<8 x double> undef)
call <16 x double> @llvm.nearbyint.v16f64(<16 x double> undef)
- call <vscale x 1 x double> @llvm.nearbyint.nvx1f64(<vscale x 1 x double> undef)
- call <vscale x 2 x double> @llvm.nearbyint.nvx2f64(<vscale x 2 x double> undef)
- call <vscale x 4 x double> @llvm.nearbyint.nvx4f64(<vscale x 4 x double> undef)
- call <vscale x 8 x double> @llvm.nearbyint.nvx8f64(<vscale x 8 x double> undef)
+ call <vscale x 1 x double> @llvm.nearbyint.nxv1f64(<vscale x 1 x double> undef)
+ call <vscale x 2 x double> @llvm.nearbyint.nxv2f64(<vscale x 2 x double> undef)
+ call <vscale x 4 x double> @llvm.nearbyint.nxv4f64(<vscale x 4 x double> undef)
+ call <vscale x 8 x double> @llvm.nearbyint.nxv8f64(<vscale x 8 x double> undef)
ret void
}
@@ -703,11 +703,11 @@ define void @nearbyint_fp16() {
call <4 x half> @llvm.nearbyint.v4f16(<4 x half> undef)
call <8 x half> @llvm.nearbyint.v8f16(<8 x half> undef)
call <16 x half> @llvm.nearbyint.v16f16(<16 x half> undef)
- call <vscale x 1 x half> @llvm.nearbyint.nvx1f16(<vscale x 1 x half> undef)
- call <vscale x 2 x half> @llvm.nearbyint.nvx2f16(<vscale x 2 x half> undef)
- call <vscale x 4 x half> @llvm.nearbyint.nvx4f16(<vscale x 4 x half> undef)
- call <vscale x 8 x half> @llvm.nearbyint.nvx8f16(<vscale x 8 x half> undef)
- call <vscale x 16 x half> @llvm.nearbyint.nvx16f16(<vscale x 16 x half> undef)
+ call <vscale x 1 x half> @llvm.nearbyint.nxv1f16(<vscale x 1 x half> undef)
+ call <vscale x 2 x half> @llvm.nearbyint.nxv2f16(<vscale x 2 x half> undef)
+ call <vscale x 4 x half> @llvm.nearbyint.nxv4f16(<vscale x 4 x half> undef)
+ call <vscale x 8 x half> @llvm.nearbyint.nxv8f16(<vscale x 8 x half> undef)
+ call <vscale x 16 x half> @llvm.nearbyint.nxv16f16(<vscale x 16 x half> undef)
ret void
}
@@ -749,30 +749,30 @@ define void @round() {
call <4 x bfloat> @llvm.round.v4bf16(<4 x bfloat> undef)
call <8 x bfloat> @llvm.round.v8bf16(<8 x bfloat> undef)
call <16 x bfloat> @llvm.round.v16bf16(<16 x bfloat> undef)
- call <vscale x 1 x bfloat> @llvm.round.nvx1bf16(<vscale x 1 x bfloat> undef)
- call <vscale x 2 x bfloat> @llvm.round.nvx2bf16(<vscale x 2 x bfloat> undef)
- call <vscale x 4 x bfloat> @llvm.round.nvx4bf16(<vscale x 4 x bfloat> undef)
- call <vscale x 8 x bfloat> @llvm.round.nvx8bf16(<vscale x 8 x bfloat> undef)
- call <vscale x 16 x bfloat> @llvm.round.nvx16bf16(<vscale x 16 x bfloat> undef)
+ call <vscale x 1 x bfloat> @llvm.round.nxv1bf16(<vscale x 1 x bfloat> undef)
+ call <vscale x 2 x bfloat> @llvm.round.nxv2bf16(<vscale x 2 x bfloat> undef)
+ call <vscale x 4 x bfloat> @llvm.round.nxv4bf16(<vscale x 4 x bfloat> undef)
+ call <vscale x 8 x bfloat> @llvm.round.nxv8bf16(<vscale x 8 x bfloat> undef)
+ call <vscale x 16 x bfloat> @llvm.round.nxv16bf16(<vscale x 16 x bfloat> undef)
call float @llvm.round.f32(float undef)
call <2 x float> @llvm.round.v2f32(<2 x float> undef)
call <4 x float> @llvm.round.v4f32(<4 x float> undef)
call <8 x float> @llvm.round.v8f32(<8 x float> undef)
call <16 x float> @llvm.round.v16f32(<16 x float> undef)
- call <vscale x 1 x float> @llvm.round.nvx1f32(<vscale x 1 x float> undef)
- call <vscale x 2 x float> @llvm.round.nvx2f32(<vscale x 2 x float> undef)
- call <vscale x 4 x float> @llvm.round.nvx4f32(<vscale x 4 x float> undef)
- call <vscale x 8 x float> @llvm.round.nvx8f32(<vscale x 8 x float> undef)
- call <vscale x 16 x float> @llvm.round.nvx16f32(<vscale x 16 x float> undef)
+ call <vscale x 1 x float> @llvm.round.nxv1f32(<vscale x 1 x float> undef)
+ call <vscale x 2 x float> @llvm.round.nxv2f32(<vscale x 2 x float> undef)
+ call <vscale x 4 x float> @llvm.round.nxv4f32(<vscale x 4 x float> undef)
+ call <vscale x 8 x float> @llvm.round.nxv8f32(<vscale x 8 x float> undef)
+ call <vscale x 16 x float> @llvm.round.nxv16f32(<vscale x 16 x float> undef)
call double @llvm.round.f64(double undef)
call <2 x double> @llvm.round.v2f64(<2 x double> undef)
call <4 x double> @llvm.round.v4f64(<4 x double> undef)
call <8 x double> @llvm.round.v8f64(<8 x double> undef)
call <16 x double> @llvm.round.v16f64(<16 x double> undef)
- call <vscale x 1 x double> @llvm.round.nvx1f64(<vscale x 1 x double> undef)
- call <vscale x 2 x double> @llvm.round.nvx2f64(<vscale x 2 x double> undef)
- call <vscale x 4 x double> @llvm.round.nvx4f64(<vscale x 4 x double> undef)
- call <vscale x 8 x double> @llvm.round.nvx8f64(<vscale x 8 x double> undef)
+ call <vscale x 1 x double> @llvm.round.nxv1f64(<vscale x 1 x double> undef)
+ call <vscale x 2 x double> @llvm.round.nxv2f64(<vscale x 2 x double> undef)
+ call <vscale x 4 x double> @llvm.round.nxv4f64(<vscale x 4 x double> undef)
+ call <vscale x 8 x double> @llvm.round.nxv8f64(<vscale x 8 x double> undef)
ret void
}
@@ -808,11 +808,11 @@ define void @round_fp16() {
call <4 x half> @llvm.round.v4f16(<4 x half> undef)
call <8 x half> @llvm.round.v8f16(<8 x half> undef)
call <16 x half> @llvm.round.v16f16(<16 x half> undef)
- call <vscale x 1 x half> @llvm.round.nvx1f16(<vscale x 1 x half> undef)
- call <vscale x 2 x half> @llvm.round.nvx2f16(<vscale x 2 x half> undef)
- call <vscale x 4 x half> @llvm.round.nvx4f16(<vscale x 4 x half> undef)
- call <vscale x 8 x half> @llvm.round.nvx8f16(<vscale x 8 x half> undef)
- call <vscale x 16 x half> @llvm.round.nvx16f16(<vscale x 16 x half> undef)
+ call <vscale x 1 x half> @llvm.round.nxv1f16(<vscale x 1 x half> undef)
+ call <vscale x 2 x half> @llvm.round.nxv2f16(<vscale x 2 x half> undef)
+ call <vscale x 4 x half> @llvm.round.nxv4f16(<vscale x 4 x half> undef)
+ call <vscale x 8 x half> @llvm.round.nxv8f16(<vscale x 8 x half> undef)
+ call <vscale x 16 x half> @llvm.round.nxv16f16(<vscale x 16 x half> undef)
ret void
}
@@ -854,30 +854,30 @@ define void @roundeven() {
call <4 x bfloat> @llvm.roundeven.v4bf16(<4 x bfloat> undef)
call <8 x bfloat> @llvm.roundeven.v8bf16(<8 x bfloat> undef)
call <16 x bfloat> @llvm.roundeven.v16bf16(<16 x bfloat> undef)
- call <vscale x 1 x bfloat> @llvm.roundeven.nvx1bf16(<vscale x 1 x bfloat> undef)
- call <vscale x 2 x bfloat> @llvm.roundeven.nvx2bf16(<vscale x 2 x bfloat> undef)
- call <vscale x 4 x bfloat> @llvm.roundeven.nvx4bf16(<vscale x 4 x bfloat> undef)
- call <vscale x 8 x bfloat> @llvm.roundeven.nvx8bf16(<vscale x 8 x bfloat> undef)
- call <vscale x 16 x bfloat> @llvm.roundeven.nvx16bf16(<vscale x 16 x bfloat> undef)
+ call <vscale x 1 x bfloat> @llvm.roundeven.nxv1bf16(<vscale x 1 x bfloat> undef)
+ call <vscale x 2 x bfloat> @llvm.roundeven.nxv2bf16(<vscale x 2 x bfloat> undef)
+ call <vscale x 4 x bfloat> @llvm.roundeven.nxv4bf16(<vscale x 4 x bfloat> undef)
+ call <vscale x 8 x bfloat> @llvm.roundeven.nxv8bf16(<vscale x 8 x bfloat> undef)
+ call <vscale x 16 x bfloat> @llvm.roundeven.nxv16bf16(<vscale x 16 x bfloat> undef)
call float @llvm.roundeven.f32(float undef)
call <2 x float> @llvm.roundeven.v2f32(<2 x float> undef)
call <4 x float> @llvm.roundeven.v4f32(<4 x float> undef)
call <8 x float> @llvm.roundeven.v8f32(<8 x float> undef)
call <16 x float> @llvm.roundeven.v16f32(<16 x float> undef)
- call <vscale x 1 x float> @llvm.roundeven.nvx1f32(<vscale x 1 x float> undef)
- call <vscale x 2 x float> @llvm.roundeven.nvx2f32(<vscale x 2 x float> undef)
- call <vscale x 4 x float> @llvm.roundeven.nvx4f32(<vscale x 4 x float> undef)
- call <vscale x 8 x float> @llvm.roundeven.nvx8f32(<vscale x 8 x float> undef)
- call <vscale x 16 x float> @llvm.roundeven.nvx16f32(<vscale x 16 x float> undef)
+ call <vscale x 1 x float> @llvm.roundeven.nxv1f32(<vscale x 1 x float> undef)
+ call <vscale x 2 x float> @llvm.roundeven.nxv2f32(<vscale x 2 x float> undef)
+ call <vscale x 4 x float> @llvm.roundeven.nxv4f32(<vscale x 4 x float> undef)
+ call <vscale x 8 x float> @llvm.roundeven.nxv8f32(<vscale x 8 x float> undef)
+ call <vscale x 16 x float> @llvm.roundeven.nxv16f32(<vscale x 16 x float> undef)
call double @llvm.roundeven.f64(double undef)
call <2 x double> @llvm.roundeven.v2f64(<2 x double> undef)
call <4 x double> @llvm.roundeven.v4f64(<4 x double> undef)
call <8 x double> @llvm.roundeven.v8f64(<8 x double> undef)
call <16 x double> @llvm.roundeven.v16f64(<16 x double> undef)
- call <vscale x 1 x double> @llvm.roundeven.nvx1f64(<vscale x 1 x double> undef)
- call <vscale x 2 x double> @llvm.roundeven.nvx2f64(<vscale x 2 x double> undef)
- call <vscale x 4 x double> @llvm.roundeven.nvx4f64(<vscale x 4 x double> undef)
- call <vscale x 8 x double> @llvm.roundeven.nvx8f64(<vscale x 8 x double> undef)
+ call <vscale x 1 x double> @llvm.roundeven.nxv1f64(<vscale x 1 x double> undef)
+ call <vscale x 2 x double> @llvm.roundeven.nxv2f64(<vscale x 2 x double> undef)
+ call <vscale x 4 x double> @llvm.roundeven.nxv4f64(<vscale x 4 x double> undef)
+ call <vscale x 8 x double> @llvm.roundeven.nxv8f64(<vscale x 8 x double> undef)
ret void
}
@@ -913,11 +913,11 @@ define void @roundeven_fp16() {
call <4 x half> @llvm.roundeven.v4f16(<4 x half> undef)
call <8 x half> @llvm.roundeven.v8f16(<8 x half> undef)
call <16 x half> @llvm.roundeven.v16f16(<16 x half> undef)
- call <vscale x 1 x half> @llvm.roundeven.nvx1f16(<vscale x 1 x half> undef)
- call <vscale x 2 x half> @llvm.roundeven.nvx2f16(<vscale x 2 x half> undef)
- call <vscale x 4 x half> @llvm.roundeven.nvx4f16(<vscale x 4 x half> undef)
- call <vscale x 8 x half> @llvm.roundeven.nvx8f16(<vscale x 8 x half> undef)
- call <vscale x 16 x half> @llvm.roundeven.nvx16f16(<vscale x 16 x half> undef)
+ call <vscale x 1 x half> @llvm.roundeven.nxv1f16(<vscale x 1 x half> undef)
+ call <vscale x 2 x half> @llvm.roundeven.nxv2f16(<vscale x 2 x half> undef)
+ call <vscale x 4 x half> @llvm.roundeven.nxv4f16(<vscale x 4 x half> undef)
+ call <vscale x 8 x half> @llvm.roundeven.nxv8f16(<vscale x 8 x half> undef)
+ call <vscale x 16 x half> @llvm.roundeven.nxv16f16(<vscale x 16 x half> undef)
ret void
}
@@ -955,28 +955,28 @@ define void @vp_ceil() {
call <4 x bfloat> @llvm.vp.ceil.v4bf16(<4 x bfloat> undef, <4 x i1> undef, i32 undef)
call <8 x bfloat> @llvm.vp.ceil.v8bf16(<8 x bfloat> undef, <8 x i1> undef, i32 undef)
call <16 x bfloat> @llvm.vp.ceil.v16bf16(<16 x bfloat> undef, <16 x i1> undef, i32 undef)
- call <vscale x 1 x bfloat> @llvm.vp.ceil.nvx1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x bfloat> @llvm.vp.ceil.nvx2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x bfloat> @llvm.vp.ceil.nvx4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x bfloat> @llvm.vp.ceil.nvx8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
- call <vscale x 16 x bfloat> @llvm.vp.ceil.nvx16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+ call <vscale x 1 x bfloat> @llvm.vp.ceil.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x bfloat> @llvm.vp.ceil.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x bfloat> @llvm.vp.ceil.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x bfloat> @llvm.vp.ceil.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 16 x bfloat> @llvm.vp.ceil.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
call <2 x float> @llvm.vp.ceil.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
call <4 x float> @llvm.vp.ceil.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
call <8 x float> @llvm.vp.ceil.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
call <16 x float> @llvm.vp.ceil.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
- call <vscale x 1 x float> @llvm.vp.ceil.nvx1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x float> @llvm.vp.ceil.nvx2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x float> @llvm.vp.ceil.nvx4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x float> @llvm.vp.ceil.nvx8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
- call <vscale x 16 x float> @llvm.vp.ceil.nvx16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+ call <vscale x 1 x float> @llvm.vp.ceil.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x float> @llvm.vp.ceil.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x float> @llvm.vp.ceil.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x float> @llvm.vp.ceil.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 16 x float> @llvm.vp.ceil.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
call <2 x double> @llvm.vp.ceil.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
call <4 x double> @llvm.vp.ceil.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
call <8 x double> @llvm.vp.ceil.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
call <16 x double> @llvm.vp.ceil.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
- call <vscale x 1 x double> @llvm.vp.ceil.nvx1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x double> @llvm.vp.ceil.nvx2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x double> @llvm.vp.ceil.nvx4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x double> @llvm.vp.ceil.nvx8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 1 x double> @llvm.vp.ceil.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x double> @llvm.vp.ceil.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x double> @llvm.vp.ceil.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x double> @llvm.vp.ceil.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
ret void
}
@@ -1009,11 +1009,11 @@ define void @vp_ceil_f16() {
call <4 x half> @llvm.vp.ceil.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
call <8 x half> @llvm.vp.ceil.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
call <16 x half> @llvm.vp.ceil.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
- call <vscale x 1 x half> @llvm.vp.ceil.nvx1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x half> @llvm.vp.ceil.nvx2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x half> @llvm.vp.ceil.nvx4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x half> @llvm.vp.ceil.nvx8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
- call <vscale x 16 x half> @llvm.vp.ceil.nvx16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+ call <vscale x 1 x half> @llvm.vp.ceil.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x half> @llvm.vp.ceil.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x half> @llvm.vp.ceil.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x half> @llvm.vp.ceil.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 16 x half> @llvm.vp.ceil.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
ret void
}
@@ -1051,28 +1051,28 @@ define void @vp_floor() {
call <4 x bfloat> @llvm.vp.floor.v4bf16(<4 x bfloat> undef, <4 x i1> undef, i32 undef)
call <8 x bfloat> @llvm.vp.floor.v8bf16(<8 x bfloat> undef, <8 x i1> undef, i32 undef)
call <16 x bfloat> @llvm.vp.floor.v16bf16(<16 x bfloat> undef, <16 x i1> undef, i32 undef)
- call <vscale x 1 x bfloat> @llvm.vp.floor.nvx1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x bfloat> @llvm.vp.floor.nvx2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x bfloat> @llvm.vp.floor.nvx4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x bfloat> @llvm.vp.floor.nvx8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
- call <vscale x 16 x bfloat> @llvm.vp.floor.nvx16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+ call <vscale x 1 x bfloat> @llvm.vp.floor.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x bfloat> @llvm.vp.floor.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x bfloat> @llvm.vp.floor.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x bfloat> @llvm.vp.floor.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 16 x bfloat> @llvm.vp.floor.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
call <2 x float> @llvm.vp.floor.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
call <4 x float> @llvm.vp.floor.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
call <8 x float> @llvm.vp.floor.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
call <16 x float> @llvm.vp.floor.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
- call <vscale x 1 x float> @llvm.vp.floor.nvx1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x float> @llvm.vp.floor.nvx2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x float> @llvm.vp.floor.nvx4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x float> @llvm.vp.floor.nvx8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
- call <vscale x 16 x float> @llvm.vp.floor.nvx16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+ call <vscale x 1 x float> @llvm.vp.floor.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x float> @llvm.vp.floor.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x float> @llvm.vp.floor.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x float> @llvm.vp.floor.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 16 x float> @llvm.vp.floor.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
call <2 x double> @llvm.vp.floor.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
call <4 x double> @llvm.vp.floor.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
call <8 x double> @llvm.vp.floor.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
call <16 x double> @llvm.vp.floor.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
- call <vscale x 1 x double> @llvm.vp.floor.nvx1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x double> @llvm.vp.floor.nvx2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x double> @llvm.vp.floor.nvx4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x double> @llvm.vp.floor.nvx8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 1 x double> @llvm.vp.floor.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x double> @llvm.vp.floor.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x double> @llvm.vp.floor.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x double> @llvm.vp.floor.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
ret void
}
@@ -1105,11 +1105,11 @@ define void @vp_floor_f16() {
call <4 x half> @llvm.vp.floor.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
call <8 x half> @llvm.vp.floor.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
call <16 x half> @llvm.vp.floor.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
- call <vscale x 1 x half> @llvm.vp.floor.nvx1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x half> @llvm.vp.floor.nvx2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x half> @llvm.vp.floor.nvx4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x half> @llvm.vp.floor.nvx8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
- call <vscale x 16 x half> @llvm.vp.floor.nvx16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+ call <vscale x 1 x half> @llvm.vp.floor.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x half> @llvm.vp.floor.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x half> @llvm.vp.floor.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x half> @llvm.vp.floor.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 16 x half> @llvm.vp.floor.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
ret void
}
@@ -1147,28 +1147,28 @@ define void @vp_round() {
call <4 x bfloat> @llvm.vp.round.v4bf16(<4 x bfloat> undef, <4 x i1> undef, i32 undef)
call <8 x bfloat> @llvm.vp.round.v8bf16(<8 x bfloat> undef, <8 x i1> undef, i32 undef)
call <16 x bfloat> @llvm.vp.round.v16bf16(<16 x bfloat> undef, <16 x i1> undef, i32 undef)
- call <vscale x 1 x bfloat> @llvm.vp.round.nvx1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x bfloat> @llvm.vp.round.nvx2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x bfloat> @llvm.vp.round.nvx4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x bfloat> @llvm.vp.round.nvx8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
- call <vscale x 16 x bfloat> @llvm.vp.round.nvx16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+ call <vscale x 1 x bfloat> @llvm.vp.round.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x bfloat> @llvm.vp.round.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x bfloat> @llvm.vp.round.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x bfloat> @llvm.vp.round.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 16 x bfloat> @llvm.vp.round.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
call <2 x float> @llvm.vp.round.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
call <4 x float> @llvm.vp.round.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
call <8 x float> @llvm.vp.round.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
call <16 x float> @llvm.vp.round.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
- call <vscale x 1 x float> @llvm.vp.round.nvx1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x float> @llvm.vp.round.nvx2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x float> @llvm.vp.round.nvx4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x float> @llvm.vp.round.nvx8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
- call <vscale x 16 x float> @llvm.vp.round.nvx16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+ call <vscale x 1 x float> @llvm.vp.round.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x float> @llvm.vp.round.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x float> @llvm.vp.round.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x float> @llvm.vp.round.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 16 x float> @llvm.vp.round.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
call <2 x double> @llvm.vp.round.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
call <4 x double> @llvm.vp.round.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
call <8 x double> @llvm.vp.round.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
call <16 x double> @llvm.vp.round.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
- call <vscale x 1 x double> @llvm.vp.round.nvx1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x double> @llvm.vp.round.nvx2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x double> @llvm.vp.round.nvx4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x double> @llvm.vp.round.nvx8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 1 x double> @llvm.vp.round.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x double> @llvm.vp.round.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x double> @llvm.vp.round.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x double> @llvm.vp.round.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
ret void
}
@@ -1201,11 +1201,11 @@ define void @vp_round_f16() {
call <4 x half> @llvm.vp.round.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
call <8 x half> @llvm.vp.round.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
call <16 x half> @llvm.vp.round.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
- call <vscale x 1 x half> @llvm.vp.round.nvx1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x half> @llvm.vp.round.nvx2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x half> @llvm.vp.round.nvx4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x half> @llvm.vp.round.nvx8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
- call <vscale x 16 x half> @llvm.vp.round.nvx16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+ call <vscale x 1 x half> @llvm.vp.round.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x half> @llvm.vp.round.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x half> @llvm.vp.round.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x half> @llvm.vp.round.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 16 x half> @llvm.vp.round.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
ret void
}
@@ -1243,28 +1243,28 @@ define void @vp_roundeven() {
call <4 x bfloat> @llvm.vp.roundeven.v4bf16(<4 x bfloat> undef, <4 x i1> undef, i32 undef)
call <8 x bfloat> @llvm.vp.roundeven.v8bf16(<8 x bfloat> undef, <8 x i1> undef, i32 undef)
call <16 x bfloat> @llvm.vp.roundeven.v16bf16(<16 x bfloat> undef, <16 x i1> undef, i32 undef)
- call <vscale x 1 x bfloat> @llvm.vp.roundeven.nvx1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x bfloat> @llvm.vp.roundeven.nvx2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x bfloat> @llvm.vp.roundeven.nvx4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x bfloat> @llvm.vp.roundeven.nvx8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
- call <vscale x 16 x bfloat> @llvm.vp.roundeven.nvx16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+ call <vscale x 1 x bfloat> @llvm.vp.roundeven.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x bfloat> @llvm.vp.roundeven.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x bfloat> @llvm.vp.roundeven.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x bfloat> @llvm.vp.roundeven.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 16 x bfloat> @llvm.vp.roundeven.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
call <2 x float> @llvm.vp.roundeven.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
call <4 x float> @llvm.vp.roundeven.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
call <8 x float> @llvm.vp.roundeven.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
call <16 x float> @llvm.vp.roundeven.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
- call <vscale x 1 x float> @llvm.vp.roundeven.nvx1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x float> @llvm.vp.roundeven.nvx2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x float> @llvm.vp.roundeven.nvx4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x float> @llvm.vp.roundeven.nvx8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
- call <vscale x 16 x float> @llvm.vp.roundeven.nvx16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+ call <vscale x 1 x float> @llvm.vp.roundeven.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x float> @llvm.vp.roundeven.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x float> @llvm.vp.roundeven.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x float> @llvm.vp.roundeven.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 16 x float> @llvm.vp.roundeven.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
call <2 x double> @llvm.vp.roundeven.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
call <4 x double> @llvm.vp.roundeven.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
call <8 x double> @llvm.vp.roundeven.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
call <16 x double> @llvm.vp.roundeven.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
- call <vscale x 1 x double> @llvm.vp.roundeven.nvx1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x double> @llvm.vp.roundeven.nvx2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x double> @llvm.vp.roundeven.nvx4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x double> @llvm.vp.roundeven.nvx8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 1 x double> @llvm.vp.roundeven.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x double> @llvm.vp.roundeven.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x double> @llvm.vp.roundeven.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x double> @llvm.vp.roundeven.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
ret void
}
@@ -1297,11 +1297,11 @@ define void @vp_roundeven_f16() {
call <4 x half> @llvm.vp.roundeven.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
call <8 x half> @llvm.vp.roundeven.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
call <16 x half> @llvm.vp.roundeven.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
- call <vscale x 1 x half> @llvm.vp.roundeven.nvx1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x half> @llvm.vp.roundeven.nvx2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x half> @llvm.vp.roundeven.nvx4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x half> @llvm.vp.roundeven.nvx8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
- call <vscale x 16 x half> @llvm.vp.roundeven.nvx16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+ call <vscale x 1 x half> @llvm.vp.roundeven.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x half> @llvm.vp.roundeven.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x half> @llvm.vp.roundeven.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x half> @llvm.vp.roundeven.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 16 x half> @llvm.vp.roundeven.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
ret void
}
@@ -1339,28 +1339,28 @@ define void @vp_roundtozero() {
call <4 x bfloat> @llvm.vp.roundtozero.v4bf16(<4 x bfloat> undef, <4 x i1> undef, i32 undef)
call <8 x bfloat> @llvm.vp.roundtozero.v8bf16(<8 x bfloat> undef, <8 x i1> undef, i32 undef)
call <16 x bfloat> @llvm.vp.roundtozero.v16bf16(<16 x bfloat> undef, <16 x i1> undef, i32 undef)
- call <vscale x 1 x bfloat> @llvm.vp.roundtozero.nvx1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x bfloat> @llvm.vp.roundtozero.nvx2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x bfloat> @llvm.vp.roundtozero.nvx4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x bfloat> @llvm.vp.roundtozero.nvx8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
- call <vscale x 16 x bfloat> @llvm.vp.roundtozero.nvx16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+ call <vscale x 1 x bfloat> @llvm.vp.roundtozero.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x bfloat> @llvm.vp.roundtozero.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x bfloat> @llvm.vp.roundtozero.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x bfloat> @llvm.vp.roundtozero.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 16 x bfloat> @llvm.vp.roundtozero.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
call <2 x float> @llvm.vp.roundtozero.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
call <4 x float> @llvm.vp.roundtozero.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
call <8 x float> @llvm.vp.roundtozero.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
call <16 x float> @llvm.vp.roundtozero.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
- call <vscale x 1 x float> @llvm.vp.roundtozero.nvx1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x float> @llvm.vp.roundtozero.nvx2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x float> @llvm.vp.roundtozero.nvx4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x float> @llvm.vp.roundtozero.nvx8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
- call <vscale x 16 x float> @llvm.vp.roundtozero.nvx16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+ call <vscale x 1 x float> @llvm.vp.roundtozero.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x float> @llvm.vp.roundtozero.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x float> @llvm.vp.roundtozero.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x float> @llvm.vp.roundtozero.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 16 x float> @llvm.vp.roundtozero.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
call <2 x double> @llvm.vp.roundtozero.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
call <4 x double> @llvm.vp.roundtozero.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
call <8 x double> @llvm.vp.roundtozero.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
call <16 x double> @llvm.vp.roundtozero.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
- call <vscale x 1 x double> @llvm.vp.roundtozero.nvx1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x double> @llvm.vp.roundtozero.nvx2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x double> @llvm.vp.roundtozero.nvx4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x double> @llvm.vp.roundtozero.nvx8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 1 x double> @llvm.vp.roundtozero.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x double> @llvm.vp.roundtozero.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x double> @llvm.vp.roundtozero.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x double> @llvm.vp.roundtozero.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
ret void
}
@@ -1393,11 +1393,11 @@ define void @vp_roundtozero_f16() {
call <4 x half> @llvm.vp.roundtozero.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
call <8 x half> @llvm.vp.roundtozero.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
call <16 x half> @llvm.vp.roundtozero.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
- call <vscale x 1 x half> @llvm.vp.roundtozero.nvx1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x half> @llvm.vp.roundtozero.nvx2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x half> @llvm.vp.roundtozero.nvx4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x half> @llvm.vp.roundtozero.nvx8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
- call <vscale x 16 x half> @llvm.vp.roundtozero.nvx16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+ call <vscale x 1 x half> @llvm.vp.roundtozero.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x half> @llvm.vp.roundtozero.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x half> @llvm.vp.roundtozero.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x half> @llvm.vp.roundtozero.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 16 x half> @llvm.vp.roundtozero.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
ret void
}
@@ -1435,28 +1435,28 @@ define void @vp_rint() {
call <4 x bfloat> @llvm.vp.rint.v4bf16(<4 x bfloat> undef, <4 x i1> undef, i32 undef)
call <8 x bfloat> @llvm.vp.rint.v8bf16(<8 x bfloat> undef, <8 x i1> undef, i32 undef)
call <16 x bfloat> @llvm.vp.rint.v16bf16(<16 x bfloat> undef, <16 x i1> undef, i32 undef)
- call <vscale x 1 x bfloat> @llvm.vp.rint.nvx1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x bfloat> @llvm.vp.rint.nvx2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x bfloat> @llvm.vp.rint.nvx4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x bfloat> @llvm.vp.rint.nvx8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
- call <vscale x 16 x bfloat> @llvm.vp.rint.nvx16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+ call <vscale x 1 x bfloat> @llvm.vp.rint.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x bfloat> @llvm.vp.rint.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x bfloat> @llvm.vp.rint.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x bfloat> @llvm.vp.rint.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 16 x bfloat> @llvm.vp.rint.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
call <2 x float> @llvm.vp.rint.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
call <4 x float> @llvm.vp.rint.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
call <8 x float> @llvm.vp.rint.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
call <16 x float> @llvm.vp.rint.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
- call <vscale x 1 x float> @llvm.vp.rint.nvx1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x float> @llvm.vp.rint.nvx2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x float> @llvm.vp.rint.nvx4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x float> @llvm.vp.rint.nvx8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
- call <vscale x 16 x float> @llvm.vp.rint.nvx16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+ call <vscale x 1 x float> @llvm.vp.rint.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x float> @llvm.vp.rint.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x float> @llvm.vp.rint.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x float> @llvm.vp.rint.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 16 x float> @llvm.vp.rint.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
call <2 x double> @llvm.vp.rint.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
call <4 x double> @llvm.vp.rint.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
call <8 x double> @llvm.vp.rint.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
call <16 x double> @llvm.vp.rint.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
- call <vscale x 1 x double> @llvm.vp.rint.nvx1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x double> @llvm.vp.rint.nvx2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x double> @llvm.vp.rint.nvx4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x double> @llvm.vp.rint.nvx8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 1 x double> @llvm.vp.rint.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x double> @llvm.vp.rint.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x double> @llvm.vp.rint.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x double> @llvm.vp.rint.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
ret void
}
@@ -1489,11 +1489,11 @@ define void @vp_rint_f16() {
call <4 x half> @llvm.vp.rint.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
call <8 x half> @llvm.vp.rint.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
call <16 x half> @llvm.vp.rint.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
- call <vscale x 1 x half> @llvm.vp.rint.nvx1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x half> @llvm.vp.rint.nvx2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x half> @llvm.vp.rint.nvx4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x half> @llvm.vp.rint.nvx8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
- call <vscale x 16 x half> @llvm.vp.rint.nvx16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+ call <vscale x 1 x half> @llvm.vp.rint.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x half> @llvm.vp.rint.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x half> @llvm.vp.rint.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x half> @llvm.vp.rint.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 16 x half> @llvm.vp.rint.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
ret void
}
@@ -1531,28 +1531,28 @@ define void @vp_nearbyint() {
call <4 x bfloat> @llvm.vp.nearbyint.v4bf16(<4 x bfloat> undef, <4 x i1> undef, i32 undef)
call <8 x bfloat> @llvm.vp.nearbyint.v8bf16(<8 x bfloat> undef, <8 x i1> undef, i32 undef)
call <16 x bfloat> @llvm.vp.nearbyint.v16bf16(<16 x bfloat> undef, <16 x i1> undef, i32 undef)
- call <vscale x 1 x bfloat> @llvm.vp.nearbyint.nvx1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x bfloat> @llvm.vp.nearbyint.nvx2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x bfloat> @llvm.vp.nearbyint.nvx4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x bfloat> @llvm.vp.nearbyint.nvx8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
- call <vscale x 16 x bfloat> @llvm.vp.nearbyint.nvx16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+ call <vscale x 1 x bfloat> @llvm.vp.nearbyint.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x bfloat> @llvm.vp.nearbyint.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x bfloat> @llvm.vp.nearbyint.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x bfloat> @llvm.vp.nearbyint.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 16 x bfloat> @llvm.vp.nearbyint.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
call <2 x float> @llvm.vp.nearbyint.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
call <4 x float> @llvm.vp.nearbyint.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
call <8 x float> @llvm.vp.nearbyint.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
call <16 x float> @llvm.vp.nearbyint.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
- call <vscale x 1 x float> @llvm.vp.nearbyint.nvx1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x float> @llvm.vp.nearbyint.nvx2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x float> @llvm.vp.nearbyint.nvx4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x float> @llvm.vp.nearbyint.nvx8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
- call <vscale x 16 x float> @llvm.vp.nearbyint.nvx16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+ call <vscale x 1 x float> @llvm.vp.nearbyint.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x float> @llvm.vp.nearbyint.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x float> @llvm.vp.nearbyint.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x float> @llvm.vp.nearbyint.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 16 x float> @llvm.vp.nearbyint.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
call <2 x double> @llvm.vp.nearbyint.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
call <4 x double> @llvm.vp.nearbyint.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
call <8 x double> @llvm.vp.nearbyint.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
call <16 x double> @llvm.vp.nearbyint.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
- call <vscale x 1 x double> @llvm.vp.nearbyint.nvx1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x double> @llvm.vp.nearbyint.nvx2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x double> @llvm.vp.nearbyint.nvx4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x double> @llvm.vp.nearbyint.nvx8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 1 x double> @llvm.vp.nearbyint.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x double> @llvm.vp.nearbyint.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x double> @llvm.vp.nearbyint.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x double> @llvm.vp.nearbyint.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
ret void
}
@@ -1585,10 +1585,10 @@ define void @vp_nearbyint_f16() {
call <4 x half> @llvm.vp.nearbyint.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
call <8 x half> @llvm.vp.nearbyint.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
call <16 x half> @llvm.vp.nearbyint.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
- call <vscale x 1 x half> @llvm.vp.nearbyint.nvx1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x half> @llvm.vp.nearbyint.nvx2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x half> @llvm.vp.nearbyint.nvx4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x half> @llvm.vp.nearbyint.nvx8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
- call <vscale x 16 x half> @llvm.vp.nearbyint.nvx16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+ call <vscale x 1 x half> @llvm.vp.nearbyint.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x half> @llvm.vp.nearbyint.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x half> @llvm.vp.nearbyint.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x half> @llvm.vp.nearbyint.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 16 x half> @llvm.vp.nearbyint.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
ret void
}
diff --git a/llvm/test/Analysis/CostModel/RISCV/int-bit-manip.ll b/llvm/test/Analysis/CostModel/RISCV/int-bit-manip.ll
index b3b31d6f001ac4..ea05464b084086 100644
--- a/llvm/test/Analysis/CostModel/RISCV/int-bit-manip.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/int-bit-manip.ll
@@ -42,31 +42,31 @@ define void @bswap() {
call <4 x i16> @llvm.bswap.v4i16(<4 x i16> undef)
call <8 x i16> @llvm.bswap.v8i16(<8 x i16> undef)
call <16 x i16> @llvm.bswap.v16i16(<16 x i16> undef)
- call <vscale x 1 x i16> @llvm.bswap.nvx1i16(<vscale x 1 x i16> undef)
- call <vscale x 2 x i16> @llvm.bswap.nvx2i16(<vscale x 2 x i16> undef)
- call <vscale x 4 x i16> @llvm.bswap.nvx4i16(<vscale x 4 x i16> undef)
- call <vscale x 8 x i16> @llvm.bswap.nvx8i16(<vscale x 8 x i16> undef)
- call <vscale x 16 x i16> @llvm.bswap.nvx16i16(<vscale x 16 x i16> undef)
+ call <vscale x 1 x i16> @llvm.bswap.nxv1i16(<vscale x 1 x i16> undef)
+ call <vscale x 2 x i16> @llvm.bswap.nxv2i16(<vscale x 2 x i16> undef)
+ call <vscale x 4 x i16> @llvm.bswap.nxv4i16(<vscale x 4 x i16> undef)
+ call <vscale x 8 x i16> @llvm.bswap.nxv8i16(<vscale x 8 x i16> undef)
+ call <vscale x 16 x i16> @llvm.bswap.nxv16i16(<vscale x 16 x i16> undef)
call i32 @llvm.bswap.i32(i32 undef)
call <2 x i32> @llvm.bswap.v2i32(<2 x i32> undef)
call <4 x i32> @llvm.bswap.v4i32(<4 x i32> undef)
call <8 x i32> @llvm.bswap.v8i32(<8 x i32> undef)
call <16 x i32> @llvm.bswap.v16i32(<16 x i32> undef)
- call <vscale x 1 x i32> @llvm.bswap.nvx1i32(<vscale x 1 x i32> undef)
- call <vscale x 2 x i32> @llvm.bswap.nvx2i32(<vscale x 2 x i32> undef)
- call <vscale x 4 x i32> @llvm.bswap.nvx4i32(<vscale x 4 x i32> undef)
- call <vscale x 8 x i32> @llvm.bswap.nvx8i32(<vscale x 8 x i32> undef)
- call <vscale x 16 x i32> @llvm.bswap.nvx16i32(<vscale x 16 x i32> undef)
+ call <vscale x 1 x i32> @llvm.bswap.nxv1i32(<vscale x 1 x i32> undef)
+ call <vscale x 2 x i32> @llvm.bswap.nxv2i32(<vscale x 2 x i32> undef)
+ call <vscale x 4 x i32> @llvm.bswap.nxv4i32(<vscale x 4 x i32> undef)
+ call <vscale x 8 x i32> @llvm.bswap.nxv8i32(<vscale x 8 x i32> undef)
+ call <vscale x 16 x i32> @llvm.bswap.nxv16i32(<vscale x 16 x i32> undef)
call i64 @llvm.bswap.i64(i64 undef)
call <2 x i64> @llvm.bswap.v2i64(<2 x i64> undef)
call <4 x i64> @llvm.bswap.v4i64(<4 x i64> undef)
call <8 x i64> @llvm.bswap.v8i64(<8 x i64> undef)
call <16 x i64> @llvm.bswap.v16i64(<16 x i64> undef)
- call <vscale x 1 x i64> @llvm.bswap.nvx1i64(<vscale x 1 x i64> undef)
- call <vscale x 2 x i64> @llvm.bswap.nvx2i64(<vscale x 2 x i64> undef)
- call <vscale x 4 x i64> @llvm.bswap.nvx4i64(<vscale x 4 x i64> undef)
- call <vscale x 8 x i64> @llvm.bswap.nvx8i64(<vscale x 8 x i64> undef)
- call <vscale x 16 x i64> @llvm.bswap.nvx16i64(<vscale x 16 x i64> undef)
+ call <vscale x 1 x i64> @llvm.bswap.nxv1i64(<vscale x 1 x i64> undef)
+ call <vscale x 2 x i64> @llvm.bswap.nxv2i64(<vscale x 2 x i64> undef)
+ call <vscale x 4 x i64> @llvm.bswap.nxv4i64(<vscale x 4 x i64> undef)
+ call <vscale x 8 x i64> @llvm.bswap.nxv8i64(<vscale x 8 x i64> undef)
+ call <vscale x 16 x i64> @llvm.bswap.nxv16i64(<vscale x 16 x i64> undef)
ret void
}
@@ -119,41 +119,41 @@ define void @bitreverse() {
call <4 x i8> @llvm.bitreverse.v4i8(<4 x i8> undef)
call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> undef)
call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> undef)
- call <vscale x 1 x i8> @llvm.bitreverse.nvx1i8(<vscale x 1 x i8> undef)
- call <vscale x 2 x i8> @llvm.bitreverse.nvx2i8(<vscale x 2 x i8> undef)
- call <vscale x 4 x i8> @llvm.bitreverse.nvx4i8(<vscale x 4 x i8> undef)
- call <vscale x 8 x i8> @llvm.bitreverse.nvx8i8(<vscale x 8 x i8> undef)
- call <vscale x 16 x i8> @llvm.bitreverse.nvx16i8(<vscale x 16 x i8> undef)
+ call <vscale x 1 x i8> @llvm.bitreverse.nxv1i8(<vscale x 1 x i8> undef)
+ call <vscale x 2 x i8> @llvm.bitreverse.nxv2i8(<vscale x 2 x i8> undef)
+ call <vscale x 4 x i8> @llvm.bitreverse.nxv4i8(<vscale x 4 x i8> undef)
+ call <vscale x 8 x i8> @llvm.bitreverse.nxv8i8(<vscale x 8 x i8> undef)
+ call <vscale x 16 x i8> @llvm.bitreverse.nxv16i8(<vscale x 16 x i8> undef)
call i16 @llvm.bitreverse.i16(i16 undef)
call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> undef)
call <4 x i16> @llvm.bitreverse.v4i16(<4 x i16> undef)
call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> undef)
call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> undef)
- call <vscale x 1 x i16> @llvm.bitreverse.nvx1i16(<vscale x 1 x i16> undef)
- call <vscale x 2 x i16> @llvm.bitreverse.nvx2i16(<vscale x 2 x i16> undef)
- call <vscale x 4 x i16> @llvm.bitreverse.nvx4i16(<vscale x 4 x i16> undef)
- call <vscale x 8 x i16> @llvm.bitreverse.nvx8i16(<vscale x 8 x i16> undef)
- call <vscale x 16 x i16> @llvm.bitreverse.nvx16i16(<vscale x 16 x i16> undef)
+ call <vscale x 1 x i16> @llvm.bitreverse.nxv1i16(<vscale x 1 x i16> undef)
+ call <vscale x 2 x i16> @llvm.bitreverse.nxv2i16(<vscale x 2 x i16> undef)
+ call <vscale x 4 x i16> @llvm.bitreverse.nxv4i16(<vscale x 4 x i16> undef)
+ call <vscale x 8 x i16> @llvm.bitreverse.nxv8i16(<vscale x 8 x i16> undef)
+ call <vscale x 16 x i16> @llvm.bitreverse.nxv16i16(<vscale x 16 x i16> undef)
call i32 @llvm.bitreverse.i32(i32 undef)
call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> undef)
call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> undef)
call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> undef)
call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> undef)
- call <vscale x 1 x i32> @llvm.bitreverse.nvx1i32(<vscale x 1 x i32> undef)
- call <vscale x 2 x i32> @llvm.bitreverse.nvx2i32(<vscale x 2 x i32> undef)
- call <vscale x 4 x i32> @llvm.bitreverse.nvx4i32(<vscale x 4 x i32> undef)
- call <vscale x 8 x i32> @llvm.bitreverse.nvx8i32(<vscale x 8 x i32> undef)
- call <vscale x 16 x i32> @llvm.bitreverse.nvx16i32(<vscale x 16 x i32> undef)
+ call <vscale x 1 x i32> @llvm.bitreverse.nxv1i32(<vscale x 1 x i32> undef)
+ call <vscale x 2 x i32> @llvm.bitreverse.nxv2i32(<vscale x 2 x i32> undef)
+ call <vscale x 4 x i32> @llvm.bitreverse.nxv4i32(<vscale x 4 x i32> undef)
+ call <vscale x 8 x i32> @llvm.bitreverse.nxv8i32(<vscale x 8 x i32> undef)
+ call <vscale x 16 x i32> @llvm.bitreverse.nxv16i32(<vscale x 16 x i32> undef)
call i64 @llvm.bitreverse.i64(i64 undef)
call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> undef)
call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> undef)
call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> undef)
call <16 x i64> @llvm.bitreverse.v16i64(<16 x i64> undef)
- call <vscale x 1 x i64> @llvm.bitreverse.nvx1i64(<vscale x 1 x i64> undef)
- call <vscale x 2 x i64> @llvm.bitreverse.nvx2i64(<vscale x 2 x i64> undef)
- call <vscale x 4 x i64> @llvm.bitreverse.nvx4i64(<vscale x 4 x i64> undef)
- call <vscale x 8 x i64> @llvm.bitreverse.nvx8i64(<vscale x 8 x i64> undef)
- call <vscale x 16 x i64> @llvm.bitreverse.nvx16i64(<vscale x 16 x i64> undef)
+ call <vscale x 1 x i64> @llvm.bitreverse.nxv1i64(<vscale x 1 x i64> undef)
+ call <vscale x 2 x i64> @llvm.bitreverse.nxv2i64(<vscale x 2 x i64> undef)
+ call <vscale x 4 x i64> @llvm.bitreverse.nxv4i64(<vscale x 4 x i64> undef)
+ call <vscale x 8 x i64> @llvm.bitreverse.nxv8i64(<vscale x 8 x i64> undef)
+ call <vscale x 16 x i64> @llvm.bitreverse.nxv16i64(<vscale x 16 x i64> undef)
ret void
}
@@ -249,41 +249,41 @@ define void @ctpop() {
call <4 x i8> @llvm.ctpop.v4i8(<4 x i8> undef)
call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> undef)
call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> undef)
- call <vscale x 1 x i8> @llvm.ctpop.nvx1i8(<vscale x 1 x i8> undef)
- call <vscale x 2 x i8> @llvm.ctpop.nvx2i8(<vscale x 2 x i8> undef)
- call <vscale x 4 x i8> @llvm.ctpop.nvx4i8(<vscale x 4 x i8> undef)
- call <vscale x 8 x i8> @llvm.ctpop.nvx8i8(<vscale x 8 x i8> undef)
- call <vscale x 16 x i8> @llvm.ctpop.nvx16i8(<vscale x 16 x i8> undef)
+ call <vscale x 1 x i8> @llvm.ctpop.nxv1i8(<vscale x 1 x i8> undef)
+ call <vscale x 2 x i8> @llvm.ctpop.nxv2i8(<vscale x 2 x i8> undef)
+ call <vscale x 4 x i8> @llvm.ctpop.nxv4i8(<vscale x 4 x i8> undef)
+ call <vscale x 8 x i8> @llvm.ctpop.nxv8i8(<vscale x 8 x i8> undef)
+ call <vscale x 16 x i8> @llvm.ctpop.nxv16i8(<vscale x 16 x i8> undef)
call i16 @llvm.ctpop.i16(i16 undef)
call <2 x i16> @llvm.ctpop.v2i16(<2 x i16> undef)
call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> undef)
call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> undef)
call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> undef)
- call <vscale x 1 x i16> @llvm.ctpop.nvx1i16(<vscale x 1 x i16> undef)
- call <vscale x 2 x i16> @llvm.ctpop.nvx2i16(<vscale x 2 x i16> undef)
- call <vscale x 4 x i16> @llvm.ctpop.nvx4i16(<vscale x 4 x i16> undef)
- call <vscale x 8 x i16> @llvm.ctpop.nvx8i16(<vscale x 8 x i16> undef)
- call <vscale x 16 x i16> @llvm.ctpop.nvx16i16(<vscale x 16 x i16> undef)
+ call <vscale x 1 x i16> @llvm.ctpop.nxv1i16(<vscale x 1 x i16> undef)
+ call <vscale x 2 x i16> @llvm.ctpop.nxv2i16(<vscale x 2 x i16> undef)
+ call <vscale x 4 x i16> @llvm.ctpop.nxv4i16(<vscale x 4 x i16> undef)
+ call <vscale x 8 x i16> @llvm.ctpop.nxv8i16(<vscale x 8 x i16> undef)
+ call <vscale x 16 x i16> @llvm.ctpop.nxv16i16(<vscale x 16 x i16> undef)
call i32 @llvm.ctpop.i32(i32 undef)
call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> undef)
call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> undef)
call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> undef)
call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> undef)
- call <vscale x 1 x i32> @llvm.ctpop.nvx1i32(<vscale x 1 x i32> undef)
- call <vscale x 2 x i32> @llvm.ctpop.nvx2i32(<vscale x 2 x i32> undef)
- call <vscale x 4 x i32> @llvm.ctpop.nvx4i32(<vscale x 4 x i32> undef)
- call <vscale x 8 x i32> @llvm.ctpop.nvx8i32(<vscale x 8 x i32> undef)
- call <vscale x 16 x i32> @llvm.ctpop.nvx16i32(<vscale x 16 x i32> undef)
+ call <vscale x 1 x i32> @llvm.ctpop.nxv1i32(<vscale x 1 x i32> undef)
+ call <vscale x 2 x i32> @llvm.ctpop.nxv2i32(<vscale x 2 x i32> undef)
+ call <vscale x 4 x i32> @llvm.ctpop.nxv4i32(<vscale x 4 x i32> undef)
+ call <vscale x 8 x i32> @llvm.ctpop.nxv8i32(<vscale x 8 x i32> undef)
+ call <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32> undef)
call i64 @llvm.ctpop.i64(i64 undef)
call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> undef)
call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> undef)
call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> undef)
call <16 x i64> @llvm.ctpop.v16i64(<16 x i64> undef)
- call <vscale x 1 x i64> @llvm.ctpop.nvx1i64(<vscale x 1 x i64> undef)
- call <vscale x 2 x i64> @llvm.ctpop.nvx2i64(<vscale x 2 x i64> undef)
- call <vscale x 4 x i64> @llvm.ctpop.nvx4i64(<vscale x 4 x i64> undef)
- call <vscale x 8 x i64> @llvm.ctpop.nvx8i64(<vscale x 8 x i64> undef)
- call <vscale x 16 x i64> @llvm.ctpop.nvx16i64(<vscale x 16 x i64> undef)
+ call <vscale x 1 x i64> @llvm.ctpop.nxv1i64(<vscale x 1 x i64> undef)
+ call <vscale x 2 x i64> @llvm.ctpop.nxv2i64(<vscale x 2 x i64> undef)
+ call <vscale x 4 x i64> @llvm.ctpop.nxv4i64(<vscale x 4 x i64> undef)
+ call <vscale x 8 x i64> @llvm.ctpop.nxv8i64(<vscale x 8 x i64> undef)
+ call <vscale x 16 x i64> @llvm.ctpop.nxv16i64(<vscale x 16 x i64> undef)
ret void
}
@@ -322,29 +322,29 @@ define void @vp_bswap() {
call <4 x i16> @llvm.vp.bswap.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
call <8 x i16> @llvm.vp.bswap.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
call <16 x i16> @llvm.vp.bswap.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
- call <vscale x 1 x i16> @llvm.vp.bswap.nvx1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x i16> @llvm.vp.bswap.nvx2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x i16> @llvm.vp.bswap.nvx4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x i16> @llvm.vp.bswap.nvx8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
- call <vscale x 16 x i16> @llvm.vp.bswap.nvx16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+ call <vscale x 1 x i16> @llvm.vp.bswap.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x i16> @llvm.vp.bswap.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x i16> @llvm.vp.bswap.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x i16> @llvm.vp.bswap.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 16 x i16> @llvm.vp.bswap.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
call <2 x i32> @llvm.vp.bswap.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
call <4 x i32> @llvm.vp.bswap.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
call <8 x i32> @llvm.vp.bswap.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
call <16 x i32> @llvm.vp.bswap.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
- call <vscale x 1 x i32> @llvm.vp.bswap.nvx1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x i32> @llvm.vp.bswap.nvx2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x i32> @llvm.vp.bswap.nvx4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x i32> @llvm.vp.bswap.nvx8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
- call <vscale x 16 x i32> @llvm.vp.bswap.nvx16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+ call <vscale x 1 x i32> @llvm.vp.bswap.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x i32> @llvm.vp.bswap.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x i32> @llvm.vp.bswap.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x i32> @llvm.vp.bswap.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 16 x i32> @llvm.vp.bswap.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
call <2 x i64> @llvm.vp.bswap.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
call <4 x i64> @llvm.vp.bswap.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
call <8 x i64> @llvm.vp.bswap.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
call <16 x i64> @llvm.vp.bswap.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
- call <vscale x 1 x i64> @llvm.vp.bswap.nvx1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x i64> @llvm.vp.bswap.nvx2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x i64> @llvm.vp.bswap.nvx4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x i64> @llvm.vp.bswap.nvx8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
- call <vscale x 16 x i64> @llvm.vp.bswap.nvx16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+ call <vscale x 1 x i64> @llvm.vp.bswap.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x i64> @llvm.vp.bswap.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x i64> @llvm.vp.bswap.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x i64> @llvm.vp.bswap.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 16 x i64> @llvm.vp.bswap.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
ret void
}
@@ -392,38 +392,38 @@ define void @vp_ctpop() {
call <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
call <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
call <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
- call <vscale x 1 x i16> @llvm.vp.ctpop.nvx1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x i16> @llvm.vp.ctpop.nvx2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x i16> @llvm.vp.ctpop.nvx4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x i16> @llvm.vp.ctpop.nvx8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
- call <vscale x 16 x i16> @llvm.vp.ctpop.nvx16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+ call <vscale x 1 x i16> @llvm.vp.ctpop.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x i16> @llvm.vp.ctpop.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x i16> @llvm.vp.ctpop.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x i16> @llvm.vp.ctpop.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 16 x i16> @llvm.vp.ctpop.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
call <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
call <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
call <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
call <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
- call <vscale x 1 x i16> @llvm.vp.ctpop.nvx1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x i16> @llvm.vp.ctpop.nvx2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x i16> @llvm.vp.ctpop.nvx4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x i16> @llvm.vp.ctpop.nvx8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
- call <vscale x 16 x i16> @llvm.vp.ctpop.nvx16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+ call <vscale x 1 x i16> @llvm.vp.ctpop.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x i16> @llvm.vp.ctpop.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x i16> @llvm.vp.ctpop.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x i16> @llvm.vp.ctpop.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 16 x i16> @llvm.vp.ctpop.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
call <2 x i32> @llvm.vp.ctpop.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
call <4 x i32> @llvm.vp.ctpop.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
call <8 x i32> @llvm.vp.ctpop.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
call <16 x i32> @llvm.vp.ctpop.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
- call <vscale x 1 x i32> @llvm.vp.ctpop.nvx1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x i32> @llvm.vp.ctpop.nvx2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x i32> @llvm.vp.ctpop.nvx4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x i32> @llvm.vp.ctpop.nvx8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
- call <vscale x 16 x i32> @llvm.vp.ctpop.nvx16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+ call <vscale x 1 x i32> @llvm.vp.ctpop.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x i32> @llvm.vp.ctpop.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x i32> @llvm.vp.ctpop.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x i32> @llvm.vp.ctpop.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 16 x i32> @llvm.vp.ctpop.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
call <2 x i64> @llvm.vp.ctpop.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
call <4 x i64> @llvm.vp.ctpop.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
call <8 x i64> @llvm.vp.ctpop.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
call <16 x i64> @llvm.vp.ctpop.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
- call <vscale x 1 x i64> @llvm.vp.ctpop.nvx1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x i64> @llvm.vp.ctpop.nvx2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x i64> @llvm.vp.ctpop.nvx4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x i64> @llvm.vp.ctpop.nvx8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
- call <vscale x 16 x i64> @llvm.vp.ctpop.nvx16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+ call <vscale x 1 x i64> @llvm.vp.ctpop.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x i64> @llvm.vp.ctpop.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x i64> @llvm.vp.ctpop.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x i64> @llvm.vp.ctpop.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 16 x i64> @llvm.vp.ctpop.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
ret void
}
@@ -484,51 +484,51 @@ define void @vp_ctlz() {
call <4 x i8> @llvm.vp.ctlz.v4i8(<4 x i8> undef, i1 false, <4 x i1> undef, i32 undef)
call <8 x i8> @llvm.vp.ctlz.v8i8(<8 x i8> undef, i1 false, <8 x i1> undef, i32 undef)
call <16 x i8> @llvm.vp.ctlz.v16i8(<16 x i8> undef, i1 false, <16 x i1> undef, i32 undef)
- call <vscale x 1 x i8> @llvm.vp.ctlz.nvx1i8(<vscale x 1 x i8> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x i8> @llvm.vp.ctlz.nvx2i8(<vscale x 2 x i8> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x i8> @llvm.vp.ctlz.nvx4i8(<vscale x 4 x i8> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x i8> @llvm.vp.ctlz.nvx8i8(<vscale x 8 x i8> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
- call <vscale x 16 x i8> @llvm.vp.ctlz.nvx16i8(<vscale x 16 x i8> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
- call <vscale x 32 x i8> @llvm.vp.ctlz.nvx32i8(<vscale x 32 x i8> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
- call <vscale x 64 x i8> @llvm.vp.ctlz.nvx64i8(<vscale x 64 x i8> undef, i1 false, <vscale x 64 x i1> undef, i32 undef)
+ call <vscale x 1 x i8> @llvm.vp.ctlz.nxv1i8(<vscale x 1 x i8> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x i8> @llvm.vp.ctlz.nxv2i8(<vscale x 2 x i8> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x i8> @llvm.vp.ctlz.nxv4i8(<vscale x 4 x i8> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x i8> @llvm.vp.ctlz.nxv8i8(<vscale x 8 x i8> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 16 x i8> @llvm.vp.ctlz.nxv16i8(<vscale x 16 x i8> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
+ call <vscale x 32 x i8> @llvm.vp.ctlz.nxv32i8(<vscale x 32 x i8> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
+ call <vscale x 64 x i8> @llvm.vp.ctlz.nxv64i8(<vscale x 64 x i8> undef, i1 false, <vscale x 64 x i1> undef, i32 undef)
call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> undef, i1 false, <2 x i1> undef, i32 undef)
call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> undef, i1 false, <4 x i1> undef, i32 undef)
call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> undef, i1 false, <8 x i1> undef, i32 undef)
call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> undef, i1 false, <16 x i1> undef, i32 undef)
- call <vscale x 1 x i16> @llvm.vp.ctlz.nvx1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x i16> @llvm.vp.ctlz.nvx2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x i16> @llvm.vp.ctlz.nvx4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x i16> @llvm.vp.ctlz.nvx8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
- call <vscale x 16 x i16> @llvm.vp.ctlz.nvx16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
- call <vscale x 32 x i16> @llvm.vp.ctlz.nvx32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
+ call <vscale x 1 x i16> @llvm.vp.ctlz.nxv1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x i16> @llvm.vp.ctlz.nxv2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x i16> @llvm.vp.ctlz.nxv4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x i16> @llvm.vp.ctlz.nxv8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 16 x i16> @llvm.vp.ctlz.nxv16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
+ call <vscale x 32 x i16> @llvm.vp.ctlz.nxv32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> undef, i1 false, <2 x i1> undef, i32 undef)
call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> undef, i1 false, <4 x i1> undef, i32 undef)
call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> undef, i1 false, <8 x i1> undef, i32 undef)
call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> undef, i1 false, <16 x i1> undef, i32 undef)
- call <vscale x 1 x i16> @llvm.vp.ctlz.nvx1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x i16> @llvm.vp.ctlz.nvx2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x i16> @llvm.vp.ctlz.nvx4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x i16> @llvm.vp.ctlz.nvx8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
- call <vscale x 16 x i16> @llvm.vp.ctlz.nvx16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
- call <vscale x 32 x i16> @llvm.vp.ctlz.nvx32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
+ call <vscale x 1 x i16> @llvm.vp.ctlz.nxv1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x i16> @llvm.vp.ctlz.nxv2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x i16> @llvm.vp.ctlz.nxv4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x i16> @llvm.vp.ctlz.nxv8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 16 x i16> @llvm.vp.ctlz.nxv16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
+ call <vscale x 32 x i16> @llvm.vp.ctlz.nxv32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
call <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32> undef, i1 false, <2 x i1> undef, i32 undef)
call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> undef, i1 false, <4 x i1> undef, i32 undef)
call <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32> undef, i1 false, <8 x i1> undef, i32 undef)
call <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32> undef, i1 false, <16 x i1> undef, i32 undef)
- call <vscale x 1 x i32> @llvm.vp.ctlz.nvx1i32(<vscale x 1 x i32> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x i32> @llvm.vp.ctlz.nvx2i32(<vscale x 2 x i32> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x i32> @llvm.vp.ctlz.nvx4i32(<vscale x 4 x i32> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x i32> @llvm.vp.ctlz.nvx8i32(<vscale x 8 x i32> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
- call <vscale x 16 x i32> @llvm.vp.ctlz.nvx16i32(<vscale x 16 x i32> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
+ call <vscale x 1 x i32> @llvm.vp.ctlz.nxv1i32(<vscale x 1 x i32> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x i32> @llvm.vp.ctlz.nxv2i32(<vscale x 2 x i32> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x i32> @llvm.vp.ctlz.nxv4i32(<vscale x 4 x i32> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x i32> @llvm.vp.ctlz.nxv8i32(<vscale x 8 x i32> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 16 x i32> @llvm.vp.ctlz.nxv16i32(<vscale x 16 x i32> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
call <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64> undef, i1 false, <2 x i1> undef, i32 undef)
call <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64> undef, i1 false, <4 x i1> undef, i32 undef)
call <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64> undef, i1 false, <8 x i1> undef, i32 undef)
call <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64> undef, i1 false, <16 x i1> undef, i32 undef)
- call <vscale x 1 x i64> @llvm.vp.ctlz.nvx1i64(<vscale x 1 x i64> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x i64> @llvm.vp.ctlz.nvx2i64(<vscale x 2 x i64> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x i64> @llvm.vp.ctlz.nvx4i64(<vscale x 4 x i64> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x i64> @llvm.vp.ctlz.nvx8i64(<vscale x 8 x i64> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
- call <vscale x 16 x i64> @llvm.vp.ctlz.nvx16i64(<vscale x 16 x i64> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
+ call <vscale x 1 x i64> @llvm.vp.ctlz.nxv1i64(<vscale x 1 x i64> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x i64> @llvm.vp.ctlz.nxv2i64(<vscale x 2 x i64> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x i64> @llvm.vp.ctlz.nxv4i64(<vscale x 4 x i64> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x i64> @llvm.vp.ctlz.nxv8i64(<vscale x 8 x i64> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 16 x i64> @llvm.vp.ctlz.nxv16i64(<vscale x 16 x i64> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
ret void
}
@@ -589,51 +589,51 @@ define void @vp_cttz() {
call <4 x i8> @llvm.vp.cttz.v4i8(<4 x i8> undef, i1 false, <4 x i1> undef, i32 undef)
call <8 x i8> @llvm.vp.cttz.v8i8(<8 x i8> undef, i1 false, <8 x i1> undef, i32 undef)
call <16 x i8> @llvm.vp.cttz.v16i8(<16 x i8> undef, i1 false, <16 x i1> undef, i32 undef)
- call <vscale x 1 x i8> @llvm.vp.cttz.nvx1i8(<vscale x 1 x i8> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x i8> @llvm.vp.cttz.nvx2i8(<vscale x 2 x i8> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x i8> @llvm.vp.cttz.nvx4i8(<vscale x 4 x i8> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x i8> @llvm.vp.cttz.nvx8i8(<vscale x 8 x i8> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
- call <vscale x 16 x i8> @llvm.vp.cttz.nvx16i8(<vscale x 16 x i8> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
- call <vscale x 32 x i8> @llvm.vp.cttz.nvx32i8(<vscale x 32 x i8> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
- call <vscale x 64 x i8> @llvm.vp.cttz.nvx64i8(<vscale x 64 x i8> undef, i1 false, <vscale x 64 x i1> undef, i32 undef)
+ call <vscale x 1 x i8> @llvm.vp.cttz.nxv1i8(<vscale x 1 x i8> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x i8> @llvm.vp.cttz.nxv2i8(<vscale x 2 x i8> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x i8> @llvm.vp.cttz.nxv4i8(<vscale x 4 x i8> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x i8> @llvm.vp.cttz.nxv8i8(<vscale x 8 x i8> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 16 x i8> @llvm.vp.cttz.nxv16i8(<vscale x 16 x i8> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
+ call <vscale x 32 x i8> @llvm.vp.cttz.nxv32i8(<vscale x 32 x i8> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
+ call <vscale x 64 x i8> @llvm.vp.cttz.nxv64i8(<vscale x 64 x i8> undef, i1 false, <vscale x 64 x i1> undef, i32 undef)
call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> undef, i1 false, <2 x i1> undef, i32 undef)
call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> undef, i1 false, <4 x i1> undef, i32 undef)
call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> undef, i1 false, <8 x i1> undef, i32 undef)
call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> undef, i1 false, <16 x i1> undef, i32 undef)
- call <vscale x 1 x i16> @llvm.vp.cttz.nvx1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x i16> @llvm.vp.cttz.nvx2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x i16> @llvm.vp.cttz.nvx4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x i16> @llvm.vp.cttz.nvx8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
- call <vscale x 16 x i16> @llvm.vp.cttz.nvx16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
- call <vscale x 32 x i16> @llvm.vp.cttz.nvx32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
+ call <vscale x 1 x i16> @llvm.vp.cttz.nxv1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
+ call <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> undef, i1 false, <2 x i1> undef, i32 undef)
call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> undef, i1 false, <4 x i1> undef, i32 undef)
call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> undef, i1 false, <8 x i1> undef, i32 undef)
call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> undef, i1 false, <16 x i1> undef, i32 undef)
- call <vscale x 1 x i16> @llvm.vp.cttz.nvx1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x i16> @llvm.vp.cttz.nvx2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x i16> @llvm.vp.cttz.nvx4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x i16> @llvm.vp.cttz.nvx8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
- call <vscale x 16 x i16> @llvm.vp.cttz.nvx16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
- call <vscale x 32 x i16> @llvm.vp.cttz.nvx32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
+ call <vscale x 1 x i16> @llvm.vp.cttz.nxv1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
+ call <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
call <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32> undef, i1 false, <2 x i1> undef, i32 undef)
call <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32> undef, i1 false, <4 x i1> undef, i32 undef)
call <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32> undef, i1 false, <8 x i1> undef, i32 undef)
call <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32> undef, i1 false, <16 x i1> undef, i32 undef)
- call <vscale x 1 x i32> @llvm.vp.cttz.nvx1i32(<vscale x 1 x i32> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x i32> @llvm.vp.cttz.nvx2i32(<vscale x 2 x i32> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x i32> @llvm.vp.cttz.nvx4i32(<vscale x 4 x i32> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x i32> @llvm.vp.cttz.nvx8i32(<vscale x 8 x i32> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
- call <vscale x 16 x i32> @llvm.vp.cttz.nvx16i32(<vscale x 16 x i32> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
+ call <vscale x 1 x i32> @llvm.vp.cttz.nxv1i32(<vscale x 1 x i32> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x i32> @llvm.vp.cttz.nxv2i32(<vscale x 2 x i32> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x i32> @llvm.vp.cttz.nxv4i32(<vscale x 4 x i32> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x i32> @llvm.vp.cttz.nxv8i32(<vscale x 8 x i32> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 16 x i32> @llvm.vp.cttz.nxv16i32(<vscale x 16 x i32> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
call <2 x i64> @llvm.vp.cttz.v2i64(<2 x i64> undef, i1 false, <2 x i1> undef, i32 undef)
call <4 x i64> @llvm.vp.cttz.v4i64(<4 x i64> undef, i1 false, <4 x i1> undef, i32 undef)
call <8 x i64> @llvm.vp.cttz.v8i64(<8 x i64> undef, i1 false, <8 x i1> undef, i32 undef)
call <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64> undef, i1 false, <16 x i1> undef, i32 undef)
- call <vscale x 1 x i64> @llvm.vp.cttz.nvx1i64(<vscale x 1 x i64> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
- call <vscale x 2 x i64> @llvm.vp.cttz.nvx2i64(<vscale x 2 x i64> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
- call <vscale x 4 x i64> @llvm.vp.cttz.nvx4i64(<vscale x 4 x i64> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
- call <vscale x 8 x i64> @llvm.vp.cttz.nvx8i64(<vscale x 8 x i64> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
- call <vscale x 16 x i64> @llvm.vp.cttz.nvx16i64(<vscale x 16 x i64> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
+ call <vscale x 1 x i64> @llvm.vp.cttz.nxv1i64(<vscale x 1 x i64> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
+ call <vscale x 2 x i64> @llvm.vp.cttz.nxv2i64(<vscale x 2 x i64> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
+ call <vscale x 4 x i64> @llvm.vp.cttz.nxv4i64(<vscale x 4 x i64> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
+ call <vscale x 8 x i64> @llvm.vp.cttz.nxv8i64(<vscale x 8 x i64> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
+ call <vscale x 16 x i64> @llvm.vp.cttz.nxv16i64(<vscale x 16 x i64> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
ret void
}
@@ -642,255 +642,255 @@ declare <2 x i16> @llvm.bswap.v2i16(<2 x i16>)
declare <4 x i16> @llvm.bswap.v4i16(<4 x i16>)
declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
declare <16 x i16> @llvm.bswap.v16i16(<16 x i16>)
-declare <vscale x 1 x i16> @llvm.bswap.nvx1i16(<vscale x 1 x i16>)
-declare <vscale x 2 x i16> @llvm.bswap.nvx2i16(<vscale x 2 x i16>)
-declare <vscale x 4 x i16> @llvm.bswap.nvx4i16(<vscale x 4 x i16>)
-declare <vscale x 8 x i16> @llvm.bswap.nvx8i16(<vscale x 8 x i16>)
-declare <vscale x 16 x i16> @llvm.bswap.nvx16i16(<vscale x 16 x i16>)
+declare <vscale x 1 x i16> @llvm.bswap.nxv1i16(<vscale x 1 x i16>)
+declare <vscale x 2 x i16> @llvm.bswap.nxv2i16(<vscale x 2 x i16>)
+declare <vscale x 4 x i16> @llvm.bswap.nxv4i16(<vscale x 4 x i16>)
+declare <vscale x 8 x i16> @llvm.bswap.nxv8i16(<vscale x 8 x i16>)
+declare <vscale x 16 x i16> @llvm.bswap.nxv16i16(<vscale x 16 x i16>)
declare i32 @llvm.bswap.i32(i32)
declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>)
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
declare <8 x i32> @llvm.bswap.v8i32(<8 x i32>)
declare <16 x i32> @llvm.bswap.v16i32(<16 x i32>)
-declare <vscale x 1 x i32> @llvm.bswap.nvx1i32(<vscale x 1 x i32>)
-declare <vscale x 2 x i32> @llvm.bswap.nvx2i32(<vscale x 2 x i32>)
-declare <vscale x 4 x i32> @llvm.bswap.nvx4i32(<vscale x 4 x i32>)
-declare <vscale x 8 x i32> @llvm.bswap.nvx8i32(<vscale x 8 x i32>)
-declare <vscale x 16 x i32> @llvm.bswap.nvx16i32(<vscale x 16 x i32>)
+declare <vscale x 1 x i32> @llvm.bswap.nxv1i32(<vscale x 1 x i32>)
+declare <vscale x 2 x i32> @llvm.bswap.nxv2i32(<vscale x 2 x i32>)
+declare <vscale x 4 x i32> @llvm.bswap.nxv4i32(<vscale x 4 x i32>)
+declare <vscale x 8 x i32> @llvm.bswap.nxv8i32(<vscale x 8 x i32>)
+declare <vscale x 16 x i32> @llvm.bswap.nxv16i32(<vscale x 16 x i32>)
declare i64 @llvm.bswap.i64(i64)
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>)
declare <8 x i64> @llvm.bswap.v8i64(<8 x i64>)
declare <16 x i64> @llvm.bswap.v16i64(<16 x i64>)
-declare <vscale x 1 x i64> @llvm.bswap.nvx1i64(<vscale x 1 x i64>)
-declare <vscale x 2 x i64> @llvm.bswap.nvx2i64(<vscale x 2 x i64>)
-declare <vscale x 4 x i64> @llvm.bswap.nvx4i64(<vscale x 4 x i64>)
-declare <vscale x 8 x i64> @llvm.bswap.nvx8i64(<vscale x 8 x i64>)
-declare <vscale x 16 x i64> @llvm.bswap.nvx16i64(<vscale x 16 x i64>)
+declare <vscale x 1 x i64> @llvm.bswap.nxv1i64(<vscale x 1 x i64>)
+declare <vscale x 2 x i64> @llvm.bswap.nxv2i64(<vscale x 2 x i64>)
+declare <vscale x 4 x i64> @llvm.bswap.nxv4i64(<vscale x 4 x i64>)
+declare <vscale x 8 x i64> @llvm.bswap.nxv8i64(<vscale x 8 x i64>)
+declare <vscale x 16 x i64> @llvm.bswap.nxv16i64(<vscale x 16 x i64>)
declare i8 @llvm.bitreverse.i8(i8)
declare <2 x i8> @llvm.bitreverse.v2i8(<2 x i8>)
declare <4 x i8> @llvm.bitreverse.v4i8(<4 x i8>)
declare <8 x i8> @llvm.bitreverse.v8i8(<8 x i8>)
declare <16 x i8> @llvm.bitreverse.v16i8(<16 x i8>)
-declare <vscale x 1 x i8> @llvm.bitreverse.nvx1i8(<vscale x 1 x i8>)
-declare <vscale x 2 x i8> @llvm.bitreverse.nvx2i8(<vscale x 2 x i8>)
-declare <vscale x 4 x i8> @llvm.bitreverse.nvx4i8(<vscale x 4 x i8>)
-declare <vscale x 8 x i8> @llvm.bitreverse.nvx8i8(<vscale x 8 x i8>)
-declare <vscale x 16 x i8> @llvm.bitreverse.nvx16i8(<vscale x 16 x i8>)
+declare <vscale x 1 x i8> @llvm.bitreverse.nxv1i8(<vscale x 1 x i8>)
+declare <vscale x 2 x i8> @llvm.bitreverse.nxv2i8(<vscale x 2 x i8>)
+declare <vscale x 4 x i8> @llvm.bitreverse.nxv4i8(<vscale x 4 x i8>)
+declare <vscale x 8 x i8> @llvm.bitreverse.nxv8i8(<vscale x 8 x i8>)
+declare <vscale x 16 x i8> @llvm.bitreverse.nxv16i8(<vscale x 16 x i8>)
declare i16 @llvm.bitreverse.i16(i16)
declare <2 x i16> @llvm.bitreverse.v2i16(<2 x i16>)
declare <4 x i16> @llvm.bitreverse.v4i16(<4 x i16>)
declare <8 x i16> @llvm.bitreverse.v8i16(<8 x i16>)
declare <16 x i16> @llvm.bitreverse.v16i16(<16 x i16>)
-declare <vscale x 1 x i16> @llvm.bitreverse.nvx1i16(<vscale x 1 x i16>)
-declare <vscale x 2 x i16> @llvm.bitreverse.nvx2i16(<vscale x 2 x i16>)
-declare <vscale x 4 x i16> @llvm.bitreverse.nvx4i16(<vscale x 4 x i16>)
-declare <vscale x 8 x i16> @llvm.bitreverse.nvx8i16(<vscale x 8 x i16>)
-declare <vscale x 16 x i16> @llvm.bitreverse.nvx16i16(<vscale x 16 x i16>)
+declare <vscale x 1 x i16> @llvm.bitreverse.nxv1i16(<vscale x 1 x i16>)
+declare <vscale x 2 x i16> @llvm.bitreverse.nxv2i16(<vscale x 2 x i16>)
+declare <vscale x 4 x i16> @llvm.bitreverse.nxv4i16(<vscale x 4 x i16>)
+declare <vscale x 8 x i16> @llvm.bitreverse.nxv8i16(<vscale x 8 x i16>)
+declare <vscale x 16 x i16> @llvm.bitreverse.nxv16i16(<vscale x 16 x i16>)
declare i32 @llvm.bitreverse.i32(i32)
declare <2 x i32> @llvm.bitreverse.v2i32(<2 x i32>)
declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>)
declare <8 x i32> @llvm.bitreverse.v8i32(<8 x i32>)
declare <16 x i32> @llvm.bitreverse.v16i32(<16 x i32>)
-declare <vscale x 1 x i32> @llvm.bitreverse.nvx1i32(<vscale x 1 x i32>)
-declare <vscale x 2 x i32> @llvm.bitreverse.nvx2i32(<vscale x 2 x i32>)
-declare <vscale x 4 x i32> @llvm.bitreverse.nvx4i32(<vscale x 4 x i32>)
-declare <vscale x 8 x i32> @llvm.bitreverse.nvx8i32(<vscale x 8 x i32>)
-declare <vscale x 16 x i32> @llvm.bitreverse.nvx16i32(<vscale x 16 x i32>)
+declare <vscale x 1 x i32> @llvm.bitreverse.nxv1i32(<vscale x 1 x i32>)
+declare <vscale x 2 x i32> @llvm.bitreverse.nxv2i32(<vscale x 2 x i32>)
+declare <vscale x 4 x i32> @llvm.bitreverse.nxv4i32(<vscale x 4 x i32>)
+declare <vscale x 8 x i32> @llvm.bitreverse.nxv8i32(<vscale x 8 x i32>)
+declare <vscale x 16 x i32> @llvm.bitreverse.nxv16i32(<vscale x 16 x i32>)
declare i64 @llvm.bitreverse.i64(i64)
declare <2 x i64> @llvm.bitreverse.v2i64(<2 x i64>)
declare <4 x i64> @llvm.bitreverse.v4i64(<4 x i64>)
declare <8 x i64> @llvm.bitreverse.v8i64(<8 x i64>)
declare <16 x i64> @llvm.bitreverse.v16i64(<16 x i64>)
-declare <vscale x 1 x i64> @llvm.bitreverse.nvx1i64(<vscale x 1 x i64>)
-declare <vscale x 2 x i64> @llvm.bitreverse.nvx2i64(<vscale x 2 x i64>)
-declare <vscale x 4 x i64> @llvm.bitreverse.nvx4i64(<vscale x 4 x i64>)
-declare <vscale x 8 x i64> @llvm.bitreverse.nvx8i64(<vscale x 8 x i64>)
-declare <vscale x 16 x i64> @llvm.bitreverse.nvx16i64(<vscale x 16 x i64>)
+declare <vscale x 1 x i64> @llvm.bitreverse.nxv1i64(<vscale x 1 x i64>)
+declare <vscale x 2 x i64> @llvm.bitreverse.nxv2i64(<vscale x 2 x i64>)
+declare <vscale x 4 x i64> @llvm.bitreverse.nxv4i64(<vscale x 4 x i64>)
+declare <vscale x 8 x i64> @llvm.bitreverse.nxv8i64(<vscale x 8 x i64>)
+declare <vscale x 16 x i64> @llvm.bitreverse.nxv16i64(<vscale x 16 x i64>)
declare i8 @llvm.ctpop.i8(i8)
declare <2 x i8> @llvm.ctpop.v2i8(<2 x i8>)
declare <4 x i8> @llvm.ctpop.v4i8(<4 x i8>)
declare <8 x i8> @llvm.ctpop.v8i8(<8 x i8>)
declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>)
-declare <vscale x 1 x i8> @llvm.ctpop.nvx1i8(<vscale x 1 x i8>)
-declare <vscale x 2 x i8> @llvm.ctpop.nvx2i8(<vscale x 2 x i8>)
-declare <vscale x 4 x i8> @llvm.ctpop.nvx4i8(<vscale x 4 x i8>)
-declare <vscale x 8 x i8> @llvm.ctpop.nvx8i8(<vscale x 8 x i8>)
-declare <vscale x 16 x i8> @llvm.ctpop.nvx16i8(<vscale x 16 x i8>)
+declare <vscale x 1 x i8> @llvm.ctpop.nxv1i8(<vscale x 1 x i8>)
+declare <vscale x 2 x i8> @llvm.ctpop.nxv2i8(<vscale x 2 x i8>)
+declare <vscale x 4 x i8> @llvm.ctpop.nxv4i8(<vscale x 4 x i8>)
+declare <vscale x 8 x i8> @llvm.ctpop.nxv8i8(<vscale x 8 x i8>)
+declare <vscale x 16 x i8> @llvm.ctpop.nxv16i8(<vscale x 16 x i8>)
declare i16 @llvm.ctpop.i16(i16)
declare <2 x i16> @llvm.ctpop.v2i16(<2 x i16>)
declare <4 x i16> @llvm.ctpop.v4i16(<4 x i16>)
declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>)
declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>)
-declare <vscale x 1 x i16> @llvm.ctpop.nvx1i16(<vscale x 1 x i16>)
-declare <vscale x 2 x i16> @llvm.ctpop.nvx2i16(<vscale x 2 x i16>)
-declare <vscale x 4 x i16> @llvm.ctpop.nvx4i16(<vscale x 4 x i16>)
-declare <vscale x 8 x i16> @llvm.ctpop.nvx8i16(<vscale x 8 x i16>)
-declare <vscale x 16 x i16> @llvm.ctpop.nvx16i16(<vscale x 16 x i16>)
+declare <vscale x 1 x i16> @llvm.ctpop.nxv1i16(<vscale x 1 x i16>)
+declare <vscale x 2 x i16> @llvm.ctpop.nxv2i16(<vscale x 2 x i16>)
+declare <vscale x 4 x i16> @llvm.ctpop.nxv4i16(<vscale x 4 x i16>)
+declare <vscale x 8 x i16> @llvm.ctpop.nxv8i16(<vscale x 8 x i16>)
+declare <vscale x 16 x i16> @llvm.ctpop.nxv16i16(<vscale x 16 x i16>)
declare i32 @llvm.ctpop.i32(i32)
declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)
declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>)
declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>)
-declare <vscale x 1 x i32> @llvm.ctpop.nvx1i32(<vscale x 1 x i32>)
-declare <vscale x 2 x i32> @llvm.ctpop.nvx2i32(<vscale x 2 x i32>)
-declare <vscale x 4 x i32> @llvm.ctpop.nvx4i32(<vscale x 4 x i32>)
-declare <vscale x 8 x i32> @llvm.ctpop.nvx8i32(<vscale x 8 x i32>)
-declare <vscale x 16 x i32> @llvm.ctpop.nvx16i32(<vscale x 16 x i32>)
+declare <vscale x 1 x i32> @llvm.ctpop.nxv1i32(<vscale x 1 x i32>)
+declare <vscale x 2 x i32> @llvm.ctpop.nxv2i32(<vscale x 2 x i32>)
+declare <vscale x 4 x i32> @llvm.ctpop.nxv4i32(<vscale x 4 x i32>)
+declare <vscale x 8 x i32> @llvm.ctpop.nxv8i32(<vscale x 8 x i32>)
+declare <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32>)
declare i64 @llvm.ctpop.i64(i64)
declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>)
declare <8 x i64> @llvm.ctpop.v8i64(<8 x i64>)
declare <16 x i64> @llvm.ctpop.v16i64(<16 x i64>)
-declare <vscale x 1 x i64> @llvm.ctpop.nvx1i64(<vscale x 1 x i64>)
-declare <vscale x 2 x i64> @llvm.ctpop.nvx2i64(<vscale x 2 x i64>)
-declare <vscale x 4 x i64> @llvm.ctpop.nvx4i64(<vscale x 4 x i64>)
-declare <vscale x 8 x i64> @llvm.ctpop.nvx8i64(<vscale x 8 x i64>)
-declare <vscale x 16 x i64> @llvm.ctpop.nvx16i64(<vscale x 16 x i64>)
+declare <vscale x 1 x i64> @llvm.ctpop.nxv1i64(<vscale x 1 x i64>)
+declare <vscale x 2 x i64> @llvm.ctpop.nxv2i64(<vscale x 2 x i64>)
+declare <vscale x 4 x i64> @llvm.ctpop.nxv4i64(<vscale x 4 x i64>)
+declare <vscale x 8 x i64> @llvm.ctpop.nxv8i64(<vscale x 8 x i64>)
+declare <vscale x 16 x i64> @llvm.ctpop.nxv16i64(<vscale x 16 x i64>)
declare <2 x i16> @llvm.vp.bswap.v2i16(<2 x i16>, <2 x i1>, i32)
declare <4 x i16> @llvm.vp.bswap.v4i16(<4 x i16>, <4 x i1>, i32)
declare <8 x i16> @llvm.vp.bswap.v8i16(<8 x i16>, <8 x i1>, i32)
declare <16 x i16> @llvm.vp.bswap.v16i16(<16 x i16>, <16 x i1>, i32)
-declare <vscale x 1 x i16> @llvm.vp.bswap.nvx1i16(<vscale x 1 x i16>, <vscale x 1 x i1>, i32)
-declare <vscale x 2 x i16> @llvm.vp.bswap.nvx2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i32)
-declare <vscale x 4 x i16> @llvm.vp.bswap.nvx4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i32)
-declare <vscale x 8 x i16> @llvm.vp.bswap.nvx8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i32)
-declare <vscale x 16 x i16> @llvm.vp.bswap.nvx16i16(<vscale x 16 x i16>, <vscale x 16 x i1>, i32)
+declare <vscale x 1 x i16> @llvm.vp.bswap.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i1>, i32)
+declare <vscale x 2 x i16> @llvm.vp.bswap.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i32)
+declare <vscale x 4 x i16> @llvm.vp.bswap.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i32)
+declare <vscale x 8 x i16> @llvm.vp.bswap.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i32)
+declare <vscale x 16 x i16> @llvm.vp.bswap.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i1>, i32)
declare <2 x i32> @llvm.vp.bswap.v2i32(<2 x i32>, <2 x i1>, i32)
declare <4 x i32> @llvm.vp.bswap.v4i32(<4 x i32>, <4 x i1>, i32)
declare <8 x i32> @llvm.vp.bswap.v8i32(<8 x i32>, <8 x i1>, i32)
declare <16 x i32> @llvm.vp.bswap.v16i32(<16 x i32>, <16 x i1>, i32)
-declare <vscale x 1 x i32> @llvm.vp.bswap.nvx1i32(<vscale x 1 x i32>, <vscale x 1 x i1>, i32)
-declare <vscale x 2 x i32> @llvm.vp.bswap.nvx2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32)
-declare <vscale x 4 x i32> @llvm.vp.bswap.nvx4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)
-declare <vscale x 8 x i32> @llvm.vp.bswap.nvx8i32(<vscale x 8 x i32>, <vscale x 8 x i1>, i32)
-declare <vscale x 16 x i32> @llvm.vp.bswap.nvx16i32(<vscale x 16 x i32>, <vscale x 16 x i1>, i32)
+declare <vscale x 1 x i32> @llvm.vp.bswap.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i1>, i32)
+declare <vscale x 2 x i32> @llvm.vp.bswap.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32)
+declare <vscale x 4 x i32> @llvm.vp.bswap.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)
+declare <vscale x 8 x i32> @llvm.vp.bswap.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i1>, i32)
+declare <vscale x 16 x i32> @llvm.vp.bswap.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i1>, i32)
declare <2 x i64> @llvm.vp.bswap.v2i64(<2 x i64>, <2 x i1>, i32)
declare <4 x i64> @llvm.vp.bswap.v4i64(<4 x i64>, <4 x i1>, i32)
declare <8 x i64> @llvm.vp.bswap.v8i64(<8 x i64>, <8 x i1>, i32)
declare <16 x i64> @llvm.vp.bswap.v16i64(<16 x i64>, <16 x i1>, i32)
-declare <vscale x 1 x i64> @llvm.vp.bswap.nvx1i64(<vscale x 1 x i64>, <vscale x 1 x i1>, i32)
-declare <vscale x 2 x i64> @llvm.vp.bswap.nvx2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)
-declare <vscale x 4 x i64> @llvm.vp.bswap.nvx4i64(<vscale x 4 x i64>, <vscale x 4 x i1>, i32)
-declare <vscale x 8 x i64> @llvm.vp.bswap.nvx8i64(<vscale x 8 x i64>, <vscale x 8 x i1>, i32)
-declare <vscale x 16 x i64> @llvm.vp.bswap.nvx16i64(<vscale x 16 x i64>, <vscale x 16 x i1>, i32)
+declare <vscale x 1 x i64> @llvm.vp.bswap.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+declare <vscale x 2 x i64> @llvm.vp.bswap.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)
+declare <vscale x 4 x i64> @llvm.vp.bswap.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i1>, i32)
+declare <vscale x 8 x i64> @llvm.vp.bswap.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i1>, i32)
+declare <vscale x 16 x i64> @llvm.vp.bswap.nxv16i64(<vscale x 16 x i64>, <vscale x 16 x i1>, i32)
declare <2 x i8> @llvm.vp.ctpop.v2i8(<2 x i8>, <2 x i1>, i32)
declare <4 x i8> @llvm.vp.ctpop.v4i8(<4 x i8>, <4 x i1>, i32)
declare <8 x i8> @llvm.vp.ctpop.v8i8(<8 x i8>, <8 x i1>, i32)
declare <16 x i8> @llvm.vp.ctpop.v16i8(<16 x i8>, <16 x i1>, i32)
-declare <vscale x 1 x i8> @llvm.vp.ctpop.nvx1i8(<vscale x 1 x i8>, <vscale x 1 x i1>, i32)
-declare <vscale x 2 x i8> @llvm.vp.ctpop.nvx2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, i32)
-declare <vscale x 4 x i8> @llvm.vp.ctpop.nvx4i8(<vscale x 4 x i8>, <vscale x 4 x i1>, i32)
-declare <vscale x 8 x i8> @llvm.vp.ctpop.nvx8i8(<vscale x 8 x i8>, <vscale x 8 x i1>, i32)
-declare <vscale x 16 x i8> @llvm.vp.ctpop.nvx16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i32)
+declare <vscale x 1 x i8> @llvm.vp.ctpop.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i1>, i32)
+declare <vscale x 2 x i8> @llvm.vp.ctpop.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, i32)
+declare <vscale x 4 x i8> @llvm.vp.ctpop.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i1>, i32)
+declare <vscale x 8 x i8> @llvm.vp.ctpop.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i1>, i32)
+declare <vscale x 16 x i8> @llvm.vp.ctpop.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i32)
declare <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16>, <2 x i1>, i32)
declare <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16>, <4 x i1>, i32)
declare <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16>, <8 x i1>, i32)
declare <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16>, <16 x i1>, i32)
-declare <vscale x 1 x i16> @llvm.vp.ctpop.nvx1i16(<vscale x 1 x i16>, <vscale x 1 x i1>, i32)
-declare <vscale x 2 x i16> @llvm.vp.ctpop.nvx2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i32)
-declare <vscale x 4 x i16> @llvm.vp.ctpop.nvx4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i32)
-declare <vscale x 8 x i16> @llvm.vp.ctpop.nvx8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i32)
-declare <vscale x 16 x i16> @llvm.vp.ctpop.nvx16i16(<vscale x 16 x i16>, <vscale x 16 x i1>, i32)
+declare <vscale x 1 x i16> @llvm.vp.ctpop.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i1>, i32)
+declare <vscale x 2 x i16> @llvm.vp.ctpop.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i32)
+declare <vscale x 4 x i16> @llvm.vp.ctpop.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i32)
+declare <vscale x 8 x i16> @llvm.vp.ctpop.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i32)
+declare <vscale x 16 x i16> @llvm.vp.ctpop.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i1>, i32)
declare <2 x i32> @llvm.vp.ctpop.v2i32(<2 x i32>, <2 x i1>, i32)
declare <4 x i32> @llvm.vp.ctpop.v4i32(<4 x i32>, <4 x i1>, i32)
declare <8 x i32> @llvm.vp.ctpop.v8i32(<8 x i32>, <8 x i1>, i32)
declare <16 x i32> @llvm.vp.ctpop.v16i32(<16 x i32>, <16 x i1>, i32)
-declare <vscale x 1 x i32> @llvm.vp.ctpop.nvx1i32(<vscale x 1 x i32>, <vscale x 1 x i1>, i32)
-declare <vscale x 2 x i32> @llvm.vp.ctpop.nvx2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32)
-declare <vscale x 4 x i32> @llvm.vp.ctpop.nvx4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)
-declare <vscale x 8 x i32> @llvm.vp.ctpop.nvx8i32(<vscale x 8 x i32>, <vscale x 8 x i1>, i32)
-declare <vscale x 16 x i32> @llvm.vp.ctpop.nvx16i32(<vscale x 16 x i32>, <vscale x 16 x i1>, i32)
+declare <vscale x 1 x i32> @llvm.vp.ctpop.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i1>, i32)
+declare <vscale x 2 x i32> @llvm.vp.ctpop.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32)
+declare <vscale x 4 x i32> @llvm.vp.ctpop.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)
+declare <vscale x 8 x i32> @llvm.vp.ctpop.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i1>, i32)
+declare <vscale x 16 x i32> @llvm.vp.ctpop.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i1>, i32)
declare <2 x i64> @llvm.vp.ctpop.v2i64(<2 x i64>, <2 x i1>, i32)
declare <4 x i64> @llvm.vp.ctpop.v4i64(<4 x i64>, <4 x i1>, i32)
declare <8 x i64> @llvm.vp.ctpop.v8i64(<8 x i64>, <8 x i1>, i32)
declare <16 x i64> @llvm.vp.ctpop.v16i64(<16 x i64>, <16 x i1>, i32)
-declare <vscale x 1 x i64> @llvm.vp.ctpop.nvx1i64(<vscale x 1 x i64>, <vscale x 1 x i1>, i32)
-declare <vscale x 2 x i64> @llvm.vp.ctpop.nvx2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)
-declare <vscale x 4 x i64> @llvm.vp.ctpop.nvx4i64(<vscale x 4 x i64>, <vscale x 4 x i1>, i32)
-declare <vscale x 8 x i64> @llvm.vp.ctpop.nvx8i64(<vscale x 8 x i64>, <vscale x 8 x i1>, i32)
-declare <vscale x 16 x i64> @llvm.vp.ctpop.nvx16i64(<vscale x 16 x i64>, <vscale x 16 x i1>, i32)
+declare <vscale x 1 x i64> @llvm.vp.ctpop.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+declare <vscale x 2 x i64> @llvm.vp.ctpop.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)
+declare <vscale x 4 x i64> @llvm.vp.ctpop.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i1>, i32)
+declare <vscale x 8 x i64> @llvm.vp.ctpop.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i1>, i32)
+declare <vscale x 16 x i64> @llvm.vp.ctpop.nxv16i64(<vscale x 16 x i64>, <vscale x 16 x i1>, i32)
declare <2 x i8> @llvm.vp.ctlz.v2i8(<2 x i8>, i1 immarg, <2 x i1>, i32)
declare <4 x i8> @llvm.vp.ctlz.v4i8(<4 x i8>, i1 immarg, <4 x i1>, i32)
declare <8 x i8> @llvm.vp.ctlz.v8i8(<8 x i8>, i1 immarg, <8 x i1>, i32)
declare <16 x i8> @llvm.vp.ctlz.v16i8(<16 x i8>, i1 immarg, <16 x i1>, i32)
-declare <vscale x 1 x i8> @llvm.vp.ctlz.nvx1i8(<vscale x 1 x i8>, i1 immarg, <vscale x 1 x i1>, i32)
-declare <vscale x 2 x i8> @llvm.vp.ctlz.nvx2i8(<vscale x 2 x i8>, i1 immarg, <vscale x 2 x i1>, i32)
-declare <vscale x 4 x i8> @llvm.vp.ctlz.nvx4i8(<vscale x 4 x i8>, i1 immarg, <vscale x 4 x i1>, i32)
-declare <vscale x 8 x i8> @llvm.vp.ctlz.nvx8i8(<vscale x 8 x i8>, i1 immarg, <vscale x 8 x i1>, i32)
-declare <vscale x 16 x i8> @llvm.vp.ctlz.nvx16i8(<vscale x 16 x i8>, i1 immarg, <vscale x 16 x i1>, i32)
-declare <vscale x 32 x i8> @llvm.vp.ctlz.nvx32i8(<vscale x 32 x i8>, i1 immarg, <vscale x 32 x i1>, i32)
-declare <vscale x 64 x i8> @llvm.vp.ctlz.nvx64i8(<vscale x 64 x i8>, i1 immarg, <vscale x 64 x i1>, i32)
+declare <vscale x 1 x i8> @llvm.vp.ctlz.nxv1i8(<vscale x 1 x i8>, i1 immarg, <vscale x 1 x i1>, i32)
+declare <vscale x 2 x i8> @llvm.vp.ctlz.nxv2i8(<vscale x 2 x i8>, i1 immarg, <vscale x 2 x i1>, i32)
+declare <vscale x 4 x i8> @llvm.vp.ctlz.nxv4i8(<vscale x 4 x i8>, i1 immarg, <vscale x 4 x i1>, i32)
+declare <vscale x 8 x i8> @llvm.vp.ctlz.nxv8i8(<vscale x 8 x i8>, i1 immarg, <vscale x 8 x i1>, i32)
+declare <vscale x 16 x i8> @llvm.vp.ctlz.nxv16i8(<vscale x 16 x i8>, i1 immarg, <vscale x 16 x i1>, i32)
+declare <vscale x 32 x i8> @llvm.vp.ctlz.nxv32i8(<vscale x 32 x i8>, i1 immarg, <vscale x 32 x i1>, i32)
+declare <vscale x 64 x i8> @llvm.vp.ctlz.nxv64i8(<vscale x 64 x i8>, i1 immarg, <vscale x 64 x i1>, i32)
declare <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16>, i1 immarg, <2 x i1>, i32)
declare <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16>, i1 immarg, <4 x i1>, i32)
declare <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16>, i1 immarg, <8 x i1>, i32)
declare <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16>, i1 immarg, <16 x i1>, i32)
-declare <vscale x 1 x i16> @llvm.vp.ctlz.nvx1i16(<vscale x 1 x i16>, i1 immarg, <vscale x 1 x i1>, i32)
-declare <vscale x 2 x i16> @llvm.vp.ctlz.nvx2i16(<vscale x 2 x i16>, i1 immarg, <vscale x 2 x i1>, i32)
-declare <vscale x 4 x i16> @llvm.vp.ctlz.nvx4i16(<vscale x 4 x i16>, i1 immarg, <vscale x 4 x i1>, i32)
-declare <vscale x 8 x i16> @llvm.vp.ctlz.nvx8i16(<vscale x 8 x i16>, i1 immarg, <vscale x 8 x i1>, i32)
-declare <vscale x 16 x i16> @llvm.vp.ctlz.nvx16i16(<vscale x 16 x i16>, i1 immarg, <vscale x 16 x i1>, i32)
-declare <vscale x 32 x i16> @llvm.vp.ctlz.nvx32i16(<vscale x 32 x i16>, i1 immarg, <vscale x 32 x i1>, i32)
+declare <vscale x 1 x i16> @llvm.vp.ctlz.nxv1i16(<vscale x 1 x i16>, i1 immarg, <vscale x 1 x i1>, i32)
+declare <vscale x 2 x i16> @llvm.vp.ctlz.nxv2i16(<vscale x 2 x i16>, i1 immarg, <vscale x 2 x i1>, i32)
+declare <vscale x 4 x i16> @llvm.vp.ctlz.nxv4i16(<vscale x 4 x i16>, i1 immarg, <vscale x 4 x i1>, i32)
+declare <vscale x 8 x i16> @llvm.vp.ctlz.nxv8i16(<vscale x 8 x i16>, i1 immarg, <vscale x 8 x i1>, i32)
+declare <vscale x 16 x i16> @llvm.vp.ctlz.nxv16i16(<vscale x 16 x i16>, i1 immarg, <vscale x 16 x i1>, i32)
+declare <vscale x 32 x i16> @llvm.vp.ctlz.nxv32i16(<vscale x 32 x i16>, i1 immarg, <vscale x 32 x i1>, i32)
declare <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32>, i1 immarg, <2 x i1>, i32)
declare <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32>, i1 immarg, <4 x i1>, i32)
declare <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32>, i1 immarg, <8 x i1>, i32)
declare <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32>, i1 immarg, <16 x i1>, i32)
-declare <vscale x 1 x i32> @llvm.vp.ctlz.nvx1i32(<vscale x 1 x i32>, i1 immarg, <vscale x 1 x i1>, i32)
-declare <vscale x 2 x i32> @llvm.vp.ctlz.nvx2i32(<vscale x 2 x i32>, i1 immarg, <vscale x 2 x i1>, i32)
-declare <vscale x 4 x i32> @llvm.vp.ctlz.nvx4i32(<vscale x 4 x i32>, i1 immarg, <vscale x 4 x i1>, i32)
-declare <vscale x 8 x i32> @llvm.vp.ctlz.nvx8i32(<vscale x 8 x i32>, i1 immarg, <vscale x 8 x i1>, i32)
-declare <vscale x 16 x i32> @llvm.vp.ctlz.nvx16i32(<vscale x 16 x i32>, i1 immarg, <vscale x 16 x i1>, i32)
+declare <vscale x 1 x i32> @llvm.vp.ctlz.nxv1i32(<vscale x 1 x i32>, i1 immarg, <vscale x 1 x i1>, i32)
+declare <vscale x 2 x i32> @llvm.vp.ctlz.nxv2i32(<vscale x 2 x i32>, i1 immarg, <vscale x 2 x i1>, i32)
+declare <vscale x 4 x i32> @llvm.vp.ctlz.nxv4i32(<vscale x 4 x i32>, i1 immarg, <vscale x 4 x i1>, i32)
+declare <vscale x 8 x i32> @llvm.vp.ctlz.nxv8i32(<vscale x 8 x i32>, i1 immarg, <vscale x 8 x i1>, i32)
+declare <vscale x 16 x i32> @llvm.vp.ctlz.nxv16i32(<vscale x 16 x i32>, i1 immarg, <vscale x 16 x i1>, i32)
declare <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64>, i1 immarg, <2 x i1>, i32)
declare <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64>, i1 immarg, <4 x i1>, i32)
declare <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64>, i1 immarg, <8 x i1>, i32)
declare <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64>, i1 immarg, <16 x i1>, i32)
-declare <vscale x 1 x i64> @llvm.vp.ctlz.nvx1i64(<vscale x 1 x i64>, i1 immarg, <vscale x 1 x i1>, i32)
-declare <vscale x 2 x i64> @llvm.vp.ctlz.nvx2i64(<vscale x 2 x i64>, i1 immarg, <vscale x 2 x i1>, i32)
-declare <vscale x 4 x i64> @llvm.vp.ctlz.nvx4i64(<vscale x 4 x i64>, i1 immarg, <vscale x 4 x i1>, i32)
-declare <vscale x 8 x i64> @llvm.vp.ctlz.nvx8i64(<vscale x 8 x i64>, i1 immarg, <vscale x 8 x i1>, i32)
-declare <vscale x 16 x i64> @llvm.vp.ctlz.nvx16i64(<vscale x 16 x i64>, i1 immarg, <vscale x 16 x i1>, i32)
+declare <vscale x 1 x i64> @llvm.vp.ctlz.nxv1i64(<vscale x 1 x i64>, i1 immarg, <vscale x 1 x i1>, i32)
+declare <vscale x 2 x i64> @llvm.vp.ctlz.nxv2i64(<vscale x 2 x i64>, i1 immarg, <vscale x 2 x i1>, i32)
+declare <vscale x 4 x i64> @llvm.vp.ctlz.nxv4i64(<vscale x 4 x i64>, i1 immarg, <vscale x 4 x i1>, i32)
+declare <vscale x 8 x i64> @llvm.vp.ctlz.nxv8i64(<vscale x 8 x i64>, i1 immarg, <vscale x 8 x i1>, i32)
+declare <vscale x 16 x i64> @llvm.vp.ctlz.nxv16i64(<vscale x 16 x i64>, i1 immarg, <vscale x 16 x i1>, i32)
declare <2 x i8> @llvm.vp.cttz.v2i8(<2 x i8>, i1 immarg, <2 x i1>, i32)
declare <4 x i8> @llvm.vp.cttz.v4i8(<4 x i8>, i1 immarg, <4 x i1>, i32)
declare <8 x i8> @llvm.vp.cttz.v8i8(<8 x i8>, i1 immarg, <8 x i1>, i32)
declare <16 x i8> @llvm.vp.cttz.v16i8(<16 x i8>, i1 immarg, <16 x i1>, i32)
-declare <vscale x 1 x i8> @llvm.vp.cttz.nvx1i8(<vscale x 1 x i8>, i1 immarg, <vscale x 1 x i1>, i32)
-declare <vscale x 2 x i8> @llvm.vp.cttz.nvx2i8(<vscale x 2 x i8>, i1 immarg, <vscale x 2 x i1>, i32)
-declare <vscale x 4 x i8> @llvm.vp.cttz.nvx4i8(<vscale x 4 x i8>, i1 immarg, <vscale x 4 x i1>, i32)
-declare <vscale x 8 x i8> @llvm.vp.cttz.nvx8i8(<vscale x 8 x i8>, i1 immarg, <vscale x 8 x i1>, i32)
-declare <vscale x 16 x i8> @llvm.vp.cttz.nvx16i8(<vscale x 16 x i8>, i1 immarg, <vscale x 16 x i1>, i32)
-declare <vscale x 32 x i8> @llvm.vp.cttz.nvx32i8(<vscale x 32 x i8>, i1 immarg, <vscale x 32 x i1>, i32)
-declare <vscale x 64 x i8> @llvm.vp.cttz.nvx64i8(<vscale x 64 x i8>, i1 immarg, <vscale x 64 x i1>, i32)
+declare <vscale x 1 x i8> @llvm.vp.cttz.nxv1i8(<vscale x 1 x i8>, i1 immarg, <vscale x 1 x i1>, i32)
+declare <vscale x 2 x i8> @llvm.vp.cttz.nxv2i8(<vscale x 2 x i8>, i1 immarg, <vscale x 2 x i1>, i32)
+declare <vscale x 4 x i8> @llvm.vp.cttz.nxv4i8(<vscale x 4 x i8>, i1 immarg, <vscale x 4 x i1>, i32)
+declare <vscale x 8 x i8> @llvm.vp.cttz.nxv8i8(<vscale x 8 x i8>, i1 immarg, <vscale x 8 x i1>, i32)
+declare <vscale x 16 x i8> @llvm.vp.cttz.nxv16i8(<vscale x 16 x i8>, i1 immarg, <vscale x 16 x i1>, i32)
+declare <vscale x 32 x i8> @llvm.vp.cttz.nxv32i8(<vscale x 32 x i8>, i1 immarg, <vscale x 32 x i1>, i32)
+declare <vscale x 64 x i8> @llvm.vp.cttz.nxv64i8(<vscale x 64 x i8>, i1 immarg, <vscale x 64 x i1>, i32)
declare <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16>, i1 immarg, <2 x i1>, i32)
declare <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16>, i1 immarg, <4 x i1>, i32)
declare <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16>, i1 immarg, <8 x i1>, i32)
declare <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16>, i1 immarg, <16 x i1>, i32)
-declare <vscale x 1 x i16> @llvm.vp.cttz.nvx1i16(<vscale x 1 x i16>, i1 immarg, <vscale x 1 x i1>, i32)
-declare <vscale x 2 x i16> @llvm.vp.cttz.nvx2i16(<vscale x 2 x i16>, i1 immarg, <vscale x 2 x i1>, i32)
-declare <vscale x 4 x i16> @llvm.vp.cttz.nvx4i16(<vscale x 4 x i16>, i1 immarg, <vscale x 4 x i1>, i32)
-declare <vscale x 8 x i16> @llvm.vp.cttz.nvx8i16(<vscale x 8 x i16>, i1 immarg, <vscale x 8 x i1>, i32)
-declare <vscale x 16 x i16> @llvm.vp.cttz.nvx16i16(<vscale x 16 x i16>, i1 immarg, <vscale x 16 x i1>, i32)
-declare <vscale x 32 x i16> @llvm.vp.cttz.nvx32i16(<vscale x 32 x i16>, i1 immarg, <vscale x 32 x i1>, i32)
+declare <vscale x 1 x i16> @llvm.vp.cttz.nxv1i16(<vscale x 1 x i16>, i1 immarg, <vscale x 1 x i1>, i32)
+declare <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16>, i1 immarg, <vscale x 2 x i1>, i32)
+declare <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16>, i1 immarg, <vscale x 4 x i1>, i32)
+declare <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16>, i1 immarg, <vscale x 8 x i1>, i32)
+declare <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16>, i1 immarg, <vscale x 16 x i1>, i32)
+declare <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16>, i1 immarg, <vscale x 32 x i1>, i32)
declare <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32>, i1 immarg, <2 x i1>, i32)
declare <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32>, i1 immarg, <4 x i1>, i32)
declare <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32>, i1 immarg, <8 x i1>, i32)
declare <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32>, i1 immarg, <16 x i1>, i32)
-declare <vscale x 1 x i32> @llvm.vp.cttz.nvx1i32(<vscale x 1 x i32>, i1 immarg, <vscale x 1 x i1>, i32)
-declare <vscale x 2 x i32> @llvm.vp.cttz.nvx2i32(<vscale x 2 x i32>, i1 immarg, <vscale x 2 x i1>, i32)
-declare <vscale x 4 x i32> @llvm.vp.cttz.nvx4i32(<vscale x 4 x i32>, i1 immarg, <vscale x 4 x i1>, i32)
-declare <vscale x 8 x i32> @llvm.vp.cttz.nvx8i32(<vscale x 8 x i32>, i1 immarg, <vscale x 8 x i1>, i32)
-declare <vscale x 16 x i32> @llvm.vp.cttz.nvx16i32(<vscale x 16 x i32>, i1 immarg, <vscale x 16 x i1>, i32)
+declare <vscale x 1 x i32> @llvm.vp.cttz.nxv1i32(<vscale x 1 x i32>, i1 immarg, <vscale x 1 x i1>, i32)
+declare <vscale x 2 x i32> @llvm.vp.cttz.nxv2i32(<vscale x 2 x i32>, i1 immarg, <vscale x 2 x i1>, i32)
+declare <vscale x 4 x i32> @llvm.vp.cttz.nxv4i32(<vscale x 4 x i32>, i1 immarg, <vscale x 4 x i1>, i32)
+declare <vscale x 8 x i32> @llvm.vp.cttz.nxv8i32(<vscale x 8 x i32>, i1 immarg, <vscale x 8 x i1>, i32)
+declare <vscale x 16 x i32> @llvm.vp.cttz.nxv16i32(<vscale x 16 x i32>, i1 immarg, <vscale x 16 x i1>, i32)
declare <2 x i64> @llvm.vp.cttz.v2i64(<2 x i64>, i1 immarg, <2 x i1>, i32)
declare <4 x i64> @llvm.vp.cttz.v4i64(<4 x i64>, i1 immarg, <4 x i1>, i32)
declare <8 x i64> @llvm.vp.cttz.v8i64(<8 x i64>, i1 immarg, <8 x i1>, i32)
declare <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64>, i1 immarg, <16 x i1>, i32)
-declare <vscale x 1 x i64> @llvm.vp.cttz.nvx1i64(<vscale x 1 x i64>, i1 immarg, <vscale x 1 x i1>, i32)
-declare <vscale x 2 x i64> @llvm.vp.cttz.nvx2i64(<vscale x 2 x i64>, i1 immarg, <vscale x 2 x i1>, i32)
-declare <vscale x 4 x i64> @llvm.vp.cttz.nvx4i64(<vscale x 4 x i64>, i1 immarg, <vscale x 4 x i1>, i32)
-declare <vscale x 8 x i64> @llvm.vp.cttz.nvx8i64(<vscale x 8 x i64>, i1 immarg, <vscale x 8 x i1>, i32)
-declare <vscale x 16 x i64> @llvm.vp.cttz.nvx16i64(<vscale x 16 x i64>, i1 immarg, <vscale x 16 x i1>, i32)
+declare <vscale x 1 x i64> @llvm.vp.cttz.nxv1i64(<vscale x 1 x i64>, i1 immarg, <vscale x 1 x i1>, i32)
+declare <vscale x 2 x i64> @llvm.vp.cttz.nxv2i64(<vscale x 2 x i64>, i1 immarg, <vscale x 2 x i1>, i32)
+declare <vscale x 4 x i64> @llvm.vp.cttz.nxv4i64(<vscale x 4 x i64>, i1 immarg, <vscale x 4 x i1>, i32)
+declare <vscale x 8 x i64> @llvm.vp.cttz.nxv8i64(<vscale x 8 x i64>, i1 immarg, <vscale x 8 x i1>, i32)
+declare <vscale x 16 x i64> @llvm.vp.cttz.nxv16i64(<vscale x 16 x i64>, i1 immarg, <vscale x 16 x i1>, i32)
diff --git a/llvm/test/Analysis/CostModel/RISCV/int-min-max.ll b/llvm/test/Analysis/CostModel/RISCV/int-min-max.ll
index 730b7ffb53d60e..10474d227851f9 100644
--- a/llvm/test/Analysis/CostModel/RISCV/int-min-max.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/int-min-max.ll
@@ -49,40 +49,40 @@ define void @smax() {
call <4 x i8> @llvm.smax.v4i8(<4 x i8> undef, <4 x i8> undef)
call <8 x i8> @llvm.smax.v8i8(<8 x i8> undef, <8 x i8> undef)
call <16 x i8> @llvm.smax.v16i8(<16 x i8> undef, <16 x i8> undef)
- call <vscale x 1 x i8> @llvm.smax.nvx1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef)
- call <vscale x 2 x i8> @llvm.smax.nvx2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef)
- call <vscale x 4 x i8> @llvm.smax.nvx4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef)
- call <vscale x 8 x i8> @llvm.smax.nvx8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef)
- call <vscale x 16 x i8> @llvm.smax.nvx16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
+ call <vscale x 1 x i8> @llvm.smax.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef)
+ call <vscale x 2 x i8> @llvm.smax.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef)
+ call <vscale x 4 x i8> @llvm.smax.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef)
+ call <vscale x 8 x i8> @llvm.smax.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef)
+ call <vscale x 16 x i8> @llvm.smax.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
call i16 @llvm.smax.i16(i16 undef, i16 undef)
call <2 x i16> @llvm.smax.v2i16(<2 x i16> undef, <2 x i16> undef)
call <4 x i16> @llvm.smax.v4i16(<4 x i16> undef, <4 x i16> undef)
call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef)
call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef)
- call <vscale x 1 x i16> @llvm.smax.nvx1i16(<vscale x 1 x i16> undef, <vscale x 1 x i16> undef)
- call <vscale x 2 x i16> @llvm.smax.nvx2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef)
- call <vscale x 4 x i16> @llvm.smax.nvx4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
- call <vscale x 8 x i16> @llvm.smax.nvx8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
- call <vscale x 16 x i16> @llvm.smax.nvx16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
+ call <vscale x 1 x i16> @llvm.smax.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i16> undef)
+ call <vscale x 2 x i16> @llvm.smax.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef)
+ call <vscale x 4 x i16> @llvm.smax.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
+ call <vscale x 8 x i16> @llvm.smax.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
+ call <vscale x 16 x i16> @llvm.smax.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
call i32 @llvm.smax.i32(i32 undef, i32 undef)
call <2 x i32> @llvm.smax.v2i32(<2 x i32> undef, <2 x i32> undef)
call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef)
call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef)
call <16 x i32> @llvm.smax.v16i32(<16 x i32> undef, <16 x i32> undef)
- call <vscale x 1 x i32> @llvm.smax.nvx1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef)
- call <vscale x 2 x i32> @llvm.smax.nvx2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
- call <vscale x 4 x i32> @llvm.smax.nvx4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
- call <vscale x 8 x i32> @llvm.smax.nvx8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
- call <vscale x 16 x i32> @llvm.smax.nvx16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef)
+ call <vscale x 1 x i32> @llvm.smax.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef)
+ call <vscale x 2 x i32> @llvm.smax.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
+ call <vscale x 4 x i32> @llvm.smax.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
+ call <vscale x 8 x i32> @llvm.smax.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
+ call <vscale x 16 x i32> @llvm.smax.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef)
call i64 @llvm.smax.i64(i64 undef, i64 undef)
call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef)
call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef)
call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef)
call <16 x i64> @llvm.smax.v16i64(<16 x i64> undef, <16 x i64> undef)
- call <vscale x 1 x i64> @llvm.smax.nvx1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> undef)
- call <vscale x 2 x i64> @llvm.smax.nvx2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
- call <vscale x 4 x i64> @llvm.smax.nvx4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef)
- call <vscale x 8 x i64> @llvm.smax.nvx8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef)
+ call <vscale x 1 x i64> @llvm.smax.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> undef)
+ call <vscale x 2 x i64> @llvm.smax.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
+ call <vscale x 4 x i64> @llvm.smax.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef)
+ call <vscale x 8 x i64> @llvm.smax.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef)
ret void
}
@@ -134,40 +134,40 @@ define void @smin() {
call <4 x i8> @llvm.smin.v4i8(<4 x i8> undef, <4 x i8> undef)
call <8 x i8> @llvm.smin.v8i8(<8 x i8> undef, <8 x i8> undef)
call <16 x i8> @llvm.smin.v16i8(<16 x i8> undef, <16 x i8> undef)
- call <vscale x 1 x i8> @llvm.smin.nvx1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef)
- call <vscale x 2 x i8> @llvm.smin.nvx2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef)
- call <vscale x 4 x i8> @llvm.smin.nvx4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef)
- call <vscale x 8 x i8> @llvm.smin.nvx8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef)
- call <vscale x 16 x i8> @llvm.smin.nvx16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
+ call <vscale x 1 x i8> @llvm.smin.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef)
+ call <vscale x 2 x i8> @llvm.smin.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef)
+ call <vscale x 4 x i8> @llvm.smin.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef)
+ call <vscale x 8 x i8> @llvm.smin.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef)
+ call <vscale x 16 x i8> @llvm.smin.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
call i16 @llvm.smin.i16(i16 undef, i16 undef)
call <2 x i16> @llvm.smin.v2i16(<2 x i16> undef, <2 x i16> undef)
call <4 x i16> @llvm.smin.v4i16(<4 x i16> undef, <4 x i16> undef)
call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef)
call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef)
- call <vscale x 1 x i16> @llvm.smin.nvx1i16(<vscale x 1 x i16> undef, <vscale x 1 x i16> undef)
- call <vscale x 2 x i16> @llvm.smin.nvx2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef)
- call <vscale x 4 x i16> @llvm.smin.nvx4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
- call <vscale x 8 x i16> @llvm.smin.nvx8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
- call <vscale x 16 x i16> @llvm.smin.nvx16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
+ call <vscale x 1 x i16> @llvm.smin.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i16> undef)
+ call <vscale x 2 x i16> @llvm.smin.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef)
+ call <vscale x 4 x i16> @llvm.smin.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
+ call <vscale x 8 x i16> @llvm.smin.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
+ call <vscale x 16 x i16> @llvm.smin.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
call i32 @llvm.smin.i32(i32 undef, i32 undef)
call <2 x i32> @llvm.smin.v2i32(<2 x i32> undef, <2 x i32> undef)
call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef)
call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef)
call <16 x i32> @llvm.smin.v16i32(<16 x i32> undef, <16 x i32> undef)
- call <vscale x 1 x i32> @llvm.smin.nvx1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef)
- call <vscale x 2 x i32> @llvm.smin.nvx2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
- call <vscale x 4 x i32> @llvm.smin.nvx4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
- call <vscale x 8 x i32> @llvm.smin.nvx8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
- call <vscale x 16 x i32> @llvm.smin.nvx16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef)
+ call <vscale x 1 x i32> @llvm.smin.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef)
+ call <vscale x 2 x i32> @llvm.smin.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
+ call <vscale x 4 x i32> @llvm.smin.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
+ call <vscale x 8 x i32> @llvm.smin.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
+ call <vscale x 16 x i32> @llvm.smin.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef)
call i64 @llvm.smin.i64(i64 undef, i64 undef)
call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef)
call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef)
call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef)
call <16 x i64> @llvm.smin.v16i64(<16 x i64> undef, <16 x i64> undef)
- call <vscale x 1 x i64> @llvm.smin.nvx1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> undef)
- call <vscale x 2 x i64> @llvm.smin.nvx2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
- call <vscale x 4 x i64> @llvm.smin.nvx4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef)
- call <vscale x 8 x i64> @llvm.smin.nvx8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef)
+ call <vscale x 1 x i64> @llvm.smin.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> undef)
+ call <vscale x 2 x i64> @llvm.smin.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
+ call <vscale x 4 x i64> @llvm.smin.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef)
+ call <vscale x 8 x i64> @llvm.smin.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef)
ret void
}
@@ -219,40 +219,40 @@ define void @umax() {
call <4 x i8> @llvm.umax.v4i8(<4 x i8> undef, <4 x i8> undef)
call <8 x i8> @llvm.umax.v8i8(<8 x i8> undef, <8 x i8> undef)
call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef)
- call <vscale x 1 x i8> @llvm.umax.nvx1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef)
- call <vscale x 2 x i8> @llvm.umax.nvx2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef)
- call <vscale x 4 x i8> @llvm.umax.nvx4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef)
- call <vscale x 8 x i8> @llvm.umax.nvx8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef)
- call <vscale x 16 x i8> @llvm.umax.nvx16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
+ call <vscale x 1 x i8> @llvm.umax.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef)
+ call <vscale x 2 x i8> @llvm.umax.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef)
+ call <vscale x 4 x i8> @llvm.umax.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef)
+ call <vscale x 8 x i8> @llvm.umax.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef)
+ call <vscale x 16 x i8> @llvm.umax.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
call i16 @llvm.umax.i16(i16 undef, i16 undef)
call <2 x i16> @llvm.umax.v2i16(<2 x i16> undef, <2 x i16> undef)
call <4 x i16> @llvm.umax.v4i16(<4 x i16> undef, <4 x i16> undef)
call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef)
call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef)
- call <vscale x 1 x i16> @llvm.umax.nvx1i16(<vscale x 1 x i16> undef, <vscale x 1 x i16> undef)
- call <vscale x 2 x i16> @llvm.umax.nvx2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef)
- call <vscale x 4 x i16> @llvm.umax.nvx4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
- call <vscale x 8 x i16> @llvm.umax.nvx8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
- call <vscale x 16 x i16> @llvm.umax.nvx16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
+ call <vscale x 1 x i16> @llvm.umax.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i16> undef)
+ call <vscale x 2 x i16> @llvm.umax.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef)
+ call <vscale x 4 x i16> @llvm.umax.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
+ call <vscale x 8 x i16> @llvm.umax.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
+ call <vscale x 16 x i16> @llvm.umax.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
call i32 @llvm.umax.i32(i32 undef, i32 undef)
call <2 x i32> @llvm.umax.v2i32(<2 x i32> undef, <2 x i32> undef)
call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef)
call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef)
call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef)
- call <vscale x 1 x i32> @llvm.umax.nvx1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef)
- call <vscale x 2 x i32> @llvm.umax.nvx2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
- call <vscale x 4 x i32> @llvm.umax.nvx4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
- call <vscale x 8 x i32> @llvm.umax.nvx8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
- call <vscale x 16 x i32> @llvm.umax.nvx16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef)
+ call <vscale x 1 x i32> @llvm.umax.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef)
+ call <vscale x 2 x i32> @llvm.umax.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
+ call <vscale x 4 x i32> @llvm.umax.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
+ call <vscale x 8 x i32> @llvm.umax.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
+ call <vscale x 16 x i32> @llvm.umax.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef)
call i64 @llvm.umax.i64(i64 undef, i64 undef)
call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef)
call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef)
call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef)
call <16 x i64> @llvm.umax.v16i64(<16 x i64> undef, <16 x i64> undef)
- call <vscale x 1 x i64> @llvm.umax.nvx1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> undef)
- call <vscale x 2 x i64> @llvm.umax.nvx2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
- call <vscale x 4 x i64> @llvm.umax.nvx4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef)
- call <vscale x 8 x i64> @llvm.umax.nvx8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef)
+ call <vscale x 1 x i64> @llvm.umax.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> undef)
+ call <vscale x 2 x i64> @llvm.umax.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
+ call <vscale x 4 x i64> @llvm.umax.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef)
+ call <vscale x 8 x i64> @llvm.umax.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef)
ret void
}
@@ -304,40 +304,40 @@ define void @umin() {
call <4 x i8> @llvm.umin.v4i8(<4 x i8> undef, <4 x i8> undef)
call <8 x i8> @llvm.umin.v8i8(<8 x i8> undef, <8 x i8> undef)
call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef)
- call <vscale x 1 x i8> @llvm.umin.nvx1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef)
- call <vscale x 2 x i8> @llvm.umin.nvx2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef)
- call <vscale x 4 x i8> @llvm.umin.nvx4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef)
- call <vscale x 8 x i8> @llvm.umin.nvx8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef)
- call <vscale x 16 x i8> @llvm.umin.nvx16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
+ call <vscale x 1 x i8> @llvm.umin.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef)
+ call <vscale x 2 x i8> @llvm.umin.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef)
+ call <vscale x 4 x i8> @llvm.umin.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef)
+ call <vscale x 8 x i8> @llvm.umin.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef)
+ call <vscale x 16 x i8> @llvm.umin.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
call i16 @llvm.umin.i16(i16 undef, i16 undef)
call <2 x i16> @llvm.umin.v2i16(<2 x i16> undef, <2 x i16> undef)
call <4 x i16> @llvm.umin.v4i16(<4 x i16> undef, <4 x i16> undef)
call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef)
call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef)
- call <vscale x 1 x i16> @llvm.umin.nvx1i16(<vscale x 1 x i16> undef, <vscale x 1 x i16> undef)
- call <vscale x 2 x i16> @llvm.umin.nvx2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef)
- call <vscale x 4 x i16> @llvm.umin.nvx4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
- call <vscale x 8 x i16> @llvm.umin.nvx8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
- call <vscale x 16 x i16> @llvm.umin.nvx16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
+ call <vscale x 1 x i16> @llvm.umin.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i16> undef)
+ call <vscale x 2 x i16> @llvm.umin.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef)
+ call <vscale x 4 x i16> @llvm.umin.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
+ call <vscale x 8 x i16> @llvm.umin.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
+ call <vscale x 16 x i16> @llvm.umin.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
call i32 @llvm.umin.i32(i32 undef, i32 undef)
call <2 x i32> @llvm.umin.v2i32(<2 x i32> undef, <2 x i32> undef)
call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef)
call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef)
call <16 x i32> @llvm.umin.v16i32(<16 x i32> undef, <16 x i32> undef)
- call <vscale x 1 x i32> @llvm.umin.nvx1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef)
- call <vscale x 2 x i32> @llvm.umin.nvx2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
- call <vscale x 4 x i32> @llvm.umin.nvx4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
- call <vscale x 8 x i32> @llvm.umin.nvx8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
- call <vscale x 16 x i32> @llvm.umin.nvx16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef)
+ call <vscale x 1 x i32> @llvm.umin.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef)
+ call <vscale x 2 x i32> @llvm.umin.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
+ call <vscale x 4 x i32> @llvm.umin.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
+ call <vscale x 8 x i32> @llvm.umin.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
+ call <vscale x 16 x i32> @llvm.umin.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef)
call i64 @llvm.umin.i64(i64 undef, i64 undef)
call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef)
call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef)
call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef)
call <16 x i64> @llvm.umin.v16i64(<16 x i64> undef, <16 x i64> undef)
- call <vscale x 1 x i64> @llvm.umin.nvx1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> undef)
- call <vscale x 2 x i64> @llvm.umin.nvx2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
- call <vscale x 4 x i64> @llvm.umin.nvx4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef)
- call <vscale x 8 x i64> @llvm.umin.nvx8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef)
+ call <vscale x 1 x i64> @llvm.umin.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> undef)
+ call <vscale x 2 x i64> @llvm.umin.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
+ call <vscale x 4 x i64> @llvm.umin.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef)
+ call <vscale x 8 x i64> @llvm.umin.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef)
ret void
}
@@ -346,157 +346,157 @@ declare <2 x i8> @llvm.smax.v2i8(<2 x i8>, <2 x i8>)
declare <4 x i8> @llvm.smax.v4i8(<4 x i8>, <4 x i8>)
declare <8 x i8> @llvm.smax.v8i8(<8 x i8>, <8 x i8>)
declare <16 x i8> @llvm.smax.v16i8(<16 x i8>, <16 x i8>)
-declare <vscale x 1 x i8> @llvm.smax.nvx1i8(<vscale x 1 x i8>, <vscale x 1 x i8>)
-declare <vscale x 2 x i8> @llvm.smax.nvx2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)
-declare <vscale x 4 x i8> @llvm.smax.nvx4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
-declare <vscale x 8 x i8> @llvm.smax.nvx8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
-declare <vscale x 16 x i8> @llvm.smax.nvx16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 1 x i8> @llvm.smax.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>)
+declare <vscale x 2 x i8> @llvm.smax.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)
+declare <vscale x 4 x i8> @llvm.smax.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
+declare <vscale x 8 x i8> @llvm.smax.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
+declare <vscale x 16 x i8> @llvm.smax.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare i16 @llvm.smax.i16(i16, i16)
declare <2 x i16> @llvm.smax.v2i16(<2 x i16>, <2 x i16>)
declare <4 x i16> @llvm.smax.v4i16(<4 x i16>, <4 x i16>)
declare <8 x i16> @llvm.smax.v8i16(<8 x i16>, <8 x i16>)
declare <16 x i16> @llvm.smax.v16i16(<16 x i16>, <16 x i16>)
-declare <vscale x 1 x i16> @llvm.smax.nvx1i16(<vscale x 1 x i16>, <vscale x 1 x i16>)
-declare <vscale x 2 x i16> @llvm.smax.nvx2i16(<vscale x 2 x i16>, <vscale x 2 x i16>)
-declare <vscale x 4 x i16> @llvm.smax.nvx4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
-declare <vscale x 8 x i16> @llvm.smax.nvx8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 16 x i16> @llvm.smax.nvx16i16(<vscale x 16 x i16>, <vscale x 16 x i16>)
+declare <vscale x 1 x i16> @llvm.smax.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i16>)
+declare <vscale x 2 x i16> @llvm.smax.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>)
+declare <vscale x 4 x i16> @llvm.smax.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
+declare <vscale x 8 x i16> @llvm.smax.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 16 x i16> @llvm.smax.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>)
declare i32 @llvm.smax.i32(i32, i32)
declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>)
declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)
declare <8 x i32> @llvm.smax.v8i32(<8 x i32>, <8 x i32>)
declare <16 x i32> @llvm.smax.v16i32(<16 x i32>, <16 x i32>)
-declare <vscale x 1 x i32> @llvm.smax.nvx1i32(<vscale x 1 x i32>, <vscale x 1 x i32>)
-declare <vscale x 2 x i32> @llvm.smax.nvx2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
-declare <vscale x 4 x i32> @llvm.smax.nvx4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 8 x i32> @llvm.smax.nvx8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
-declare <vscale x 16 x i32> @llvm.smax.nvx16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)
+declare <vscale x 1 x i32> @llvm.smax.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>)
+declare <vscale x 2 x i32> @llvm.smax.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
+declare <vscale x 4 x i32> @llvm.smax.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 8 x i32> @llvm.smax.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
+declare <vscale x 16 x i32> @llvm.smax.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)
declare i64 @llvm.smax.i64(i64, i64)
declare <2 x i64> @llvm.smax.v2i64(<2 x i64>, <2 x i64>)
declare <4 x i64> @llvm.smax.v4i64(<4 x i64>, <4 x i64>)
declare <8 x i64> @llvm.smax.v8i64(<8 x i64>, <8 x i64>)
declare <16 x i64> @llvm.smax.v16i64(<16 x i64>, <16 x i64>)
-declare <vscale x 1 x i64> @llvm.smax.nvx1i64(<vscale x 1 x i64>, <vscale x 1 x i64>)
-declare <vscale x 2 x i64> @llvm.smax.nvx2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
-declare <vscale x 4 x i64> @llvm.smax.nvx4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
-declare <vscale x 8 x i64> @llvm.smax.nvx8i64(<vscale x 8 x i64>, <vscale x 8 x i64>)
+declare <vscale x 1 x i64> @llvm.smax.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>)
+declare <vscale x 2 x i64> @llvm.smax.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 4 x i64> @llvm.smax.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
+declare <vscale x 8 x i64> @llvm.smax.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>)
declare i8 @llvm.smin.i8(i8, i8)
declare <2 x i8> @llvm.smin.v2i8(<2 x i8>, <2 x i8>)
declare <4 x i8> @llvm.smin.v4i8(<4 x i8>, <4 x i8>)
declare <8 x i8> @llvm.smin.v8i8(<8 x i8>, <8 x i8>)
declare <16 x i8> @llvm.smin.v16i8(<16 x i8>, <16 x i8>)
-declare <vscale x 1 x i8> @llvm.smin.nvx1i8(<vscale x 1 x i8>, <vscale x 1 x i8>)
-declare <vscale x 2 x i8> @llvm.smin.nvx2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)
-declare <vscale x 4 x i8> @llvm.smin.nvx4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
-declare <vscale x 8 x i8> @llvm.smin.nvx8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
-declare <vscale x 16 x i8> @llvm.smin.nvx16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 1 x i8> @llvm.smin.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>)
+declare <vscale x 2 x i8> @llvm.smin.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)
+declare <vscale x 4 x i8> @llvm.smin.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
+declare <vscale x 8 x i8> @llvm.smin.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
+declare <vscale x 16 x i8> @llvm.smin.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare i16 @llvm.smin.i16(i16, i16)
declare <2 x i16> @llvm.smin.v2i16(<2 x i16>, <2 x i16>)
declare <4 x i16> @llvm.smin.v4i16(<4 x i16>, <4 x i16>)
declare <8 x i16> @llvm.smin.v8i16(<8 x i16>, <8 x i16>)
declare <16 x i16> @llvm.smin.v16i16(<16 x i16>, <16 x i16>)
-declare <vscale x 1 x i16> @llvm.smin.nvx1i16(<vscale x 1 x i16>, <vscale x 1 x i16>)
-declare <vscale x 2 x i16> @llvm.smin.nvx2i16(<vscale x 2 x i16>, <vscale x 2 x i16>)
-declare <vscale x 4 x i16> @llvm.smin.nvx4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
-declare <vscale x 8 x i16> @llvm.smin.nvx8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 16 x i16> @llvm.smin.nvx16i16(<vscale x 16 x i16>, <vscale x 16 x i16>)
+declare <vscale x 1 x i16> @llvm.smin.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i16>)
+declare <vscale x 2 x i16> @llvm.smin.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>)
+declare <vscale x 4 x i16> @llvm.smin.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
+declare <vscale x 8 x i16> @llvm.smin.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 16 x i16> @llvm.smin.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>)
declare i32 @llvm.smin.i32(i32, i32)
declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>)
declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)
declare <8 x i32> @llvm.smin.v8i32(<8 x i32>, <8 x i32>)
declare <16 x i32> @llvm.smin.v16i32(<16 x i32>, <16 x i32>)
-declare <vscale x 1 x i32> @llvm.smin.nvx1i32(<vscale x 1 x i32>, <vscale x 1 x i32>)
-declare <vscale x 2 x i32> @llvm.smin.nvx2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
-declare <vscale x 4 x i32> @llvm.smin.nvx4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 8 x i32> @llvm.smin.nvx8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
-declare <vscale x 16 x i32> @llvm.smin.nvx16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)
+declare <vscale x 1 x i32> @llvm.smin.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>)
+declare <vscale x 2 x i32> @llvm.smin.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
+declare <vscale x 4 x i32> @llvm.smin.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 8 x i32> @llvm.smin.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
+declare <vscale x 16 x i32> @llvm.smin.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)
declare i64 @llvm.smin.i64(i64, i64)
declare <2 x i64> @llvm.smin.v2i64(<2 x i64>, <2 x i64>)
declare <4 x i64> @llvm.smin.v4i64(<4 x i64>, <4 x i64>)
declare <8 x i64> @llvm.smin.v8i64(<8 x i64>, <8 x i64>)
declare <16 x i64> @llvm.smin.v16i64(<16 x i64>, <16 x i64>)
-declare <vscale x 1 x i64> @llvm.smin.nvx1i64(<vscale x 1 x i64>, <vscale x 1 x i64>)
-declare <vscale x 2 x i64> @llvm.smin.nvx2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
-declare <vscale x 4 x i64> @llvm.smin.nvx4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
-declare <vscale x 8 x i64> @llvm.smin.nvx8i64(<vscale x 8 x i64>, <vscale x 8 x i64>)
+declare <vscale x 1 x i64> @llvm.smin.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>)
+declare <vscale x 2 x i64> @llvm.smin.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 4 x i64> @llvm.smin.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
+declare <vscale x 8 x i64> @llvm.smin.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>)
declare i8 @llvm.umax.i8(i8, i8)
declare <2 x i8> @llvm.umax.v2i8(<2 x i8>, <2 x i8>)
declare <4 x i8> @llvm.umax.v4i8(<4 x i8>, <4 x i8>)
declare <8 x i8> @llvm.umax.v8i8(<8 x i8>, <8 x i8>)
declare <16 x i8> @llvm.umax.v16i8(<16 x i8>, <16 x i8>)
-declare <vscale x 1 x i8> @llvm.umax.nvx1i8(<vscale x 1 x i8>, <vscale x 1 x i8>)
-declare <vscale x 2 x i8> @llvm.umax.nvx2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)
-declare <vscale x 4 x i8> @llvm.umax.nvx4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
-declare <vscale x 8 x i8> @llvm.umax.nvx8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
-declare <vscale x 16 x i8> @llvm.umax.nvx16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 1 x i8> @llvm.umax.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>)
+declare <vscale x 2 x i8> @llvm.umax.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)
+declare <vscale x 4 x i8> @llvm.umax.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
+declare <vscale x 8 x i8> @llvm.umax.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
+declare <vscale x 16 x i8> @llvm.umax.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare i16 @llvm.umax.i16(i16, i16)
declare <2 x i16> @llvm.umax.v2i16(<2 x i16>, <2 x i16>)
declare <4 x i16> @llvm.umax.v4i16(<4 x i16>, <4 x i16>)
declare <8 x i16> @llvm.umax.v8i16(<8 x i16>, <8 x i16>)
declare <16 x i16> @llvm.umax.v16i16(<16 x i16>, <16 x i16>)
-declare <vscale x 1 x i16> @llvm.umax.nvx1i16(<vscale x 1 x i16>, <vscale x 1 x i16>)
-declare <vscale x 2 x i16> @llvm.umax.nvx2i16(<vscale x 2 x i16>, <vscale x 2 x i16>)
-declare <vscale x 4 x i16> @llvm.umax.nvx4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
-declare <vscale x 8 x i16> @llvm.umax.nvx8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 16 x i16> @llvm.umax.nvx16i16(<vscale x 16 x i16>, <vscale x 16 x i16>)
+declare <vscale x 1 x i16> @llvm.umax.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i16>)
+declare <vscale x 2 x i16> @llvm.umax.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>)
+declare <vscale x 4 x i16> @llvm.umax.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
+declare <vscale x 8 x i16> @llvm.umax.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 16 x i16> @llvm.umax.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>)
declare i32 @llvm.umax.i32(i32, i32)
declare <2 x i32> @llvm.umax.v2i32(<2 x i32>, <2 x i32>)
declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>)
declare <8 x i32> @llvm.umax.v8i32(<8 x i32>, <8 x i32>)
declare <16 x i32> @llvm.umax.v16i32(<16 x i32>, <16 x i32>)
-declare <vscale x 1 x i32> @llvm.umax.nvx1i32(<vscale x 1 x i32>, <vscale x 1 x i32>)
-declare <vscale x 2 x i32> @llvm.umax.nvx2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
-declare <vscale x 4 x i32> @llvm.umax.nvx4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 8 x i32> @llvm.umax.nvx8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
-declare <vscale x 16 x i32> @llvm.umax.nvx16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)
+declare <vscale x 1 x i32> @llvm.umax.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>)
+declare <vscale x 2 x i32> @llvm.umax.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
+declare <vscale x 4 x i32> @llvm.umax.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 8 x i32> @llvm.umax.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
+declare <vscale x 16 x i32> @llvm.umax.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)
declare i64 @llvm.umax.i64(i64, i64)
declare <2 x i64> @llvm.umax.v2i64(<2 x i64>, <2 x i64>)
declare <4 x i64> @llvm.umax.v4i64(<4 x i64>, <4 x i64>)
declare <8 x i64> @llvm.umax.v8i64(<8 x i64>, <8 x i64>)
declare <16 x i64> @llvm.umax.v16i64(<16 x i64>, <16 x i64>)
-declare <vscale x 1 x i64> @llvm.umax.nvx1i64(<vscale x 1 x i64>, <vscale x 1 x i64>)
-declare <vscale x 2 x i64> @llvm.umax.nvx2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
-declare <vscale x 4 x i64> @llvm.umax.nvx4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
-declare <vscale x 8 x i64> @llvm.umax.nvx8i64(<vscale x 8 x i64>, <vscale x 8 x i64>)
+declare <vscale x 1 x i64> @llvm.umax.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>)
+declare <vscale x 2 x i64> @llvm.umax.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 4 x i64> @llvm.umax.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
+declare <vscale x 8 x i64> @llvm.umax.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>)
declare i8 @llvm.umin.i8(i8, i8)
declare <2 x i8> @llvm.umin.v2i8(<2 x i8>, <2 x i8>)
declare <4 x i8> @llvm.umin.v4i8(<4 x i8>, <4 x i8>)
declare <8 x i8> @llvm.umin.v8i8(<8 x i8>, <8 x i8>)
declare <16 x i8> @llvm.umin.v16i8(<16 x i8>, <16 x i8>)
-declare <vscale x 1 x i8> @llvm.umin.nvx1i8(<vscale x 1 x i8>, <vscale x 1 x i8>)
-declare <vscale x 2 x i8> @llvm.umin.nvx2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)
-declare <vscale x 4 x i8> @llvm.umin.nvx4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
-declare <vscale x 8 x i8> @llvm.umin.nvx8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
-declare <vscale x 16 x i8> @llvm.umin.nvx16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 1 x i8> @llvm.umin.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>)
+declare <vscale x 2 x i8> @llvm.umin.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)
+declare <vscale x 4 x i8> @llvm.umin.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
+declare <vscale x 8 x i8> @llvm.umin.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
+declare <vscale x 16 x i8> @llvm.umin.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare i16 @llvm.umin.i16(i16, i16)
declare <2 x i16> @llvm.umin.v2i16(<2 x i16>, <2 x i16>)
declare <4 x i16> @llvm.umin.v4i16(<4 x i16>, <4 x i16>)
declare <8 x i16> @llvm.umin.v8i16(<8 x i16>, <8 x i16>)
declare <16 x i16> @llvm.umin.v16i16(<16 x i16>, <16 x i16>)
-declare <vscale x 1 x i16> @llvm.umin.nvx1i16(<vscale x 1 x i16>, <vscale x 1 x i16>)
-declare <vscale x 2 x i16> @llvm.umin.nvx2i16(<vscale x 2 x i16>, <vscale x 2 x i16>)
-declare <vscale x 4 x i16> @llvm.umin.nvx4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
-declare <vscale x 8 x i16> @llvm.umin.nvx8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 16 x i16> @llvm.umin.nvx16i16(<vscale x 16 x i16>, <vscale x 16 x i16>)
+declare <vscale x 1 x i16> @llvm.umin.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i16>)
+declare <vscale x 2 x i16> @llvm.umin.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>)
+declare <vscale x 4 x i16> @llvm.umin.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
+declare <vscale x 8 x i16> @llvm.umin.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 16 x i16> @llvm.umin.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>)
declare i32 @llvm.umin.i32(i32, i32)
declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>)
declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>)
declare <8 x i32> @llvm.umin.v8i32(<8 x i32>, <8 x i32>)
declare <16 x i32> @llvm.umin.v16i32(<16 x i32>, <16 x i32>)
-declare <vscale x 1 x i32> @llvm.umin.nvx1i32(<vscale x 1 x i32>, <vscale x 1 x i32>)
-declare <vscale x 2 x i32> @llvm.umin.nvx2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
-declare <vscale x 4 x i32> @llvm.umin.nvx4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 8 x i32> @llvm.umin.nvx8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
-declare <vscale x 16 x i32> @llvm.umin.nvx16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)
+declare <vscale x 1 x i32> @llvm.umin.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>)
+declare <vscale x 2 x i32> @llvm.umin.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
+declare <vscale x 4 x i32> @llvm.umin.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 8 x i32> @llvm.umin.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
+declare <vscale x 16 x i32> @llvm.umin.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)
declare i64 @llvm.umin.i64(i64, i64)
declare <2 x i64> @llvm.umin.v2i64(<2 x i64>, <2 x i64>)
declare <4 x i64> @llvm.umin.v4i64(<4 x i64>, <4 x i64>)
declare <8 x i64> @llvm.umin.v8i64(<8 x i64>, <8 x i64>)
declare <16 x i64> @llvm.umin.v16i64(<16 x i64>, <16 x i64>)
-declare <vscale x 1 x i64> @llvm.umin.nvx1i64(<vscale x 1 x i64>, <vscale x 1 x i64>)
-declare <vscale x 2 x i64> @llvm.umin.nvx2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
-declare <vscale x 4 x i64> @llvm.umin.nvx4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
-declare <vscale x 8 x i64> @llvm.umin.nvx8i64(<vscale x 8 x i64>, <vscale x 8 x i64>)
+declare <vscale x 1 x i64> @llvm.umin.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>)
+declare <vscale x 2 x i64> @llvm.umin.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 4 x i64> @llvm.umin.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
+declare <vscale x 8 x i64> @llvm.umin.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>)
diff --git a/llvm/test/Analysis/CostModel/RISCV/int-sat-math.ll b/llvm/test/Analysis/CostModel/RISCV/int-sat-math.ll
index 185fcc9ce8b33c..be6b7c57d22523 100644
--- a/llvm/test/Analysis/CostModel/RISCV/int-sat-math.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/int-sat-math.ll
@@ -45,36 +45,36 @@ define void @sadd.sat() {
call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
- call <vscale x 2 x i8> @llvm.sadd.sat.nvx2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef)
- call <vscale x 4 x i8> @llvm.sadd.sat.nvx4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef)
- call <vscale x 8 x i8> @llvm.sadd.sat.nvx8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef)
- call <vscale x 16 x i8> @llvm.sadd.sat.nvx16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
+ call <vscale x 2 x i8> @llvm.sadd.sat.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef)
+ call <vscale x 4 x i8> @llvm.sadd.sat.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef)
+ call <vscale x 8 x i8> @llvm.sadd.sat.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef)
+ call <vscale x 16 x i8> @llvm.sadd.sat.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
- call <vscale x 2 x i16> @llvm.sadd.sat.nvx2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef)
- call <vscale x 4 x i16> @llvm.sadd.sat.nvx4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
- call <vscale x 8 x i16> @llvm.sadd.sat.nvx8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
- call <vscale x 16 x i16> @llvm.sadd.sat.nvx16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
+ call <vscale x 2 x i16> @llvm.sadd.sat.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef)
+ call <vscale x 4 x i16> @llvm.sadd.sat.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
+ call <vscale x 8 x i16> @llvm.sadd.sat.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
+ call <vscale x 16 x i16> @llvm.sadd.sat.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)
call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
- call <vscale x 2 x i32> @llvm.sadd.sat.nvx2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
- call <vscale x 4 x i32> @llvm.sadd.sat.nvx4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
- call <vscale x 8 x i32> @llvm.sadd.sat.nvx8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
- call <vscale x 16 x i32> @llvm.sadd.sat.nvx16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef)
+ call <vscale x 2 x i32> @llvm.sadd.sat.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
+ call <vscale x 4 x i32> @llvm.sadd.sat.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
+ call <vscale x 8 x i32> @llvm.sadd.sat.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
+ call <vscale x 16 x i32> @llvm.sadd.sat.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef)
call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
call <16 x i64> @llvm.sadd.sat.v16i64(<16 x i64> undef, <16 x i64> undef)
- call <vscale x 2 x i64> @llvm.sadd.sat.nvx2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
- call <vscale x 4 x i64> @llvm.sadd.sat.nvx4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef)
- call <vscale x 8 x i64> @llvm.sadd.sat.nvx8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef)
+ call <vscale x 2 x i64> @llvm.sadd.sat.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
+ call <vscale x 4 x i64> @llvm.sadd.sat.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef)
+ call <vscale x 8 x i64> @llvm.sadd.sat.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef)
ret void
}
@@ -122,36 +122,36 @@ define void @uadd.sat() {
call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
- call <vscale x 2 x i8> @llvm.uadd.sat.nvx2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef)
- call <vscale x 4 x i8> @llvm.uadd.sat.nvx4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef)
- call <vscale x 8 x i8> @llvm.uadd.sat.nvx8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef)
- call <vscale x 16 x i8> @llvm.uadd.sat.nvx16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
+ call <vscale x 2 x i8> @llvm.uadd.sat.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef)
+ call <vscale x 4 x i8> @llvm.uadd.sat.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef)
+ call <vscale x 8 x i8> @llvm.uadd.sat.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef)
+ call <vscale x 16 x i8> @llvm.uadd.sat.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef)
call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
- call <vscale x 2 x i16> @llvm.uadd.sat.nvx2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef)
- call <vscale x 4 x i16> @llvm.uadd.sat.nvx4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
- call <vscale x 8 x i16> @llvm.uadd.sat.nvx8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
- call <vscale x 16 x i16> @llvm.uadd.sat.nvx16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
+ call <vscale x 2 x i16> @llvm.uadd.sat.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef)
+ call <vscale x 4 x i16> @llvm.uadd.sat.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
+ call <vscale x 8 x i16> @llvm.uadd.sat.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
+ call <vscale x 16 x i16> @llvm.uadd.sat.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef)
call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
- call <vscale x 2 x i32> @llvm.uadd.sat.nvx2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
- call <vscale x 4 x i32> @llvm.uadd.sat.nvx4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
- call <vscale x 8 x i32> @llvm.uadd.sat.nvx8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
- call <vscale x 16 x i32> @llvm.uadd.sat.nvx16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef)
+ call <vscale x 2 x i32> @llvm.uadd.sat.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
+ call <vscale x 4 x i32> @llvm.uadd.sat.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
+ call <vscale x 8 x i32> @llvm.uadd.sat.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
+ call <vscale x 16 x i32> @llvm.uadd.sat.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef)
call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef)
call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
call <16 x i64> @llvm.uadd.sat.v16i64(<16 x i64> undef, <16 x i64> undef)
- call <vscale x 2 x i64> @llvm.uadd.sat.nvx2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
- call <vscale x 4 x i64> @llvm.uadd.sat.nvx4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef)
- call <vscale x 8 x i64> @llvm.uadd.sat.nvx8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef)
+ call <vscale x 2 x i64> @llvm.uadd.sat.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
+ call <vscale x 4 x i64> @llvm.uadd.sat.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef)
+ call <vscale x 8 x i64> @llvm.uadd.sat.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef)
ret void
}
@@ -199,36 +199,36 @@ define void @usub.sat() {
call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
- call <vscale x 2 x i8> @llvm.usub.sat.nvx2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef)
- call <vscale x 4 x i8> @llvm.usub.sat.nvx4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef)
- call <vscale x 8 x i8> @llvm.usub.sat.nvx8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef)
- call <vscale x 16 x i8> @llvm.usub.sat.nvx16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
+ call <vscale x 2 x i8> @llvm.usub.sat.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef)
+ call <vscale x 4 x i8> @llvm.usub.sat.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef)
+ call <vscale x 8 x i8> @llvm.usub.sat.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef)
+ call <vscale x 16 x i8> @llvm.usub.sat.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)
call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
- call <vscale x 2 x i16> @llvm.usub.sat.nvx2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef)
- call <vscale x 4 x i16> @llvm.usub.sat.nvx4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
- call <vscale x 8 x i16> @llvm.usub.sat.nvx8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
- call <vscale x 16 x i16> @llvm.usub.sat.nvx16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
+ call <vscale x 2 x i16> @llvm.usub.sat.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef)
+ call <vscale x 4 x i16> @llvm.usub.sat.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
+ call <vscale x 8 x i16> @llvm.usub.sat.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
+ call <vscale x 16 x i16> @llvm.usub.sat.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)
call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
- call <vscale x 2 x i32> @llvm.usub.sat.nvx2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
- call <vscale x 4 x i32> @llvm.usub.sat.nvx4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
- call <vscale x 8 x i32> @llvm.usub.sat.nvx8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
- call <vscale x 16 x i32> @llvm.usub.sat.nvx16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef)
+ call <vscale x 2 x i32> @llvm.usub.sat.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
+ call <vscale x 4 x i32> @llvm.usub.sat.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
+ call <vscale x 8 x i32> @llvm.usub.sat.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
+ call <vscale x 16 x i32> @llvm.usub.sat.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef)
call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)
call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
call <16 x i64> @llvm.usub.sat.v16i64(<16 x i64> undef, <16 x i64> undef)
- call <vscale x 2 x i64> @llvm.usub.sat.nvx2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
- call <vscale x 4 x i64> @llvm.usub.sat.nvx4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef)
- call <vscale x 8 x i64> @llvm.usub.sat.nvx8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef)
+ call <vscale x 2 x i64> @llvm.usub.sat.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
+ call <vscale x 4 x i64> @llvm.usub.sat.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef)
+ call <vscale x 8 x i64> @llvm.usub.sat.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef)
ret void
}
@@ -276,36 +276,36 @@ define void @ssub.sat() {
call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
- call <vscale x 2 x i8> @llvm.ssub.sat.nvx2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef)
- call <vscale x 4 x i8> @llvm.ssub.sat.nvx4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef)
- call <vscale x 8 x i8> @llvm.ssub.sat.nvx8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef)
- call <vscale x 16 x i8> @llvm.ssub.sat.nvx16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
+ call <vscale x 2 x i8> @llvm.ssub.sat.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef)
+ call <vscale x 4 x i8> @llvm.ssub.sat.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef)
+ call <vscale x 8 x i8> @llvm.ssub.sat.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef)
+ call <vscale x 16 x i8> @llvm.ssub.sat.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef)
call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
- call <vscale x 2 x i16> @llvm.ssub.sat.nvx2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef)
- call <vscale x 4 x i16> @llvm.ssub.sat.nvx4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
- call <vscale x 8 x i16> @llvm.ssub.sat.nvx8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
- call <vscale x 16 x i16> @llvm.ssub.sat.nvx16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
+ call <vscale x 2 x i16> @llvm.ssub.sat.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef)
+ call <vscale x 4 x i16> @llvm.ssub.sat.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
+ call <vscale x 8 x i16> @llvm.ssub.sat.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
+ call <vscale x 16 x i16> @llvm.ssub.sat.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef)
call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
- call <vscale x 2 x i32> @llvm.ssub.sat.nvx2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
- call <vscale x 4 x i32> @llvm.ssub.sat.nvx4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
- call <vscale x 8 x i32> @llvm.ssub.sat.nvx8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
- call <vscale x 16 x i32> @llvm.ssub.sat.nvx16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef)
+ call <vscale x 2 x i32> @llvm.ssub.sat.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
+ call <vscale x 4 x i32> @llvm.ssub.sat.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
+ call <vscale x 8 x i32> @llvm.ssub.sat.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
+ call <vscale x 16 x i32> @llvm.ssub.sat.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef)
call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef)
call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
call <16 x i64> @llvm.ssub.sat.v16i64(<16 x i64> undef, <16 x i64> undef)
- call <vscale x 2 x i64> @llvm.ssub.sat.nvx2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
- call <vscale x 4 x i64> @llvm.ssub.sat.nvx4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef)
- call <vscale x 8 x i64> @llvm.ssub.sat.nvx8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef)
+ call <vscale x 2 x i64> @llvm.ssub.sat.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
+ call <vscale x 4 x i64> @llvm.ssub.sat.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef)
+ call <vscale x 8 x i64> @llvm.ssub.sat.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef)
ret void
}
@@ -353,36 +353,36 @@ define void @ushl.sat() {
call <4 x i8> @llvm.ushl.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
call <8 x i8> @llvm.ushl.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
call <16 x i8> @llvm.ushl.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
- call <vscale x 2 x i8> @llvm.ushl.sat.nvx2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef)
- call <vscale x 4 x i8> @llvm.ushl.sat.nvx4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef)
- call <vscale x 8 x i8> @llvm.ushl.sat.nvx8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef)
- call <vscale x 16 x i8> @llvm.ushl.sat.nvx16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
+ call <vscale x 2 x i8> @llvm.ushl.sat.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef)
+ call <vscale x 4 x i8> @llvm.ushl.sat.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef)
+ call <vscale x 8 x i8> @llvm.ushl.sat.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef)
+ call <vscale x 16 x i8> @llvm.ushl.sat.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
call i16 @llvm.ushl.sat.i16(i16 undef, i16 undef)
call <2 x i16> @llvm.ushl.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
call <4 x i16> @llvm.ushl.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
call <8 x i16> @llvm.ushl.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
call <16 x i16> @llvm.ushl.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
- call <vscale x 2 x i16> @llvm.ushl.sat.nvx2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef)
- call <vscale x 4 x i16> @llvm.ushl.sat.nvx4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
- call <vscale x 8 x i16> @llvm.ushl.sat.nvx8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
- call <vscale x 16 x i16> @llvm.ushl.sat.nvx16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
+ call <vscale x 2 x i16> @llvm.ushl.sat.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef)
+ call <vscale x 4 x i16> @llvm.ushl.sat.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
+ call <vscale x 8 x i16> @llvm.ushl.sat.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
+ call <vscale x 16 x i16> @llvm.ushl.sat.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
call i32 @llvm.ushl.sat.i32(i32 undef, i32 undef)
call <2 x i32> @llvm.ushl.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
call <4 x i32> @llvm.ushl.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
call <8 x i32> @llvm.ushl.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
call <16 x i32> @llvm.ushl.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
- call <vscale x 2 x i32> @llvm.ushl.sat.nvx2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
- call <vscale x 4 x i32> @llvm.ushl.sat.nvx4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
- call <vscale x 8 x i32> @llvm.ushl.sat.nvx8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
- call <vscale x 16 x i32> @llvm.ushl.sat.nvx16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef)
+ call <vscale x 2 x i32> @llvm.ushl.sat.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
+ call <vscale x 4 x i32> @llvm.ushl.sat.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
+ call <vscale x 8 x i32> @llvm.ushl.sat.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
+ call <vscale x 16 x i32> @llvm.ushl.sat.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef)
call i64 @llvm.ushl.sat.i64(i64 undef, i64 undef)
call <2 x i64> @llvm.ushl.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
call <4 x i64> @llvm.ushl.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
call <8 x i64> @llvm.ushl.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
call <16 x i64> @llvm.ushl.sat.v16i64(<16 x i64> undef, <16 x i64> undef)
- call <vscale x 2 x i64> @llvm.ushl.sat.nvx2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
- call <vscale x 4 x i64> @llvm.ushl.sat.nvx4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef)
- call <vscale x 8 x i64> @llvm.ushl.sat.nvx8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef)
+ call <vscale x 2 x i64> @llvm.ushl.sat.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
+ call <vscale x 4 x i64> @llvm.ushl.sat.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef)
+ call <vscale x 8 x i64> @llvm.ushl.sat.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef)
ret void
}
@@ -430,36 +430,36 @@ define void @sshl.sat() {
call <4 x i8> @llvm.sshl.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
call <8 x i8> @llvm.sshl.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
call <16 x i8> @llvm.sshl.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
- call <vscale x 2 x i8> @llvm.sshl.sat.nvx2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef)
- call <vscale x 4 x i8> @llvm.sshl.sat.nvx4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef)
- call <vscale x 8 x i8> @llvm.sshl.sat.nvx8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef)
- call <vscale x 16 x i8> @llvm.sshl.sat.nvx16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
+ call <vscale x 2 x i8> @llvm.sshl.sat.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef)
+ call <vscale x 4 x i8> @llvm.sshl.sat.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef)
+ call <vscale x 8 x i8> @llvm.sshl.sat.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef)
+ call <vscale x 16 x i8> @llvm.sshl.sat.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
call i16 @llvm.sshl.sat.i16(i16 undef, i16 undef)
call <2 x i16> @llvm.sshl.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
call <4 x i16> @llvm.sshl.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
call <8 x i16> @llvm.sshl.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
call <16 x i16> @llvm.sshl.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
- call <vscale x 2 x i16> @llvm.sshl.sat.nvx2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef)
- call <vscale x 4 x i16> @llvm.sshl.sat.nvx4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
- call <vscale x 8 x i16> @llvm.sshl.sat.nvx8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
- call <vscale x 16 x i16> @llvm.sshl.sat.nvx16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
+ call <vscale x 2 x i16> @llvm.sshl.sat.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef)
+ call <vscale x 4 x i16> @llvm.sshl.sat.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
+ call <vscale x 8 x i16> @llvm.sshl.sat.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
+ call <vscale x 16 x i16> @llvm.sshl.sat.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
call i32 @llvm.sshl.sat.i32(i32 undef, i32 undef)
call <2 x i32> @llvm.sshl.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
call <4 x i32> @llvm.sshl.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
call <8 x i32> @llvm.sshl.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
call <16 x i32> @llvm.sshl.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
- call <vscale x 2 x i32> @llvm.sshl.sat.nvx2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
- call <vscale x 4 x i32> @llvm.sshl.sat.nvx4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
- call <vscale x 8 x i32> @llvm.sshl.sat.nvx8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
- call <vscale x 16 x i32> @llvm.sshl.sat.nvx16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef)
+ call <vscale x 2 x i32> @llvm.sshl.sat.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
+ call <vscale x 4 x i32> @llvm.sshl.sat.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
+ call <vscale x 8 x i32> @llvm.sshl.sat.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
+ call <vscale x 16 x i32> @llvm.sshl.sat.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef)
call i64 @llvm.sshl.sat.i64(i64 undef, i64 undef)
call <2 x i64> @llvm.sshl.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
call <4 x i64> @llvm.sshl.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
call <8 x i64> @llvm.sshl.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
call <16 x i64> @llvm.sshl.sat.v16i64(<16 x i64> undef, <16 x i64> undef)
- call <vscale x 2 x i64> @llvm.sshl.sat.nvx2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
- call <vscale x 4 x i64> @llvm.sshl.sat.nvx4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef)
- call <vscale x 8 x i64> @llvm.sshl.sat.nvx8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef)
+ call <vscale x 2 x i64> @llvm.sshl.sat.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
+ call <vscale x 4 x i64> @llvm.sshl.sat.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef)
+ call <vscale x 8 x i64> @llvm.sshl.sat.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef)
ret void
}
@@ -468,213 +468,213 @@ declare <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8>, <2 x i8>)
declare <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8>, <4 x i8>)
declare <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8>, <8 x i8>)
declare <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8>, <16 x i8>)
-declare <vscale x 2 x i8> @llvm.sadd.sat.nvx2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)
-declare <vscale x 4 x i8> @llvm.sadd.sat.nvx4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
-declare <vscale x 8 x i8> @llvm.sadd.sat.nvx8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
-declare <vscale x 16 x i8> @llvm.sadd.sat.nvx16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 2 x i8> @llvm.sadd.sat.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)
+declare <vscale x 4 x i8> @llvm.sadd.sat.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
+declare <vscale x 8 x i8> @llvm.sadd.sat.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
+declare <vscale x 16 x i8> @llvm.sadd.sat.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare i16 @llvm.sadd.sat.i16(i16, i16)
declare <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16>, <2 x i16>)
declare <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16>, <4 x i16>)
declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>)
declare <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16>, <16 x i16>)
-declare <vscale x 2 x i16> @llvm.sadd.sat.nvx2i16(<vscale x 2 x i16>, <vscale x 2 x i16>)
-declare <vscale x 4 x i16> @llvm.sadd.sat.nvx4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
-declare <vscale x 8 x i16> @llvm.sadd.sat.nvx8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 16 x i16> @llvm.sadd.sat.nvx16i16(<vscale x 16 x i16>, <vscale x 16 x i16>)
+declare <vscale x 2 x i16> @llvm.sadd.sat.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>)
+declare <vscale x 4 x i16> @llvm.sadd.sat.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
+declare <vscale x 8 x i16> @llvm.sadd.sat.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 16 x i16> @llvm.sadd.sat.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>)
declare i32 @llvm.sadd.sat.i32(i32, i32)
declare <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32>, <2 x i32>)
declare <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32>, <4 x i32>)
declare <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32>, <8 x i32>)
declare <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32>, <16 x i32>)
-declare <vscale x 2 x i32> @llvm.sadd.sat.nvx2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
-declare <vscale x 4 x i32> @llvm.sadd.sat.nvx4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 8 x i32> @llvm.sadd.sat.nvx8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
-declare <vscale x 16 x i32> @llvm.sadd.sat.nvx16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)
+declare <vscale x 2 x i32> @llvm.sadd.sat.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
+declare <vscale x 4 x i32> @llvm.sadd.sat.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 8 x i32> @llvm.sadd.sat.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
+declare <vscale x 16 x i32> @llvm.sadd.sat.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)
declare i64 @llvm.sadd.sat.i64(i64, i64)
declare <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64>, <2 x i64>)
declare <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64>, <4 x i64>)
declare <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64>, <8 x i64>)
declare <16 x i64> @llvm.sadd.sat.v16i64(<16 x i64>, <16 x i64>)
-declare <vscale x 2 x i64> @llvm.sadd.sat.nvx2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
-declare <vscale x 4 x i64> @llvm.sadd.sat.nvx4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
-declare <vscale x 8 x i64> @llvm.sadd.sat.nvx8i64(<vscale x 8 x i64>, <vscale x 8 x i64>)
+declare <vscale x 2 x i64> @llvm.sadd.sat.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 4 x i64> @llvm.sadd.sat.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
+declare <vscale x 8 x i64> @llvm.sadd.sat.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>)
declare i8 @llvm.uadd.sat.i8(i8, i8)
declare <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8>, <2 x i8>)
declare <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8>, <4 x i8>)
declare <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8>, <8 x i8>)
declare <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8>, <16 x i8>)
-declare <vscale x 2 x i8> @llvm.uadd.sat.nvx2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)
-declare <vscale x 4 x i8> @llvm.uadd.sat.nvx4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
-declare <vscale x 8 x i8> @llvm.uadd.sat.nvx8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
-declare <vscale x 16 x i8> @llvm.uadd.sat.nvx16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 2 x i8> @llvm.uadd.sat.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)
+declare <vscale x 4 x i8> @llvm.uadd.sat.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
+declare <vscale x 8 x i8> @llvm.uadd.sat.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
+declare <vscale x 16 x i8> @llvm.uadd.sat.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare i16 @llvm.uadd.sat.i16(i16, i16)
declare <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16>, <2 x i16>)
declare <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16>, <4 x i16>)
declare <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16>, <8 x i16>)
declare <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16>, <16 x i16>)
-declare <vscale x 2 x i16> @llvm.uadd.sat.nvx2i16(<vscale x 2 x i16>, <vscale x 2 x i16>)
-declare <vscale x 4 x i16> @llvm.uadd.sat.nvx4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
-declare <vscale x 8 x i16> @llvm.uadd.sat.nvx8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 16 x i16> @llvm.uadd.sat.nvx16i16(<vscale x 16 x i16>, <vscale x 16 x i16>)
+declare <vscale x 2 x i16> @llvm.uadd.sat.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>)
+declare <vscale x 4 x i16> @llvm.uadd.sat.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
+declare <vscale x 8 x i16> @llvm.uadd.sat.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 16 x i16> @llvm.uadd.sat.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>)
declare i32 @llvm.uadd.sat.i32(i32, i32)
declare <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32>, <2 x i32>)
declare <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32>, <4 x i32>)
declare <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32>, <8 x i32>)
declare <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32>, <16 x i32>)
-declare <vscale x 2 x i32> @llvm.uadd.sat.nvx2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
-declare <vscale x 4 x i32> @llvm.uadd.sat.nvx4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 8 x i32> @llvm.uadd.sat.nvx8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
-declare <vscale x 16 x i32> @llvm.uadd.sat.nvx16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)
+declare <vscale x 2 x i32> @llvm.uadd.sat.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
+declare <vscale x 4 x i32> @llvm.uadd.sat.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 8 x i32> @llvm.uadd.sat.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
+declare <vscale x 16 x i32> @llvm.uadd.sat.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)
declare i64 @llvm.uadd.sat.i64(i64, i64)
declare <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64>, <2 x i64>)
declare <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64>, <4 x i64>)
declare <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64>, <8 x i64>)
declare <16 x i64> @llvm.uadd.sat.v16i64(<16 x i64>, <16 x i64>)
-declare <vscale x 2 x i64> @llvm.uadd.sat.nvx2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
-declare <vscale x 4 x i64> @llvm.uadd.sat.nvx4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
-declare <vscale x 8 x i64> @llvm.uadd.sat.nvx8i64(<vscale x 8 x i64>, <vscale x 8 x i64>)
+declare <vscale x 2 x i64> @llvm.uadd.sat.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 4 x i64> @llvm.uadd.sat.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
+declare <vscale x 8 x i64> @llvm.uadd.sat.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>)
declare i8 @llvm.usub.sat.i8(i8, i8)
declare <2 x i8> @llvm.usub.sat.v2i8(<2 x i8>, <2 x i8>)
declare <4 x i8> @llvm.usub.sat.v4i8(<4 x i8>, <4 x i8>)
declare <8 x i8> @llvm.usub.sat.v8i8(<8 x i8>, <8 x i8>)
declare <16 x i8> @llvm.usub.sat.v16i8(<16 x i8>, <16 x i8>)
-declare <vscale x 2 x i8> @llvm.usub.sat.nvx2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)
-declare <vscale x 4 x i8> @llvm.usub.sat.nvx4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
-declare <vscale x 8 x i8> @llvm.usub.sat.nvx8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
-declare <vscale x 16 x i8> @llvm.usub.sat.nvx16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 2 x i8> @llvm.usub.sat.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)
+declare <vscale x 4 x i8> @llvm.usub.sat.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
+declare <vscale x 8 x i8> @llvm.usub.sat.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
+declare <vscale x 16 x i8> @llvm.usub.sat.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare i16 @llvm.usub.sat.i16(i16, i16)
declare <2 x i16> @llvm.usub.sat.v2i16(<2 x i16>, <2 x i16>)
declare <4 x i16> @llvm.usub.sat.v4i16(<4 x i16>, <4 x i16>)
declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16>, <8 x i16>)
declare <16 x i16> @llvm.usub.sat.v16i16(<16 x i16>, <16 x i16>)
-declare <vscale x 2 x i16> @llvm.usub.sat.nvx2i16(<vscale x 2 x i16>, <vscale x 2 x i16>)
-declare <vscale x 4 x i16> @llvm.usub.sat.nvx4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
-declare <vscale x 8 x i16> @llvm.usub.sat.nvx8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 16 x i16> @llvm.usub.sat.nvx16i16(<vscale x 16 x i16>, <vscale x 16 x i16>)
+declare <vscale x 2 x i16> @llvm.usub.sat.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>)
+declare <vscale x 4 x i16> @llvm.usub.sat.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
+declare <vscale x 8 x i16> @llvm.usub.sat.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 16 x i16> @llvm.usub.sat.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>)
declare i32 @llvm.usub.sat.i32(i32, i32)
declare <2 x i32> @llvm.usub.sat.v2i32(<2 x i32>, <2 x i32>)
declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32>, <4 x i32>)
declare <8 x i32> @llvm.usub.sat.v8i32(<8 x i32>, <8 x i32>)
declare <16 x i32> @llvm.usub.sat.v16i32(<16 x i32>, <16 x i32>)
-declare <vscale x 2 x i32> @llvm.usub.sat.nvx2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
-declare <vscale x 4 x i32> @llvm.usub.sat.nvx4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 8 x i32> @llvm.usub.sat.nvx8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
-declare <vscale x 16 x i32> @llvm.usub.sat.nvx16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)
+declare <vscale x 2 x i32> @llvm.usub.sat.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
+declare <vscale x 4 x i32> @llvm.usub.sat.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 8 x i32> @llvm.usub.sat.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
+declare <vscale x 16 x i32> @llvm.usub.sat.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)
declare i64 @llvm.usub.sat.i64(i64, i64)
declare <2 x i64> @llvm.usub.sat.v2i64(<2 x i64>, <2 x i64>)
declare <4 x i64> @llvm.usub.sat.v4i64(<4 x i64>, <4 x i64>)
declare <8 x i64> @llvm.usub.sat.v8i64(<8 x i64>, <8 x i64>)
declare <16 x i64> @llvm.usub.sat.v16i64(<16 x i64>, <16 x i64>)
-declare <vscale x 2 x i64> @llvm.usub.sat.nvx2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
-declare <vscale x 4 x i64> @llvm.usub.sat.nvx4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
-declare <vscale x 8 x i64> @llvm.usub.sat.nvx8i64(<vscale x 8 x i64>, <vscale x 8 x i64>)
+declare <vscale x 2 x i64> @llvm.usub.sat.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 4 x i64> @llvm.usub.sat.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
+declare <vscale x 8 x i64> @llvm.usub.sat.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>)
declare i8 @llvm.ssub.sat.i8(i8, i8)
declare <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8>, <2 x i8>)
declare <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8>, <4 x i8>)
declare <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8>, <8 x i8>)
declare <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8>, <16 x i8>)
-declare <vscale x 2 x i8> @llvm.ssub.sat.nvx2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)
-declare <vscale x 4 x i8> @llvm.ssub.sat.nvx4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
-declare <vscale x 8 x i8> @llvm.ssub.sat.nvx8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
-declare <vscale x 16 x i8> @llvm.ssub.sat.nvx16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 2 x i8> @llvm.ssub.sat.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)
+declare <vscale x 4 x i8> @llvm.ssub.sat.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
+declare <vscale x 8 x i8> @llvm.ssub.sat.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
+declare <vscale x 16 x i8> @llvm.ssub.sat.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare i16 @llvm.ssub.sat.i16(i16, i16)
declare <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16>, <2 x i16>)
declare <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16>, <4 x i16>)
declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>)
declare <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16>, <16 x i16>)
-declare <vscale x 2 x i16> @llvm.ssub.sat.nvx2i16(<vscale x 2 x i16>, <vscale x 2 x i16>)
-declare <vscale x 4 x i16> @llvm.ssub.sat.nvx4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
-declare <vscale x 8 x i16> @llvm.ssub.sat.nvx8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 16 x i16> @llvm.ssub.sat.nvx16i16(<vscale x 16 x i16>, <vscale x 16 x i16>)
+declare <vscale x 2 x i16> @llvm.ssub.sat.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>)
+declare <vscale x 4 x i16> @llvm.ssub.sat.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
+declare <vscale x 8 x i16> @llvm.ssub.sat.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 16 x i16> @llvm.ssub.sat.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>)
declare i32 @llvm.ssub.sat.i32(i32, i32)
declare <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32>, <2 x i32>)
declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>)
declare <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32>, <8 x i32>)
declare <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32>, <16 x i32>)
-declare <vscale x 2 x i32> @llvm.ssub.sat.nvx2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
-declare <vscale x 4 x i32> @llvm.ssub.sat.nvx4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 8 x i32> @llvm.ssub.sat.nvx8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
-declare <vscale x 16 x i32> @llvm.ssub.sat.nvx16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)
+declare <vscale x 2 x i32> @llvm.ssub.sat.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
+declare <vscale x 4 x i32> @llvm.ssub.sat.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 8 x i32> @llvm.ssub.sat.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
+declare <vscale x 16 x i32> @llvm.ssub.sat.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)
declare i64 @llvm.ssub.sat.i64(i64, i64)
declare <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64>, <2 x i64>)
declare <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64>, <4 x i64>)
declare <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64>, <8 x i64>)
declare <16 x i64> @llvm.ssub.sat.v16i64(<16 x i64>, <16 x i64>)
-declare <vscale x 2 x i64> @llvm.ssub.sat.nvx2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
-declare <vscale x 4 x i64> @llvm.ssub.sat.nvx4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
-declare <vscale x 8 x i64> @llvm.ssub.sat.nvx8i64(<vscale x 8 x i64>, <vscale x 8 x i64>)
+declare <vscale x 2 x i64> @llvm.ssub.sat.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 4 x i64> @llvm.ssub.sat.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
+declare <vscale x 8 x i64> @llvm.ssub.sat.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>)
declare i8 @llvm.ushl.sat.i8(i8, i8)
declare <2 x i8> @llvm.ushl.sat.v2i8(<2 x i8>, <2 x i8>)
declare <4 x i8> @llvm.ushl.sat.v4i8(<4 x i8>, <4 x i8>)
declare <8 x i8> @llvm.ushl.sat.v8i8(<8 x i8>, <8 x i8>)
declare <16 x i8> @llvm.ushl.sat.v16i8(<16 x i8>, <16 x i8>)
-declare <vscale x 2 x i8> @llvm.ushl.sat.nvx2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)
-declare <vscale x 4 x i8> @llvm.ushl.sat.nvx4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
-declare <vscale x 8 x i8> @llvm.ushl.sat.nvx8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
-declare <vscale x 16 x i8> @llvm.ushl.sat.nvx16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 2 x i8> @llvm.ushl.sat.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)
+declare <vscale x 4 x i8> @llvm.ushl.sat.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
+declare <vscale x 8 x i8> @llvm.ushl.sat.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
+declare <vscale x 16 x i8> @llvm.ushl.sat.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare i16 @llvm.ushl.sat.i16(i16, i16)
declare <2 x i16> @llvm.ushl.sat.v2i16(<2 x i16>, <2 x i16>)
declare <4 x i16> @llvm.ushl.sat.v4i16(<4 x i16>, <4 x i16>)
declare <8 x i16> @llvm.ushl.sat.v8i16(<8 x i16>, <8 x i16>)
declare <16 x i16> @llvm.ushl.sat.v16i16(<16 x i16>, <16 x i16>)
-declare <vscale x 2 x i16> @llvm.ushl.sat.nvx2i16(<vscale x 2 x i16>, <vscale x 2 x i16>)
-declare <vscale x 4 x i16> @llvm.ushl.sat.nvx4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
-declare <vscale x 8 x i16> @llvm.ushl.sat.nvx8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 16 x i16> @llvm.ushl.sat.nvx16i16(<vscale x 16 x i16>, <vscale x 16 x i16>)
+declare <vscale x 2 x i16> @llvm.ushl.sat.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>)
+declare <vscale x 4 x i16> @llvm.ushl.sat.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
+declare <vscale x 8 x i16> @llvm.ushl.sat.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 16 x i16> @llvm.ushl.sat.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>)
declare i32 @llvm.ushl.sat.i32(i32, i32)
declare <2 x i32> @llvm.ushl.sat.v2i32(<2 x i32>, <2 x i32>)
declare <4 x i32> @llvm.ushl.sat.v4i32(<4 x i32>, <4 x i32>)
declare <8 x i32> @llvm.ushl.sat.v8i32(<8 x i32>, <8 x i32>)
declare <16 x i32> @llvm.ushl.sat.v16i32(<16 x i32>, <16 x i32>)
-declare <vscale x 2 x i32> @llvm.ushl.sat.nvx2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
-declare <vscale x 4 x i32> @llvm.ushl.sat.nvx4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 8 x i32> @llvm.ushl.sat.nvx8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
-declare <vscale x 16 x i32> @llvm.ushl.sat.nvx16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)
+declare <vscale x 2 x i32> @llvm.ushl.sat.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
+declare <vscale x 4 x i32> @llvm.ushl.sat.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 8 x i32> @llvm.ushl.sat.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
+declare <vscale x 16 x i32> @llvm.ushl.sat.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)
declare i64 @llvm.ushl.sat.i64(i64, i64)
declare <2 x i64> @llvm.ushl.sat.v2i64(<2 x i64>, <2 x i64>)
declare <4 x i64> @llvm.ushl.sat.v4i64(<4 x i64>, <4 x i64>)
declare <8 x i64> @llvm.ushl.sat.v8i64(<8 x i64>, <8 x i64>)
declare <16 x i64> @llvm.ushl.sat.v16i64(<16 x i64>, <16 x i64>)
-declare <vscale x 2 x i64> @llvm.ushl.sat.nvx2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
-declare <vscale x 4 x i64> @llvm.ushl.sat.nvx4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
-declare <vscale x 8 x i64> @llvm.ushl.sat.nvx8i64(<vscale x 8 x i64>, <vscale x 8 x i64>)
+declare <vscale x 2 x i64> @llvm.ushl.sat.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 4 x i64> @llvm.ushl.sat.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
+declare <vscale x 8 x i64> @llvm.ushl.sat.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>)
declare i8 @llvm.sshl.sat.i8(i8, i8)
declare <2 x i8> @llvm.sshl.sat.v2i8(<2 x i8>, <2 x i8>)
declare <4 x i8> @llvm.sshl.sat.v4i8(<4 x i8>, <4 x i8>)
declare <8 x i8> @llvm.sshl.sat.v8i8(<8 x i8>, <8 x i8>)
declare <16 x i8> @llvm.sshl.sat.v16i8(<16 x i8>, <16 x i8>)
-declare <vscale x 2 x i8> @llvm.sshl.sat.nvx2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)
-declare <vscale x 4 x i8> @llvm.sshl.sat.nvx4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
-declare <vscale x 8 x i8> @llvm.sshl.sat.nvx8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
-declare <vscale x 16 x i8> @llvm.sshl.sat.nvx16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 2 x i8> @llvm.sshl.sat.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)
+declare <vscale x 4 x i8> @llvm.sshl.sat.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
+declare <vscale x 8 x i8> @llvm.sshl.sat.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
+declare <vscale x 16 x i8> @llvm.sshl.sat.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare i16 @llvm.sshl.sat.i16(i16, i16)
declare <2 x i16> @llvm.sshl.sat.v2i16(<2 x i16>, <2 x i16>)
declare <4 x i16> @llvm.sshl.sat.v4i16(<4 x i16>, <4 x i16>)
declare <8 x i16> @llvm.sshl.sat.v8i16(<8 x i16>, <8 x i16>)
declare <16 x i16> @llvm.sshl.sat.v16i16(<16 x i16>, <16 x i16>)
-declare <vscale x 2 x i16> @llvm.sshl.sat.nvx2i16(<vscale x 2 x i16>, <vscale x 2 x i16>)
-declare <vscale x 4 x i16> @llvm.sshl.sat.nvx4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
-declare <vscale x 8 x i16> @llvm.sshl.sat.nvx8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 16 x i16> @llvm.sshl.sat.nvx16i16(<vscale x 16 x i16>, <vscale x 16 x i16>)
+declare <vscale x 2 x i16> @llvm.sshl.sat.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>)
+declare <vscale x 4 x i16> @llvm.sshl.sat.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
+declare <vscale x 8 x i16> @llvm.sshl.sat.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 16 x i16> @llvm.sshl.sat.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>)
declare i32 @llvm.sshl.sat.i32(i32, i32)
declare <2 x i32> @llvm.sshl.sat.v2i32(<2 x i32>, <2 x i32>)
declare <4 x i32> @llvm.sshl.sat.v4i32(<4 x i32>, <4 x i32>)
declare <8 x i32> @llvm.sshl.sat.v8i32(<8 x i32>, <8 x i32>)
declare <16 x i32> @llvm.sshl.sat.v16i32(<16 x i32>, <16 x i32>)
-declare <vscale x 2 x i32> @llvm.sshl.sat.nvx2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
-declare <vscale x 4 x i32> @llvm.sshl.sat.nvx4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 8 x i32> @llvm.sshl.sat.nvx8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
-declare <vscale x 16 x i32> @llvm.sshl.sat.nvx16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)
+declare <vscale x 2 x i32> @llvm.sshl.sat.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
+declare <vscale x 4 x i32> @llvm.sshl.sat.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 8 x i32> @llvm.sshl.sat.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
+declare <vscale x 16 x i32> @llvm.sshl.sat.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)
declare i64 @llvm.sshl.sat.i64(i64, i64)
declare <2 x i64> @llvm.sshl.sat.v2i64(<2 x i64>, <2 x i64>)
declare <4 x i64> @llvm.sshl.sat.v4i64(<4 x i64>, <4 x i64>)
declare <8 x i64> @llvm.sshl.sat.v8i64(<8 x i64>, <8 x i64>)
declare <16 x i64> @llvm.sshl.sat.v16i64(<16 x i64>, <16 x i64>)
-declare <vscale x 2 x i64> @llvm.sshl.sat.nvx2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
-declare <vscale x 4 x i64> @llvm.sshl.sat.nvx4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
-declare <vscale x 8 x i64> @llvm.sshl.sat.nvx8i64(<vscale x 8 x i64>, <vscale x 8 x i64>)
+declare <vscale x 2 x i64> @llvm.sshl.sat.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 4 x i64> @llvm.sshl.sat.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
+declare <vscale x 8 x i64> @llvm.sshl.sat.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>)
diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll
index bb98508f239c1b..f940e688c73930 100644
--- a/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll
@@ -1501,6 +1501,113 @@ define void @vp_fadd(){
}
+define void @vp_fneg() {
+; CHECK-LABEL: 'vp_fneg'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t0 = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t1 = fneg <2 x float> undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t2 = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t3 = fneg <4 x float> undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t4 = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t5 = fneg <8 x float> undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t6 = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t7 = fneg <16 x float> undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t8 = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t9 = fneg <2 x double> undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t10 = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t11 = fneg <4 x double> undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t12 = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t13 = fneg <8 x double> undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t14 = call <16 x double> @llvm.vp.fneg.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t15 = fneg <16 x double> undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t17 = call <vscale x 2 x float> @llvm.vp.fneg.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t18 = fneg <vscale x 2 x float> undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t19 = call <vscale x 4 x float> @llvm.vp.fneg.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t20 = fneg <vscale x 4 x float> undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t21 = call <vscale x 8 x float> @llvm.vp.fneg.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t22 = fneg <vscale x 8 x float> undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t23 = call <vscale x 16 x float> @llvm.vp.fneg.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t24 = fneg <vscale x 16 x float> undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t25 = call <vscale x 2 x double> @llvm.vp.fneg.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t26 = fneg <vscale x 2 x double> undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t27 = call <vscale x 4 x double> @llvm.vp.fneg.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t28 = fneg <vscale x 4 x double> undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t29 = call <vscale x 8 x double> @llvm.vp.fneg.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t30 = fneg <vscale x 8 x double> undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %t31 = call <vscale x 16 x double> @llvm.vp.fneg.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %t32 = fneg <vscale x 16 x double> undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; TYPEBASED-LABEL: 'vp_fneg'
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t0 = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t1 = fneg <2 x float> undef
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t2 = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t3 = fneg <4 x float> undef
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t4 = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t5 = fneg <8 x float> undef
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t6 = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t7 = fneg <16 x float> undef
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t8 = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t9 = fneg <2 x double> undef
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t10 = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t11 = fneg <4 x double> undef
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t12 = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t13 = fneg <8 x double> undef
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t14 = call <16 x double> @llvm.vp.fneg.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t15 = fneg <16 x double> undef
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t17 = call <vscale x 2 x float> @llvm.vp.fneg.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t18 = fneg <vscale x 2 x float> undef
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t19 = call <vscale x 4 x float> @llvm.vp.fneg.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t20 = fneg <vscale x 4 x float> undef
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t21 = call <vscale x 8 x float> @llvm.vp.fneg.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t22 = fneg <vscale x 8 x float> undef
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t23 = call <vscale x 16 x float> @llvm.vp.fneg.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t24 = fneg <vscale x 16 x float> undef
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t25 = call <vscale x 2 x double> @llvm.vp.fneg.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t26 = fneg <vscale x 2 x double> undef
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t27 = call <vscale x 4 x double> @llvm.vp.fneg.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t28 = fneg <vscale x 4 x double> undef
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t29 = call <vscale x 8 x double> @llvm.vp.fneg.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t30 = fneg <vscale x 8 x double> undef
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %t31 = call <vscale x 16 x double> @llvm.vp.fneg.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %t32 = fneg <vscale x 16 x double> undef
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+ %t0 = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+ %t1 = fneg <2 x float> undef
+ %t2 = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+ %t3 = fneg <4 x float> undef
+ %t4 = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+ %t5 = fneg <8 x float> undef
+ %t6 = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+ %t7 = fneg <16 x float> undef
+ %t8 = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+ %t9 = fneg <2 x double> undef
+ %t10 = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+ %t11 = fneg <4 x double> undef
+ %t12 = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+ %t13 = fneg <8 x double> undef
+ %t14 = call <16 x double> @llvm.vp.fneg.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+ %t15 = fneg <16 x double> undef
+ %t17 = call <vscale x 2 x float> @llvm.vp.fneg.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+ %t18 = fneg <vscale x 2 x float> undef
+ %t19 = call <vscale x 4 x float> @llvm.vp.fneg.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+ %t20 = fneg <vscale x 4 x float> undef
+ %t21 = call <vscale x 8 x float> @llvm.vp.fneg.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+ %t22 = fneg <vscale x 8 x float> undef
+ %t23 = call <vscale x 16 x float> @llvm.vp.fneg.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+ %t24 = fneg <vscale x 16 x float> undef
+ %t25 = call <vscale x 2 x double> @llvm.vp.fneg.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+ %t26 = fneg <vscale x 2 x double> undef
+ %t27 = call <vscale x 4 x double> @llvm.vp.fneg.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+ %t28 = fneg <vscale x 4 x double> undef
+ %t29 = call <vscale x 8 x double> @llvm.vp.fneg.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+ %t30 = fneg <vscale x 8 x double> undef
+ %t31 = call <vscale x 16 x double> @llvm.vp.fneg.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+ %t32 = fneg <vscale x 16 x double> undef
+
+ ret void
+}
+
declare <2 x i8> @llvm.vp.add.v2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32)
declare <4 x i8> @llvm.vp.add.v4i8(<4 x i8>, <4 x i8>, <4 x i1>, i32)
declare <8 x i8> @llvm.vp.add.v8i8(<8 x i8>, <8 x i8>, <8 x i1>, i32)
@@ -1772,3 +1879,4 @@ declare <vscale x 1 x i64> @llvm.vp.fshl.nxv1i64(<vscale x 1 x i64>, <vscale x 1
declare <vscale x 2 x i64> @llvm.vp.fshl.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i32)
declare <vscale x 4 x i64> @llvm.vp.fshl.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>, <vscale x 4 x i64>, <vscale x 4 x i1>, i32)
declare <vscale x 8 x i64> @llvm.vp.fshl.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>, <vscale x 8 x i64>, <vscale x 8 x i1>, i32)
+
More information about the llvm-commits mailing list