[llvm] c3edeaa - [Test] Rename the test function name suffix. NFC (#114504)

via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 31 22:49:38 PDT 2024


Author: LiqinWeng
Date: 2024-11-01T13:49:34+08:00
New Revision: c3edeaa61bf0e7faed6c26f693b4fcd9102ba1ec

URL: https://github.com/llvm/llvm-project/commit/c3edeaa61bf0e7faed6c26f693b4fcd9102ba1ec
DIFF: https://github.com/llvm/llvm-project/commit/c3edeaa61bf0e7faed6c26f693b4fcd9102ba1ec.diff

LOG: [Test] Rename the test function name suffix. NFC (#114504)
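
The intrinsic calls in these cost-model tests spell the scalable-vector suffix as "nvx", while LLVM's intrinsic name mangling spells <vscale x N x TY> as "nxv<N><TY>" (for example, <vscale x 4 x float> mangles to nxv4f32). This patch renames the suffixes to the standard spelling; a representative before/after pair from the diff below:

  ; old spelling, with "x" and "v" transposed
  call <vscale x 4 x float> @llvm.fabs.nvx4f32(<vscale x 4 x float> undef)
  ; standard mangling for <vscale x 4 x float>
  call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> undef)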

Added: 
    

Modified: 
    llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll
    llvm/test/Analysis/CostModel/RISCV/fp-sqrt-pow.ll
    llvm/test/Analysis/CostModel/RISCV/fp-trig-log-exp.ll
    llvm/test/Analysis/CostModel/RISCV/fround.ll
    llvm/test/Analysis/CostModel/RISCV/int-bit-manip.ll
    llvm/test/Analysis/CostModel/RISCV/int-min-max.ll
    llvm/test/Analysis/CostModel/RISCV/int-sat-math.ll

Removed: 
    


################################################################################
diff --git a/llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll b/llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll
index 6e4061a42bf9b8..0b2c8da4438da2 100644
--- a/llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll
@@ -30,20 +30,20 @@ define void @fabs() {
   call <4 x float> @llvm.fabs.v4f32(<4 x float> undef)
   call <8 x float> @llvm.fabs.v8f32(<8 x float> undef)
   call <16 x float> @llvm.fabs.v16f32(<16 x float> undef)
-  call <vscale x 1 x float> @llvm.fabs.nvx1f32(<vscale x 1 x float> undef)
-  call <vscale x 2 x float> @llvm.fabs.nvx2f32(<vscale x 2 x float> undef)
-  call <vscale x 4 x float> @llvm.fabs.nvx4f32(<vscale x 4 x float> undef)
-  call <vscale x 8 x float> @llvm.fabs.nvx8f32(<vscale x 8 x float> undef)
-  call <vscale x 16 x float> @llvm.fabs.nvx16f32(<vscale x 16 x float> undef)
+  call <vscale x 1 x float> @llvm.fabs.nxv1f32(<vscale x 1 x float> undef)
+  call <vscale x 2 x float> @llvm.fabs.nxv2f32(<vscale x 2 x float> undef)
+  call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> undef)
+  call <vscale x 8 x float> @llvm.fabs.nxv8f32(<vscale x 8 x float> undef)
+  call <vscale x 16 x float> @llvm.fabs.nxv16f32(<vscale x 16 x float> undef)
   call double @llvm.fabs.f64(double undef)
   call <2 x double> @llvm.fabs.v2f64(<2 x double> undef)
   call <4 x double> @llvm.fabs.v4f64(<4 x double> undef)
   call <8 x double> @llvm.fabs.v8f64(<8 x double> undef)
   call <16 x double> @llvm.fabs.v16f64(<16 x double> undef)
-  call <vscale x 1 x double> @llvm.fabs.nvx1f64(<vscale x 1 x double> undef)
-  call <vscale x 2 x double> @llvm.fabs.nvx2f64(<vscale x 2 x double> undef)
-  call <vscale x 4 x double> @llvm.fabs.nvx4f64(<vscale x 4 x double> undef)
-  call <vscale x 8 x double> @llvm.fabs.nvx8f64(<vscale x 8 x double> undef)
+  call <vscale x 1 x double> @llvm.fabs.nxv1f64(<vscale x 1 x double> undef)
+  call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> undef)
+  call <vscale x 4 x double> @llvm.fabs.nxv4f64(<vscale x 4 x double> undef)
+  call <vscale x 8 x double> @llvm.fabs.nxv8f64(<vscale x 8 x double> undef)
   ret void
 }
 
@@ -65,10 +65,10 @@ define void @fabs_f16() {
   call <4 x half> @llvm.fabs.v4f16(<4 x half> undef)
   call <8 x half> @llvm.fabs.v8f16(<8 x half> undef)
   call <16 x half> @llvm.fabs.v16f16(<16 x half> undef)
-  call <vscale x 2 x half> @llvm.fabs.nvx2f16(<vscale x 2 x half> undef)
-  call <vscale x 4 x half> @llvm.fabs.nvx4f16(<vscale x 4 x half> undef)
-  call <vscale x 8 x half> @llvm.fabs.nvx8f16(<vscale x 8 x half> undef)
-  call <vscale x 16 x half> @llvm.fabs.nvx16f16(<vscale x 16 x half> undef)
+  call <vscale x 2 x half> @llvm.fabs.nxv2f16(<vscale x 2 x half> undef)
+  call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> undef)
+  call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> undef)
+  call <vscale x 16 x half> @llvm.fabs.nxv16f16(<vscale x 16 x half> undef)
   ret void
 }
 
@@ -100,20 +100,20 @@ define void @minnum() {
   call <4 x float> @llvm.minnum.v4f32(<4 x float> undef, <4 x float> undef)
   call <8 x float> @llvm.minnum.v8f32(<8 x float> undef, <8 x float> undef)
   call <16 x float> @llvm.minnum.v16f32(<16 x float> undef, <16 x float> undef)
-  call <vscale x 1 x float> @llvm.minnum.nvx1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef)
-  call <vscale x 2 x float> @llvm.minnum.nvx2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef)
-  call <vscale x 4 x float> @llvm.minnum.nvx4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef)
-  call <vscale x 8 x float> @llvm.minnum.nvx8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef)
-  call <vscale x 16 x float> @llvm.minnum.nvx16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef)
+  call <vscale x 1 x float> @llvm.minnum.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef)
+  call <vscale x 2 x float> @llvm.minnum.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef)
+  call <vscale x 4 x float> @llvm.minnum.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef)
+  call <vscale x 8 x float> @llvm.minnum.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef)
+  call <vscale x 16 x float> @llvm.minnum.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef)
   call double @llvm.minnum.f64(double undef, double undef)
   call <2 x double> @llvm.minnum.v2f64(<2 x double> undef, <2 x double> undef)
   call <4 x double> @llvm.minnum.v4f64(<4 x double> undef, <4 x double> undef)
   call <8 x double> @llvm.minnum.v8f64(<8 x double> undef, <8 x double> undef)
   call <16 x double> @llvm.minnum.v16f64(<16 x double> undef, <16 x double> undef)
-  call <vscale x 1 x double> @llvm.minnum.nvx1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef)
-  call <vscale x 2 x double> @llvm.minnum.nvx2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef)
-  call <vscale x 4 x double> @llvm.minnum.nvx4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef)
-  call <vscale x 8 x double> @llvm.minnum.nvx8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef)
+  call <vscale x 1 x double> @llvm.minnum.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef)
+  call <vscale x 2 x double> @llvm.minnum.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef)
+  call <vscale x 4 x double> @llvm.minnum.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef)
+  call <vscale x 8 x double> @llvm.minnum.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef)
   ret void
 }
 
@@ -149,11 +149,11 @@ define void @minnum_f16() {
   call <4 x half> @llvm.minnum.v4f16(<4 x half> undef, <4 x half> undef)
   call <8 x half> @llvm.minnum.v8f16(<8 x half> undef, <8 x half> undef)
   call <16 x half> @llvm.minnum.v16f16(<16 x half> undef, <16 x half> undef)
-  call <vscale x 1 x half> @llvm.minnum.nvx1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
-  call <vscale x 2 x half> @llvm.minnum.nvx2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
-  call <vscale x 4 x half> @llvm.minnum.nvx4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
-  call <vscale x 8 x half> @llvm.minnum.nvx8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
-  call <vscale x 16 x half> @llvm.minnum.nvx16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
+  call <vscale x 1 x half> @llvm.minnum.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
+  call <vscale x 2 x half> @llvm.minnum.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
+  call <vscale x 4 x half> @llvm.minnum.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
+  call <vscale x 8 x half> @llvm.minnum.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
+  call <vscale x 16 x half> @llvm.minnum.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
   ret void
 }
 
@@ -185,20 +185,20 @@ define void @maxnum() {
   call <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef)
   call <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef)
   call <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef)
-  call <vscale x 1 x float> @llvm.maxnum.nvx1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef)
-  call <vscale x 2 x float> @llvm.maxnum.nvx2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef)
-  call <vscale x 4 x float> @llvm.maxnum.nvx4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef)
-  call <vscale x 8 x float> @llvm.maxnum.nvx8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef)
-  call <vscale x 16 x float> @llvm.maxnum.nvx16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef)
+  call <vscale x 1 x float> @llvm.maxnum.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef)
+  call <vscale x 2 x float> @llvm.maxnum.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef)
+  call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef)
+  call <vscale x 8 x float> @llvm.maxnum.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef)
+  call <vscale x 16 x float> @llvm.maxnum.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef)
   call double @llvm.maxnum.f64(double undef, double undef)
   call <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef)
   call <4 x double> @llvm.maxnum.v4f64(<4 x double> undef, <4 x double> undef)
   call <8 x double> @llvm.maxnum.v8f64(<8 x double> undef, <8 x double> undef)
   call <16 x double> @llvm.maxnum.v16f64(<16 x double> undef, <16 x double> undef)
-  call <vscale x 1 x double> @llvm.maxnum.nvx1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef)
-  call <vscale x 2 x double> @llvm.maxnum.nvx2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef)
-  call <vscale x 4 x double> @llvm.maxnum.nvx4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef)
-  call <vscale x 8 x double> @llvm.maxnum.nvx8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef)
+  call <vscale x 1 x double> @llvm.maxnum.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef)
+  call <vscale x 2 x double> @llvm.maxnum.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef)
+  call <vscale x 4 x double> @llvm.maxnum.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef)
+  call <vscale x 8 x double> @llvm.maxnum.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef)
   ret void
 }
 
@@ -234,11 +234,11 @@ define void @maxnum_f16() {
   call <4 x half> @llvm.maxnum.v4f16(<4 x half> undef, <4 x half> undef)
   call <8 x half> @llvm.maxnum.v8f16(<8 x half> undef, <8 x half> undef)
   call <16 x half> @llvm.maxnum.v16f16(<16 x half> undef, <16 x half> undef)
-  call <vscale x 1 x half> @llvm.maxnum.nvx1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
-  call <vscale x 2 x half> @llvm.maxnum.nvx2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
-  call <vscale x 4 x half> @llvm.maxnum.nvx4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
-  call <vscale x 8 x half> @llvm.maxnum.nvx8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
-  call <vscale x 16 x half> @llvm.maxnum.nvx16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
+  call <vscale x 1 x half> @llvm.maxnum.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
+  call <vscale x 2 x half> @llvm.maxnum.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
+  call <vscale x 4 x half> @llvm.maxnum.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
+  call <vscale x 8 x half> @llvm.maxnum.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
+  call <vscale x 16 x half> @llvm.maxnum.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
   ret void
 }
 
@@ -270,20 +270,20 @@ define void @minimum() {
   call <4 x float> @llvm.minimum.v4f32(<4 x float> undef, <4 x float> undef)
   call <8 x float> @llvm.minimum.v8f32(<8 x float> undef, <8 x float> undef)
   call <16 x float> @llvm.minimum.v16f32(<16 x float> undef, <16 x float> undef)
-  call <vscale x 1 x float> @llvm.minimum.nvx1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef)
-  call <vscale x 2 x float> @llvm.minimum.nvx2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef)
-  call <vscale x 4 x float> @llvm.minimum.nvx4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef)
-  call <vscale x 8 x float> @llvm.minimum.nvx8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef)
-  call <vscale x 16 x float> @llvm.minimum.nvx16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef)
+  call <vscale x 1 x float> @llvm.minimum.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef)
+  call <vscale x 2 x float> @llvm.minimum.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef)
+  call <vscale x 4 x float> @llvm.minimum.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef)
+  call <vscale x 8 x float> @llvm.minimum.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef)
+  call <vscale x 16 x float> @llvm.minimum.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef)
   call double @llvm.minimum.f64(double undef, double undef)
   call <2 x double> @llvm.minimum.v2f64(<2 x double> undef, <2 x double> undef)
   call <4 x double> @llvm.minimum.v4f64(<4 x double> undef, <4 x double> undef)
   call <8 x double> @llvm.minimum.v8f64(<8 x double> undef, <8 x double> undef)
   call <16 x double> @llvm.minimum.v16f64(<16 x double> undef, <16 x double> undef)
-  call <vscale x 1 x double> @llvm.minimum.nvx1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef)
-  call <vscale x 2 x double> @llvm.minimum.nvx2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef)
-  call <vscale x 4 x double> @llvm.minimum.nvx4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef)
-  call <vscale x 8 x double> @llvm.minimum.nvx8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef)
+  call <vscale x 1 x double> @llvm.minimum.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef)
+  call <vscale x 2 x double> @llvm.minimum.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef)
+  call <vscale x 4 x double> @llvm.minimum.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef)
+  call <vscale x 8 x double> @llvm.minimum.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef)
   ret void
 }
 
@@ -319,11 +319,11 @@ define void @minimum_f16() {
   call <4 x half> @llvm.minimum.v4f16(<4 x half> undef, <4 x half> undef)
   call <8 x half> @llvm.minimum.v8f16(<8 x half> undef, <8 x half> undef)
   call <16 x half> @llvm.minimum.v16f16(<16 x half> undef, <16 x half> undef)
-  call <vscale x 1 x half> @llvm.minimum.nvx1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
-  call <vscale x 2 x half> @llvm.minimum.nvx2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
-  call <vscale x 4 x half> @llvm.minimum.nvx4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
-  call <vscale x 8 x half> @llvm.minimum.nvx8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
-  call <vscale x 16 x half> @llvm.minimum.nvx16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
+  call <vscale x 1 x half> @llvm.minimum.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
+  call <vscale x 2 x half> @llvm.minimum.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
+  call <vscale x 4 x half> @llvm.minimum.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
+  call <vscale x 8 x half> @llvm.minimum.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
+  call <vscale x 16 x half> @llvm.minimum.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
   ret void
 }
 
@@ -355,20 +355,20 @@ define void @maximum() {
   call <4 x float> @llvm.maximum.v4f32(<4 x float> undef, <4 x float> undef)
   call <8 x float> @llvm.maximum.v8f32(<8 x float> undef, <8 x float> undef)
   call <16 x float> @llvm.maximum.v16f32(<16 x float> undef, <16 x float> undef)
-  call <vscale x 1 x float> @llvm.maximum.nvx1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef)
-  call <vscale x 2 x float> @llvm.maximum.nvx2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef)
-  call <vscale x 4 x float> @llvm.maximum.nvx4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef)
-  call <vscale x 8 x float> @llvm.maximum.nvx8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef)
-  call <vscale x 16 x float> @llvm.maximum.nvx16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef)
+  call <vscale x 1 x float> @llvm.maximum.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef)
+  call <vscale x 2 x float> @llvm.maximum.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef)
+  call <vscale x 4 x float> @llvm.maximum.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef)
+  call <vscale x 8 x float> @llvm.maximum.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef)
+  call <vscale x 16 x float> @llvm.maximum.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef)
   call double @llvm.maximum.f64(double undef, double undef)
   call <2 x double> @llvm.maximum.v2f64(<2 x double> undef, <2 x double> undef)
   call <4 x double> @llvm.maximum.v4f64(<4 x double> undef, <4 x double> undef)
   call <8 x double> @llvm.maximum.v8f64(<8 x double> undef, <8 x double> undef)
   call <16 x double> @llvm.maximum.v16f64(<16 x double> undef, <16 x double> undef)
-  call <vscale x 1 x double> @llvm.maximum.nvx1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef)
-  call <vscale x 2 x double> @llvm.maximum.nvx2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef)
-  call <vscale x 4 x double> @llvm.maximum.nvx4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef)
-  call <vscale x 8 x double> @llvm.maximum.nvx8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef)
+  call <vscale x 1 x double> @llvm.maximum.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef)
+  call <vscale x 2 x double> @llvm.maximum.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef)
+  call <vscale x 4 x double> @llvm.maximum.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef)
+  call <vscale x 8 x double> @llvm.maximum.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef)
   ret void
 }
 
@@ -404,11 +404,11 @@ define void @maximum_f16() {
   call <4 x half> @llvm.maximum.v4f16(<4 x half> undef, <4 x half> undef)
   call <8 x half> @llvm.maximum.v8f16(<8 x half> undef, <8 x half> undef)
   call <16 x half> @llvm.maximum.v16f16(<16 x half> undef, <16 x half> undef)
-  call <vscale x 1 x half> @llvm.maximum.nvx1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
-  call <vscale x 2 x half> @llvm.maximum.nvx2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
-  call <vscale x 4 x half> @llvm.maximum.nvx4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
-  call <vscale x 8 x half> @llvm.maximum.nvx8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
-  call <vscale x 16 x half> @llvm.maximum.nvx16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
+  call <vscale x 1 x half> @llvm.maximum.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
+  call <vscale x 2 x half> @llvm.maximum.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
+  call <vscale x 4 x half> @llvm.maximum.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
+  call <vscale x 8 x half> @llvm.maximum.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
+  call <vscale x 16 x half> @llvm.maximum.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
   ret void
 }
 
@@ -440,20 +440,20 @@ define void @copysign() {
   call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef)
   call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef)
   call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef)
-  call <vscale x 1 x float> @llvm.copysign.nvx1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef)
-  call <vscale x 2 x float> @llvm.copysign.nvx2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef)
-  call <vscale x 4 x float> @llvm.copysign.nvx4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef)
-  call <vscale x 8 x float> @llvm.copysign.nvx8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef)
-  call <vscale x 16 x float> @llvm.copysign.nvx16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef)
+  call <vscale x 1 x float> @llvm.copysign.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef)
+  call <vscale x 2 x float> @llvm.copysign.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef)
+  call <vscale x 4 x float> @llvm.copysign.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef)
+  call <vscale x 8 x float> @llvm.copysign.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef)
+  call <vscale x 16 x float> @llvm.copysign.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef)
   call double @llvm.copysign.f64(double undef, double undef)
   call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
   call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
   call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
   call <16 x double> @llvm.copysign.v16f64(<16 x double> undef, <16 x double> undef)
-  call <vscale x 1 x double> @llvm.copysign.nvx1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef)
-  call <vscale x 2 x double> @llvm.copysign.nvx2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef)
-  call <vscale x 4 x double> @llvm.copysign.nvx4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef)
-  call <vscale x 8 x double> @llvm.copysign.nvx8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef)
+  call <vscale x 1 x double> @llvm.copysign.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef)
+  call <vscale x 2 x double> @llvm.copysign.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef)
+  call <vscale x 4 x double> @llvm.copysign.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef)
+  call <vscale x 8 x double> @llvm.copysign.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef)
   ret void
 }
 
@@ -489,10 +489,10 @@ define void @copysign_f16() {
   call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef)
   call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef)
   call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef)
-  call <vscale x 1 x half> @llvm.copysign.nvx1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
-  call <vscale x 2 x half> @llvm.copysign.nvx2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
-  call <vscale x 4 x half> @llvm.copysign.nvx4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
-  call <vscale x 8 x half> @llvm.copysign.nvx8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
-  call <vscale x 16 x half> @llvm.copysign.nvx16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
+  call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
+  call <vscale x 2 x half> @llvm.copysign.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
+  call <vscale x 4 x half> @llvm.copysign.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
+  call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
+  call <vscale x 16 x half> @llvm.copysign.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
   ret void
 }

diff --git a/llvm/test/Analysis/CostModel/RISCV/fp-sqrt-pow.ll b/llvm/test/Analysis/CostModel/RISCV/fp-sqrt-pow.ll
index efe17f2b76a70e..be9c19dc59a852 100644
--- a/llvm/test/Analysis/CostModel/RISCV/fp-sqrt-pow.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/fp-sqrt-pow.ll
@@ -39,29 +39,29 @@ define void @sqrt() {
   call <4 x bfloat> @llvm.sqrt.v4bf16(<4 x bfloat> undef)
   call <8 x bfloat> @llvm.sqrt.v8bf16(<8 x bfloat> undef)
   call <16 x bfloat> @llvm.sqrt.v16bf16(<16 x bfloat> undef)
-  call <vscale x 2 x bfloat> @llvm.sqrt.nvx2bf16(<vscale x 2 x bfloat> undef)
-  call <vscale x 4 x bfloat> @llvm.sqrt.nvx4bf16(<vscale x 4 x bfloat> undef)
-  call <vscale x 8 x bfloat> @llvm.sqrt.nvx8bf16(<vscale x 8 x bfloat> undef)
-  call <vscale x 16 x bfloat> @llvm.sqrt.nvx16bf16(<vscale x 16 x bfloat> undef)
+  call <vscale x 2 x bfloat> @llvm.sqrt.nxv2bf16(<vscale x 2 x bfloat> undef)
+  call <vscale x 4 x bfloat> @llvm.sqrt.nxv4bf16(<vscale x 4 x bfloat> undef)
+  call <vscale x 8 x bfloat> @llvm.sqrt.nxv8bf16(<vscale x 8 x bfloat> undef)
+  call <vscale x 16 x bfloat> @llvm.sqrt.nxv16bf16(<vscale x 16 x bfloat> undef)
   call float @llvm.sqrt.f32(float undef)
   call <2 x float> @llvm.sqrt.v2f32(<2 x float> undef)
   call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
   call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
   call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
-  call <vscale x 1 x float> @llvm.sqrt.nvx1f32(<vscale x 1 x float> undef)
-  call <vscale x 2 x float> @llvm.sqrt.nvx2f32(<vscale x 2 x float> undef)
-  call <vscale x 4 x float> @llvm.sqrt.nvx4f32(<vscale x 4 x float> undef)
-  call <vscale x 8 x float> @llvm.sqrt.nvx8f32(<vscale x 8 x float> undef)
-  call <vscale x 16 x float> @llvm.sqrt.nvx16f32(<vscale x 16 x float> undef)
+  call <vscale x 1 x float> @llvm.sqrt.nxv1f32(<vscale x 1 x float> undef)
+  call <vscale x 2 x float> @llvm.sqrt.nxv2f32(<vscale x 2 x float> undef)
+  call <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float> undef)
+  call <vscale x 8 x float> @llvm.sqrt.nxv8f32(<vscale x 8 x float> undef)
+  call <vscale x 16 x float> @llvm.sqrt.nxv16f32(<vscale x 16 x float> undef)
   call double @llvm.sqrt.f64(double undef)
   call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
   call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
   call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
   call <16 x double> @llvm.sqrt.v16f64(<16 x double> undef)
-  call <vscale x 1 x double> @llvm.sqrt.nvx1f64(<vscale x 1 x double> undef)
-  call <vscale x 2 x double> @llvm.sqrt.nvx2f64(<vscale x 2 x double> undef)
-  call <vscale x 4 x double> @llvm.sqrt.nvx4f64(<vscale x 4 x double> undef)
-  call <vscale x 8 x double> @llvm.sqrt.nvx8f64(<vscale x 8 x double> undef)
+  call <vscale x 1 x double> @llvm.sqrt.nxv1f64(<vscale x 1 x double> undef)
+  call <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double> undef)
+  call <vscale x 4 x double> @llvm.sqrt.nxv4f64(<vscale x 4 x double> undef)
+  call <vscale x 8 x double> @llvm.sqrt.nxv8f64(<vscale x 8 x double> undef)
   ret void
 }
 
@@ -83,10 +83,10 @@ define void @sqrt_f16() {
   call <4 x half> @llvm.sqrt.v4f16(<4 x half> undef)
   call <8 x half> @llvm.sqrt.v8f16(<8 x half> undef)
   call <16 x half> @llvm.sqrt.v16f16(<16 x half> undef)
-  call <vscale x 2 x half> @llvm.sqrt.nvx2f16(<vscale x 2 x half> undef)
-  call <vscale x 4 x half> @llvm.sqrt.nvx4f16(<vscale x 4 x half> undef)
-  call <vscale x 8 x half> @llvm.sqrt.nvx8f16(<vscale x 8 x half> undef)
-  call <vscale x 16 x half> @llvm.sqrt.nvx16f16(<vscale x 16 x half> undef)
+  call <vscale x 2 x half> @llvm.sqrt.nxv2f16(<vscale x 2 x half> undef)
+  call <vscale x 4 x half> @llvm.sqrt.nxv4f16(<vscale x 4 x half> undef)
+  call <vscale x 8 x half> @llvm.sqrt.nxv8f16(<vscale x 8 x half> undef)
+  call <vscale x 16 x half> @llvm.sqrt.nxv16f16(<vscale x 16 x half> undef)
   ret void
 }
 
@@ -128,30 +128,30 @@ define void @pow() {
   call <4 x bfloat> @llvm.pow.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef)
   call <8 x bfloat> @llvm.pow.v8bf16(<8 x bfloat> undef, <8 x bfloat> undef)
   call <16 x bfloat> @llvm.pow.v16bf16(<16 x bfloat> undef, <16 x bfloat> undef)
-  call <vscale x 1 x bfloat> @llvm.pow.nvx1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef)
-  call <vscale x 2 x bfloat> @llvm.pow.nvx2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef)
-  call <vscale x 4 x bfloat> @llvm.pow.nvx4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef)
-  call <vscale x 8 x bfloat> @llvm.pow.nvx8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef)
-  call <vscale x 16 x bfloat> @llvm.pow.nvx16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef)
+  call <vscale x 1 x bfloat> @llvm.pow.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef)
+  call <vscale x 2 x bfloat> @llvm.pow.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef)
+  call <vscale x 4 x bfloat> @llvm.pow.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef)
+  call <vscale x 8 x bfloat> @llvm.pow.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef)
+  call <vscale x 16 x bfloat> @llvm.pow.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef)
   call float @llvm.pow.f32(float undef, float undef)
   call <2 x float> @llvm.pow.v2f32(<2 x float> undef, <2 x float> undef)
   call <4 x float> @llvm.pow.v4f32(<4 x float> undef, <4 x float> undef)
   call <8 x float> @llvm.pow.v8f32(<8 x float> undef, <8 x float> undef)
   call <16 x float> @llvm.pow.v16f32(<16 x float> undef, <16 x float> undef)
-  call <vscale x 1 x float> @llvm.pow.nvx1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef)
-  call <vscale x 2 x float> @llvm.pow.nvx2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef)
-  call <vscale x 4 x float> @llvm.pow.nvx4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef)
-  call <vscale x 8 x float> @llvm.pow.nvx8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef)
-  call <vscale x 16 x float> @llvm.pow.nvx16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef)
+  call <vscale x 1 x float> @llvm.pow.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef)
+  call <vscale x 2 x float> @llvm.pow.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef)
+  call <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef)
+  call <vscale x 8 x float> @llvm.pow.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef)
+  call <vscale x 16 x float> @llvm.pow.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef)
   call double @llvm.pow.f64(double undef, double undef)
   call <2 x double> @llvm.pow.v2f64(<2 x double> undef, <2 x double> undef)
   call <4 x double> @llvm.pow.v4f64(<4 x double> undef, <4 x double> undef)
   call <8 x double> @llvm.pow.v8f64(<8 x double> undef, <8 x double> undef)
   call <16 x double> @llvm.pow.v16f64(<16 x double> undef, <16 x double> undef)
-  call <vscale x 1 x double> @llvm.pow.nvx1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef)
-  call <vscale x 2 x double> @llvm.pow.nvx2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef)
-  call <vscale x 4 x double> @llvm.pow.nvx4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef)
-  call <vscale x 8 x double> @llvm.pow.nvx8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef)
+  call <vscale x 1 x double> @llvm.pow.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef)
+  call <vscale x 2 x double> @llvm.pow.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef)
+  call <vscale x 4 x double> @llvm.pow.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef)
+  call <vscale x 8 x double> @llvm.pow.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef)
   ret void
 }
 
@@ -187,10 +187,10 @@ define void @pow_f16() {
   call <4 x half> @llvm.pow.v4f16(<4 x half> undef, <4 x half> undef)
   call <8 x half> @llvm.pow.v8f16(<8 x half> undef, <8 x half> undef)
   call <16 x half> @llvm.pow.v16f16(<16 x half> undef, <16 x half> undef)
-  call <vscale x 1 x half> @llvm.pow.nvx1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
-  call <vscale x 2 x half> @llvm.pow.nvx2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
-  call <vscale x 4 x half> @llvm.pow.nvx4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
-  call <vscale x 8 x half> @llvm.pow.nvx8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
-  call <vscale x 16 x half> @llvm.pow.nvx16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
+  call <vscale x 1 x half> @llvm.pow.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
+  call <vscale x 2 x half> @llvm.pow.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
+  call <vscale x 4 x half> @llvm.pow.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
+  call <vscale x 8 x half> @llvm.pow.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
+  call <vscale x 16 x half> @llvm.pow.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
   ret void
 }

diff --git a/llvm/test/Analysis/CostModel/RISCV/fp-trig-log-exp.ll b/llvm/test/Analysis/CostModel/RISCV/fp-trig-log-exp.ll
index 34d6c93f4577a7..ba5e40ca03b88a 100644
--- a/llvm/test/Analysis/CostModel/RISCV/fp-trig-log-exp.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/fp-trig-log-exp.ll
@@ -40,30 +40,30 @@ define void @sin() {
   call <4 x bfloat> @llvm.sin.v4bf16(<4 x bfloat> undef)
   call <8 x bfloat> @llvm.sin.v8bf16(<8 x bfloat> undef)
   call <16 x bfloat> @llvm.sin.v16bf16(<16 x bfloat> undef)
-  call <vscale x 1 x bfloat> @llvm.sin.nvx1bf16(<vscale x 1 x bfloat> undef)
-  call <vscale x 2 x bfloat> @llvm.sin.nvx2bf16(<vscale x 2 x bfloat> undef)
-  call <vscale x 4 x bfloat> @llvm.sin.nvx4bf16(<vscale x 4 x bfloat> undef)
-  call <vscale x 8 x bfloat> @llvm.sin.nvx8bf16(<vscale x 8 x bfloat> undef)
-  call <vscale x 16 x bfloat> @llvm.sin.nvx16bf16(<vscale x 16 x bfloat> undef)
+  call <vscale x 1 x bfloat> @llvm.sin.nxv1bf16(<vscale x 1 x bfloat> undef)
+  call <vscale x 2 x bfloat> @llvm.sin.nxv2bf16(<vscale x 2 x bfloat> undef)
+  call <vscale x 4 x bfloat> @llvm.sin.nxv4bf16(<vscale x 4 x bfloat> undef)
+  call <vscale x 8 x bfloat> @llvm.sin.nxv8bf16(<vscale x 8 x bfloat> undef)
+  call <vscale x 16 x bfloat> @llvm.sin.nxv16bf16(<vscale x 16 x bfloat> undef)
   call float @llvm.sin.f32(float undef)
   call <2 x float> @llvm.sin.v2f32(<2 x float> undef)
   call <4 x float> @llvm.sin.v4f32(<4 x float> undef)
   call <8 x float> @llvm.sin.v8f32(<8 x float> undef)
   call <16 x float> @llvm.sin.v16f32(<16 x float> undef)
-  call <vscale x 1 x float> @llvm.sin.nvx1f32(<vscale x 1 x float> undef)
-  call <vscale x 2 x float> @llvm.sin.nvx2f32(<vscale x 2 x float> undef)
-  call <vscale x 4 x float> @llvm.sin.nvx4f32(<vscale x 4 x float> undef)
-  call <vscale x 8 x float> @llvm.sin.nvx8f32(<vscale x 8 x float> undef)
-  call <vscale x 16 x float> @llvm.sin.nvx16f32(<vscale x 16 x float> undef)
+  call <vscale x 1 x float> @llvm.sin.nxv1f32(<vscale x 1 x float> undef)
+  call <vscale x 2 x float> @llvm.sin.nxv2f32(<vscale x 2 x float> undef)
+  call <vscale x 4 x float> @llvm.sin.nxv4f32(<vscale x 4 x float> undef)
+  call <vscale x 8 x float> @llvm.sin.nxv8f32(<vscale x 8 x float> undef)
+  call <vscale x 16 x float> @llvm.sin.nxv16f32(<vscale x 16 x float> undef)
   call double @llvm.sin.f64(double undef)
   call <2 x double> @llvm.sin.v2f64(<2 x double> undef)
   call <4 x double> @llvm.sin.v4f64(<4 x double> undef)
   call <8 x double> @llvm.sin.v8f64(<8 x double> undef)
   call <16 x double> @llvm.sin.v16f64(<16 x double> undef)
-  call <vscale x 1 x double> @llvm.sin.nvx1f64(<vscale x 1 x double> undef)
-  call <vscale x 2 x double> @llvm.sin.nvx2f64(<vscale x 2 x double> undef)
-  call <vscale x 4 x double> @llvm.sin.nvx4f64(<vscale x 4 x double> undef)
-  call <vscale x 8 x double> @llvm.sin.nvx8f64(<vscale x 8 x double> undef)
+  call <vscale x 1 x double> @llvm.sin.nxv1f64(<vscale x 1 x double> undef)
+  call <vscale x 2 x double> @llvm.sin.nxv2f64(<vscale x 2 x double> undef)
+  call <vscale x 4 x double> @llvm.sin.nxv4f64(<vscale x 4 x double> undef)
+  call <vscale x 8 x double> @llvm.sin.nxv8f64(<vscale x 8 x double> undef)
   ret void
 }
 
@@ -97,10 +97,10 @@ define void @sin_f16() {
   call <4 x half> @llvm.sin.v4f16(<4 x half> undef)
   call <8 x half> @llvm.sin.v8f16(<8 x half> undef)
   call <16 x half> @llvm.sin.v16f16(<16 x half> undef)
-  call <vscale x 1 x half> @llvm.sin.nvx1f16(<vscale x 1 x half> undef)
-  call <vscale x 2 x half> @llvm.sin.nvx2f16(<vscale x 2 x half> undef)
-  call <vscale x 4 x half> @llvm.sin.nvx4f16(<vscale x 4 x half> undef)
-  call <vscale x 8 x half> @llvm.sin.nvx8f16(<vscale x 8 x half> undef)
+  call <vscale x 1 x half> @llvm.sin.nxv1f16(<vscale x 1 x half> undef)
+  call <vscale x 2 x half> @llvm.sin.nxv2f16(<vscale x 2 x half> undef)
+  call <vscale x 4 x half> @llvm.sin.nxv4f16(<vscale x 4 x half> undef)
+  call <vscale x 8 x half> @llvm.sin.nxv8f16(<vscale x 8 x half> undef)
   ret void
 }
 
@@ -142,30 +142,30 @@ define void @cos() {
   call <4 x bfloat> @llvm.cos.v4bf16(<4 x bfloat> undef)
   call <8 x bfloat> @llvm.cos.v8bf16(<8 x bfloat> undef)
   call <16 x bfloat> @llvm.cos.v16bf16(<16 x bfloat> undef)
-  call <vscale x 1 x bfloat> @llvm.cos.nvx1bf16(<vscale x 1 x bfloat> undef)
-  call <vscale x 2 x bfloat> @llvm.cos.nvx2bf16(<vscale x 2 x bfloat> undef)
-  call <vscale x 4 x bfloat> @llvm.cos.nvx4bf16(<vscale x 4 x bfloat> undef)
-  call <vscale x 8 x bfloat> @llvm.cos.nvx8bf16(<vscale x 8 x bfloat> undef)
-  call <vscale x 16 x bfloat> @llvm.cos.nvx16bf16(<vscale x 16 x bfloat> undef)
+  call <vscale x 1 x bfloat> @llvm.cos.nxv1bf16(<vscale x 1 x bfloat> undef)
+  call <vscale x 2 x bfloat> @llvm.cos.nxv2bf16(<vscale x 2 x bfloat> undef)
+  call <vscale x 4 x bfloat> @llvm.cos.nxv4bf16(<vscale x 4 x bfloat> undef)
+  call <vscale x 8 x bfloat> @llvm.cos.nxv8bf16(<vscale x 8 x bfloat> undef)
+  call <vscale x 16 x bfloat> @llvm.cos.nxv16bf16(<vscale x 16 x bfloat> undef)
   call float @llvm.cos.f32(float undef)
   call <2 x float> @llvm.cos.v2f32(<2 x float> undef)
   call <4 x float> @llvm.cos.v4f32(<4 x float> undef)
   call <8 x float> @llvm.cos.v8f32(<8 x float> undef)
   call <16 x float> @llvm.cos.v16f32(<16 x float> undef)
-  call <vscale x 1 x float> @llvm.cos.nvx1f32(<vscale x 1 x float> undef)
-  call <vscale x 2 x float> @llvm.cos.nvx2f32(<vscale x 2 x float> undef)
-  call <vscale x 4 x float> @llvm.cos.nvx4f32(<vscale x 4 x float> undef)
-  call <vscale x 8 x float> @llvm.cos.nvx8f32(<vscale x 8 x float> undef)
-  call <vscale x 16 x float> @llvm.cos.nvx16f32(<vscale x 16 x float> undef)
+  call <vscale x 1 x float> @llvm.cos.nxv1f32(<vscale x 1 x float> undef)
+  call <vscale x 2 x float> @llvm.cos.nxv2f32(<vscale x 2 x float> undef)
+  call <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float> undef)
+  call <vscale x 8 x float> @llvm.cos.nxv8f32(<vscale x 8 x float> undef)
+  call <vscale x 16 x float> @llvm.cos.nxv16f32(<vscale x 16 x float> undef)
   call double @llvm.cos.f64(double undef)
   call <2 x double> @llvm.cos.v2f64(<2 x double> undef)
   call <4 x double> @llvm.cos.v4f64(<4 x double> undef)
   call <8 x double> @llvm.cos.v8f64(<8 x double> undef)
   call <16 x double> @llvm.cos.v16f64(<16 x double> undef)
-  call <vscale x 1 x double> @llvm.cos.nvx1f64(<vscale x 1 x double> undef)
-  call <vscale x 2 x double> @llvm.cos.nvx2f64(<vscale x 2 x double> undef)
-  call <vscale x 4 x double> @llvm.cos.nvx4f64(<vscale x 4 x double> undef)
-  call <vscale x 8 x double> @llvm.cos.nvx8f64(<vscale x 8 x double> undef)
+  call <vscale x 1 x double> @llvm.cos.nxv1f64(<vscale x 1 x double> undef)
+  call <vscale x 2 x double> @llvm.cos.nxv2f64(<vscale x 2 x double> undef)
+  call <vscale x 4 x double> @llvm.cos.nxv4f64(<vscale x 4 x double> undef)
+  call <vscale x 8 x double> @llvm.cos.nxv8f64(<vscale x 8 x double> undef)
   ret void
 }
 
@@ -199,10 +199,10 @@ define void @cos_f16() {
   call <4 x half> @llvm.cos.v4f16(<4 x half> undef)
   call <8 x half> @llvm.cos.v8f16(<8 x half> undef)
   call <16 x half> @llvm.cos.v16f16(<16 x half> undef)
-  call <vscale x 1 x half> @llvm.cos.nvx1f16(<vscale x 1 x half> undef)
-  call <vscale x 2 x half> @llvm.cos.nvx2f16(<vscale x 2 x half> undef)
-  call <vscale x 4 x half> @llvm.cos.nvx4f16(<vscale x 4 x half> undef)
-  call <vscale x 8 x half> @llvm.cos.nvx8f16(<vscale x 8 x half> undef)
+  call <vscale x 1 x half> @llvm.cos.nxv1f16(<vscale x 1 x half> undef)
+  call <vscale x 2 x half> @llvm.cos.nxv2f16(<vscale x 2 x half> undef)
+  call <vscale x 4 x half> @llvm.cos.nxv4f16(<vscale x 4 x half> undef)
+  call <vscale x 8 x half> @llvm.cos.nxv8f16(<vscale x 8 x half> undef)
   ret void
 }
 
@@ -244,30 +244,30 @@ define void @exp() {
   call <4 x bfloat> @llvm.exp.v4bf16(<4 x bfloat> undef)
   call <8 x bfloat> @llvm.exp.v8bf16(<8 x bfloat> undef)
   call <16 x bfloat> @llvm.exp.v16bf16(<16 x bfloat> undef)
-  call <vscale x 1 x bfloat> @llvm.exp.nvx1bf16(<vscale x 1 x bfloat> undef)
-  call <vscale x 2 x bfloat> @llvm.exp.nvx2bf16(<vscale x 2 x bfloat> undef)
-  call <vscale x 4 x bfloat> @llvm.exp.nvx4bf16(<vscale x 4 x bfloat> undef)
-  call <vscale x 8 x bfloat> @llvm.exp.nvx8bf16(<vscale x 8 x bfloat> undef)
-  call <vscale x 16 x bfloat> @llvm.exp.nvx16bf16(<vscale x 16 x bfloat> undef)
+  call <vscale x 1 x bfloat> @llvm.exp.nxv1bf16(<vscale x 1 x bfloat> undef)
+  call <vscale x 2 x bfloat> @llvm.exp.nxv2bf16(<vscale x 2 x bfloat> undef)
+  call <vscale x 4 x bfloat> @llvm.exp.nxv4bf16(<vscale x 4 x bfloat> undef)
+  call <vscale x 8 x bfloat> @llvm.exp.nxv8bf16(<vscale x 8 x bfloat> undef)
+  call <vscale x 16 x bfloat> @llvm.exp.nxv16bf16(<vscale x 16 x bfloat> undef)
   call float @llvm.exp.f32(float undef)
   call <2 x float> @llvm.exp.v2f32(<2 x float> undef)
   call <4 x float> @llvm.exp.v4f32(<4 x float> undef)
   call <8 x float> @llvm.exp.v8f32(<8 x float> undef)
   call <16 x float> @llvm.exp.v16f32(<16 x float> undef)
-  call <vscale x 1 x float> @llvm.exp.nvx1f32(<vscale x 1 x float> undef)
-  call <vscale x 2 x float> @llvm.exp.nvx2f32(<vscale x 2 x float> undef)
-  call <vscale x 4 x float> @llvm.exp.nvx4f32(<vscale x 4 x float> undef)
-  call <vscale x 8 x float> @llvm.exp.nvx8f32(<vscale x 8 x float> undef)
-  call <vscale x 16 x float> @llvm.exp.nvx16f32(<vscale x 16 x float> undef)
+  call <vscale x 1 x float> @llvm.exp.nxv1f32(<vscale x 1 x float> undef)
+  call <vscale x 2 x float> @llvm.exp.nxv2f32(<vscale x 2 x float> undef)
+  call <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float> undef)
+  call <vscale x 8 x float> @llvm.exp.nxv8f32(<vscale x 8 x float> undef)
+  call <vscale x 16 x float> @llvm.exp.nxv16f32(<vscale x 16 x float> undef)
   call double @llvm.exp.f64(double undef)
   call <2 x double> @llvm.exp.v2f64(<2 x double> undef)
   call <4 x double> @llvm.exp.v4f64(<4 x double> undef)
   call <8 x double> @llvm.exp.v8f64(<8 x double> undef)
   call <16 x double> @llvm.exp.v16f64(<16 x double> undef)
-  call <vscale x 1 x double> @llvm.exp.nvx1f64(<vscale x 1 x double> undef)
-  call <vscale x 2 x double> @llvm.exp.nvx2f64(<vscale x 2 x double> undef)
-  call <vscale x 4 x double> @llvm.exp.nvx4f64(<vscale x 4 x double> undef)
-  call <vscale x 8 x double> @llvm.exp.nvx8f64(<vscale x 8 x double> undef)
+  call <vscale x 1 x double> @llvm.exp.nxv1f64(<vscale x 1 x double> undef)
+  call <vscale x 2 x double> @llvm.exp.nxv2f64(<vscale x 2 x double> undef)
+  call <vscale x 4 x double> @llvm.exp.nxv4f64(<vscale x 4 x double> undef)
+  call <vscale x 8 x double> @llvm.exp.nxv8f64(<vscale x 8 x double> undef)
   ret void
 }
 
@@ -301,10 +301,10 @@ define void @exp_f16() {
   call <4 x half> @llvm.exp.v4f16(<4 x half> undef)
   call <8 x half> @llvm.exp.v8f16(<8 x half> undef)
   call <16 x half> @llvm.exp.v16f16(<16 x half> undef)
-  call <vscale x 1 x half> @llvm.exp.nvx1f16(<vscale x 1 x half> undef)
-  call <vscale x 2 x half> @llvm.exp.nvx2f16(<vscale x 2 x half> undef)
-  call <vscale x 4 x half> @llvm.exp.nvx4f16(<vscale x 4 x half> undef)
-  call <vscale x 8 x half> @llvm.exp.nvx8f16(<vscale x 8 x half> undef)
+  call <vscale x 1 x half> @llvm.exp.nxv1f16(<vscale x 1 x half> undef)
+  call <vscale x 2 x half> @llvm.exp.nxv2f16(<vscale x 2 x half> undef)
+  call <vscale x 4 x half> @llvm.exp.nxv4f16(<vscale x 4 x half> undef)
+  call <vscale x 8 x half> @llvm.exp.nxv8f16(<vscale x 8 x half> undef)
   ret void
 }
 
@@ -346,30 +346,30 @@ define void @exp2() {
   call <4 x bfloat> @llvm.exp2.v4bf16(<4 x bfloat> undef)
   call <8 x bfloat> @llvm.exp2.v8bf16(<8 x bfloat> undef)
   call <16 x bfloat> @llvm.exp2.v16bf16(<16 x bfloat> undef)
-  call <vscale x 1 x bfloat> @llvm.exp2.nvx1bf16(<vscale x 1 x bfloat> undef)
-  call <vscale x 2 x bfloat> @llvm.exp2.nvx2bf16(<vscale x 2 x bfloat> undef)
-  call <vscale x 4 x bfloat> @llvm.exp2.nvx4bf16(<vscale x 4 x bfloat> undef)
-  call <vscale x 8 x bfloat> @llvm.exp2.nvx8bf16(<vscale x 8 x bfloat> undef)
-  call <vscale x 16 x bfloat> @llvm.exp2.nvx16bf16(<vscale x 16 x bfloat> undef)
+  call <vscale x 1 x bfloat> @llvm.exp2.nxv1bf16(<vscale x 1 x bfloat> undef)
+  call <vscale x 2 x bfloat> @llvm.exp2.nxv2bf16(<vscale x 2 x bfloat> undef)
+  call <vscale x 4 x bfloat> @llvm.exp2.nxv4bf16(<vscale x 4 x bfloat> undef)
+  call <vscale x 8 x bfloat> @llvm.exp2.nxv8bf16(<vscale x 8 x bfloat> undef)
+  call <vscale x 16 x bfloat> @llvm.exp2.nxv16bf16(<vscale x 16 x bfloat> undef)
   call float @llvm.exp2.f32(float undef)
   call <2 x float> @llvm.exp2.v2f32(<2 x float> undef)
   call <4 x float> @llvm.exp2.v4f32(<4 x float> undef)
   call <8 x float> @llvm.exp2.v8f32(<8 x float> undef)
   call <16 x float> @llvm.exp2.v16f32(<16 x float> undef)
-  call <vscale x 1 x float> @llvm.exp2.nvx1f32(<vscale x 1 x float> undef)
-  call <vscale x 2 x float> @llvm.exp2.nvx2f32(<vscale x 2 x float> undef)
-  call <vscale x 4 x float> @llvm.exp2.nvx4f32(<vscale x 4 x float> undef)
-  call <vscale x 8 x float> @llvm.exp2.nvx8f32(<vscale x 8 x float> undef)
-  call <vscale x 16 x float> @llvm.exp2.nvx16f32(<vscale x 16 x float> undef)
+  call <vscale x 1 x float> @llvm.exp2.nxv1f32(<vscale x 1 x float> undef)
+  call <vscale x 2 x float> @llvm.exp2.nxv2f32(<vscale x 2 x float> undef)
+  call <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float> undef)
+  call <vscale x 8 x float> @llvm.exp2.nxv8f32(<vscale x 8 x float> undef)
+  call <vscale x 16 x float> @llvm.exp2.nxv16f32(<vscale x 16 x float> undef)
   call double @llvm.exp2.f64(double undef)
   call <2 x double> @llvm.exp2.v2f64(<2 x double> undef)
   call <4 x double> @llvm.exp2.v4f64(<4 x double> undef)
   call <8 x double> @llvm.exp2.v8f64(<8 x double> undef)
   call <16 x double> @llvm.exp2.v16f64(<16 x double> undef)
-  call <vscale x 1 x double> @llvm.exp2.nvx1f64(<vscale x 1 x double> undef)
-  call <vscale x 2 x double> @llvm.exp2.nvx2f64(<vscale x 2 x double> undef)
-  call <vscale x 4 x double> @llvm.exp2.nvx4f64(<vscale x 4 x double> undef)
-  call <vscale x 8 x double> @llvm.exp2.nvx8f64(<vscale x 8 x double> undef)
+  call <vscale x 1 x double> @llvm.exp2.nxv1f64(<vscale x 1 x double> undef)
+  call <vscale x 2 x double> @llvm.exp2.nxv2f64(<vscale x 2 x double> undef)
+  call <vscale x 4 x double> @llvm.exp2.nxv4f64(<vscale x 4 x double> undef)
+  call <vscale x 8 x double> @llvm.exp2.nxv8f64(<vscale x 8 x double> undef)
   ret void
 }
 
@@ -403,10 +403,10 @@ define void @exp2_f16() {
   call <4 x half> @llvm.exp2.v4f16(<4 x half> undef)
   call <8 x half> @llvm.exp2.v8f16(<8 x half> undef)
   call <16 x half> @llvm.exp2.v16f16(<16 x half> undef)
-  call <vscale x 1 x half> @llvm.exp2.nvx1f16(<vscale x 1 x half> undef)
-  call <vscale x 2 x half> @llvm.exp2.nvx2f16(<vscale x 2 x half> undef)
-  call <vscale x 4 x half> @llvm.exp2.nvx4f16(<vscale x 4 x half> undef)
-  call <vscale x 8 x half> @llvm.exp2.nvx8f16(<vscale x 8 x half> undef)
+  call <vscale x 1 x half> @llvm.exp2.nxv1f16(<vscale x 1 x half> undef)
+  call <vscale x 2 x half> @llvm.exp2.nxv2f16(<vscale x 2 x half> undef)
+  call <vscale x 4 x half> @llvm.exp2.nxv4f16(<vscale x 4 x half> undef)
+  call <vscale x 8 x half> @llvm.exp2.nxv8f16(<vscale x 8 x half> undef)
   ret void
 }
 
@@ -448,30 +448,30 @@ define void @log() {
   call <4 x bfloat> @llvm.log.v4bf16(<4 x bfloat> undef)
   call <8 x bfloat> @llvm.log.v8bf16(<8 x bfloat> undef)
   call <16 x bfloat> @llvm.log.v16bf16(<16 x bfloat> undef)
-  call <vscale x 1 x bfloat> @llvm.log.nvx1bf16(<vscale x 1 x bfloat> undef)
-  call <vscale x 2 x bfloat> @llvm.log.nvx2bf16(<vscale x 2 x bfloat> undef)
-  call <vscale x 4 x bfloat> @llvm.log.nvx4bf16(<vscale x 4 x bfloat> undef)
-  call <vscale x 8 x bfloat> @llvm.log.nvx8bf16(<vscale x 8 x bfloat> undef)
-  call <vscale x 16 x bfloat> @llvm.log.nvx16bf16(<vscale x 16 x bfloat> undef)
+  call <vscale x 1 x bfloat> @llvm.log.nxv1bf16(<vscale x 1 x bfloat> undef)
+  call <vscale x 2 x bfloat> @llvm.log.nxv2bf16(<vscale x 2 x bfloat> undef)
+  call <vscale x 4 x bfloat> @llvm.log.nxv4bf16(<vscale x 4 x bfloat> undef)
+  call <vscale x 8 x bfloat> @llvm.log.nxv8bf16(<vscale x 8 x bfloat> undef)
+  call <vscale x 16 x bfloat> @llvm.log.nxv16bf16(<vscale x 16 x bfloat> undef)
   call float @llvm.log.f32(float undef)
   call <2 x float> @llvm.log.v2f32(<2 x float> undef)
   call <4 x float> @llvm.log.v4f32(<4 x float> undef)
   call <8 x float> @llvm.log.v8f32(<8 x float> undef)
   call <16 x float> @llvm.log.v16f32(<16 x float> undef)
-  call <vscale x 1 x float> @llvm.log.nvx1f32(<vscale x 1 x float> undef)
-  call <vscale x 2 x float> @llvm.log.nvx2f32(<vscale x 2 x float> undef)
-  call <vscale x 4 x float> @llvm.log.nvx4f32(<vscale x 4 x float> undef)
-  call <vscale x 8 x float> @llvm.log.nvx8f32(<vscale x 8 x float> undef)
-  call <vscale x 16 x float> @llvm.log.nvx16f32(<vscale x 16 x float> undef)
+  call <vscale x 1 x float> @llvm.log.nxv1f32(<vscale x 1 x float> undef)
+  call <vscale x 2 x float> @llvm.log.nxv2f32(<vscale x 2 x float> undef)
+  call <vscale x 4 x float> @llvm.log.nxv4f32(<vscale x 4 x float> undef)
+  call <vscale x 8 x float> @llvm.log.nxv8f32(<vscale x 8 x float> undef)
+  call <vscale x 16 x float> @llvm.log.nxv16f32(<vscale x 16 x float> undef)
   call double @llvm.log.f64(double undef)
   call <2 x double> @llvm.log.v2f64(<2 x double> undef)
   call <4 x double> @llvm.log.v4f64(<4 x double> undef)
   call <8 x double> @llvm.log.v8f64(<8 x double> undef)
   call <16 x double> @llvm.log.v16f64(<16 x double> undef)
-  call <vscale x 1 x double> @llvm.log.nvx1f64(<vscale x 1 x double> undef)
-  call <vscale x 2 x double> @llvm.log.nvx2f64(<vscale x 2 x double> undef)
-  call <vscale x 4 x double> @llvm.log.nvx4f64(<vscale x 4 x double> undef)
-  call <vscale x 8 x double> @llvm.log.nvx8f64(<vscale x 8 x double> undef)
+  call <vscale x 1 x double> @llvm.log.nxv1f64(<vscale x 1 x double> undef)
+  call <vscale x 2 x double> @llvm.log.nxv2f64(<vscale x 2 x double> undef)
+  call <vscale x 4 x double> @llvm.log.nxv4f64(<vscale x 4 x double> undef)
+  call <vscale x 8 x double> @llvm.log.nxv8f64(<vscale x 8 x double> undef)
   ret void
 }
 
@@ -505,10 +505,10 @@ define void @log_f16() {
   call <4 x half> @llvm.log.v4f16(<4 x half> undef)
   call <8 x half> @llvm.log.v8f16(<8 x half> undef)
   call <16 x half> @llvm.log.v16f16(<16 x half> undef)
-  call <vscale x 1 x half> @llvm.log.nvx1f16(<vscale x 1 x half> undef)
-  call <vscale x 2 x half> @llvm.log.nvx2f16(<vscale x 2 x half> undef)
-  call <vscale x 4 x half> @llvm.log.nvx4f16(<vscale x 4 x half> undef)
-  call <vscale x 8 x half> @llvm.log.nvx8f16(<vscale x 8 x half> undef)
+  call <vscale x 1 x half> @llvm.log.nxv1f16(<vscale x 1 x half> undef)
+  call <vscale x 2 x half> @llvm.log.nxv2f16(<vscale x 2 x half> undef)
+  call <vscale x 4 x half> @llvm.log.nxv4f16(<vscale x 4 x half> undef)
+  call <vscale x 8 x half> @llvm.log.nxv8f16(<vscale x 8 x half> undef)
   ret void
 }
 
@@ -550,30 +550,30 @@ define void @log10() {
   call <4 x bfloat> @llvm.log10.v4bf16(<4 x bfloat> undef)
   call <8 x bfloat> @llvm.log10.v8bf16(<8 x bfloat> undef)
   call <16 x bfloat> @llvm.log10.v16bf16(<16 x bfloat> undef)
-  call <vscale x 1 x bfloat> @llvm.log10.nvx1bf16(<vscale x 1 x bfloat> undef)
-  call <vscale x 2 x bfloat> @llvm.log10.nvx2bf16(<vscale x 2 x bfloat> undef)
-  call <vscale x 4 x bfloat> @llvm.log10.nvx4bf16(<vscale x 4 x bfloat> undef)
-  call <vscale x 8 x bfloat> @llvm.log10.nvx8bf16(<vscale x 8 x bfloat> undef)
-  call <vscale x 16 x bfloat> @llvm.log10.nvx16bf16(<vscale x 16 x bfloat> undef)
+  call <vscale x 1 x bfloat> @llvm.log10.nxv1bf16(<vscale x 1 x bfloat> undef)
+  call <vscale x 2 x bfloat> @llvm.log10.nxv2bf16(<vscale x 2 x bfloat> undef)
+  call <vscale x 4 x bfloat> @llvm.log10.nxv4bf16(<vscale x 4 x bfloat> undef)
+  call <vscale x 8 x bfloat> @llvm.log10.nxv8bf16(<vscale x 8 x bfloat> undef)
+  call <vscale x 16 x bfloat> @llvm.log10.nxv16bf16(<vscale x 16 x bfloat> undef)
   call float @llvm.log10.f32(float undef)
   call <2 x float> @llvm.log10.v2f32(<2 x float> undef)
   call <4 x float> @llvm.log10.v4f32(<4 x float> undef)
   call <8 x float> @llvm.log10.v8f32(<8 x float> undef)
   call <16 x float> @llvm.log10.v16f32(<16 x float> undef)
-  call <vscale x 1 x float> @llvm.log10.nvx1f32(<vscale x 1 x float> undef)
-  call <vscale x 2 x float> @llvm.log10.nvx2f32(<vscale x 2 x float> undef)
-  call <vscale x 4 x float> @llvm.log10.nvx4f32(<vscale x 4 x float> undef)
-  call <vscale x 8 x float> @llvm.log10.nvx8f32(<vscale x 8 x float> undef)
-  call <vscale x 16 x float> @llvm.log10.nvx16f32(<vscale x 16 x float> undef)
+  call <vscale x 1 x float> @llvm.log10.nxv1f32(<vscale x 1 x float> undef)
+  call <vscale x 2 x float> @llvm.log10.nxv2f32(<vscale x 2 x float> undef)
+  call <vscale x 4 x float> @llvm.log10.nxv4f32(<vscale x 4 x float> undef)
+  call <vscale x 8 x float> @llvm.log10.nxv8f32(<vscale x 8 x float> undef)
+  call <vscale x 16 x float> @llvm.log10.nxv16f32(<vscale x 16 x float> undef)
   call double @llvm.log10.f64(double undef)
   call <2 x double> @llvm.log10.v2f64(<2 x double> undef)
   call <4 x double> @llvm.log10.v4f64(<4 x double> undef)
   call <8 x double> @llvm.log10.v8f64(<8 x double> undef)
   call <16 x double> @llvm.log10.v16f64(<16 x double> undef)
-  call <vscale x 1 x double> @llvm.log10.nvx1f64(<vscale x 1 x double> undef)
-  call <vscale x 2 x double> @llvm.log10.nvx2f64(<vscale x 2 x double> undef)
-  call <vscale x 4 x double> @llvm.log10.nvx4f64(<vscale x 4 x double> undef)
-  call <vscale x 8 x double> @llvm.log10.nvx8f64(<vscale x 8 x double> undef)
+  call <vscale x 1 x double> @llvm.log10.nxv1f64(<vscale x 1 x double> undef)
+  call <vscale x 2 x double> @llvm.log10.nxv2f64(<vscale x 2 x double> undef)
+  call <vscale x 4 x double> @llvm.log10.nxv4f64(<vscale x 4 x double> undef)
+  call <vscale x 8 x double> @llvm.log10.nxv8f64(<vscale x 8 x double> undef)
   ret void
 }
 
@@ -607,10 +607,10 @@ define void @log10_f16() {
   call <4 x half> @llvm.log10.v4f16(<4 x half> undef)
   call <8 x half> @llvm.log10.v8f16(<8 x half> undef)
   call <16 x half> @llvm.log10.v16f16(<16 x half> undef)
-  call <vscale x 1 x half> @llvm.log10.nvx1f16(<vscale x 1 x half> undef)
-  call <vscale x 2 x half> @llvm.log10.nvx2f16(<vscale x 2 x half> undef)
-  call <vscale x 4 x half> @llvm.log10.nvx4f16(<vscale x 4 x half> undef)
-  call <vscale x 8 x half> @llvm.log10.nvx8f16(<vscale x 8 x half> undef)
+  call <vscale x 1 x half> @llvm.log10.nxv1f16(<vscale x 1 x half> undef)
+  call <vscale x 2 x half> @llvm.log10.nxv2f16(<vscale x 2 x half> undef)
+  call <vscale x 4 x half> @llvm.log10.nxv4f16(<vscale x 4 x half> undef)
+  call <vscale x 8 x half> @llvm.log10.nxv8f16(<vscale x 8 x half> undef)
   ret void
 }
 
@@ -652,30 +652,30 @@ define void @log2() {
   call <4 x bfloat> @llvm.log2.v4bf16(<4 x bfloat> undef)
   call <8 x bfloat> @llvm.log2.v8bf16(<8 x bfloat> undef)
   call <16 x bfloat> @llvm.log2.v16bf16(<16 x bfloat> undef)
-  call <vscale x 1 x bfloat> @llvm.log2.nvx1bf16(<vscale x 1 x bfloat> undef)
-  call <vscale x 2 x bfloat> @llvm.log2.nvx2bf16(<vscale x 2 x bfloat> undef)
-  call <vscale x 4 x bfloat> @llvm.log2.nvx4bf16(<vscale x 4 x bfloat> undef)
-  call <vscale x 8 x bfloat> @llvm.log2.nvx8bf16(<vscale x 8 x bfloat> undef)
-  call <vscale x 16 x bfloat> @llvm.log2.nvx16bf16(<vscale x 16 x bfloat> undef)
+  call <vscale x 1 x bfloat> @llvm.log2.nxv1bf16(<vscale x 1 x bfloat> undef)
+  call <vscale x 2 x bfloat> @llvm.log2.nxv2bf16(<vscale x 2 x bfloat> undef)
+  call <vscale x 4 x bfloat> @llvm.log2.nxv4bf16(<vscale x 4 x bfloat> undef)
+  call <vscale x 8 x bfloat> @llvm.log2.nxv8bf16(<vscale x 8 x bfloat> undef)
+  call <vscale x 16 x bfloat> @llvm.log2.nxv16bf16(<vscale x 16 x bfloat> undef)
   call float @llvm.log2.f32(float undef)
   call <2 x float> @llvm.log2.v2f32(<2 x float> undef)
   call <4 x float> @llvm.log2.v4f32(<4 x float> undef)
   call <8 x float> @llvm.log2.v8f32(<8 x float> undef)
   call <16 x float> @llvm.log2.v16f32(<16 x float> undef)
-  call <vscale x 1 x float> @llvm.log2.nvx1f32(<vscale x 1 x float> undef)
-  call <vscale x 2 x float> @llvm.log2.nvx2f32(<vscale x 2 x float> undef)
-  call <vscale x 4 x float> @llvm.log2.nvx4f32(<vscale x 4 x float> undef)
-  call <vscale x 8 x float> @llvm.log2.nvx8f32(<vscale x 8 x float> undef)
-  call <vscale x 16 x float> @llvm.log2.nvx16f32(<vscale x 16 x float> undef)
+  call <vscale x 1 x float> @llvm.log2.nxv1f32(<vscale x 1 x float> undef)
+  call <vscale x 2 x float> @llvm.log2.nxv2f32(<vscale x 2 x float> undef)
+  call <vscale x 4 x float> @llvm.log2.nxv4f32(<vscale x 4 x float> undef)
+  call <vscale x 8 x float> @llvm.log2.nxv8f32(<vscale x 8 x float> undef)
+  call <vscale x 16 x float> @llvm.log2.nxv16f32(<vscale x 16 x float> undef)
   call double @llvm.log2.f64(double undef)
   call <2 x double> @llvm.log2.v2f64(<2 x double> undef)
   call <4 x double> @llvm.log2.v4f64(<4 x double> undef)
   call <8 x double> @llvm.log2.v8f64(<8 x double> undef)
   call <16 x double> @llvm.log2.v16f64(<16 x double> undef)
-  call <vscale x 1 x double> @llvm.log2.nvx1f64(<vscale x 1 x double> undef)
-  call <vscale x 2 x double> @llvm.log2.nvx2f64(<vscale x 2 x double> undef)
-  call <vscale x 4 x double> @llvm.log2.nvx4f64(<vscale x 4 x double> undef)
-  call <vscale x 8 x double> @llvm.log2.nvx8f64(<vscale x 8 x double> undef)
+  call <vscale x 1 x double> @llvm.log2.nxv1f64(<vscale x 1 x double> undef)
+  call <vscale x 2 x double> @llvm.log2.nxv2f64(<vscale x 2 x double> undef)
+  call <vscale x 4 x double> @llvm.log2.nxv4f64(<vscale x 4 x double> undef)
+  call <vscale x 8 x double> @llvm.log2.nxv8f64(<vscale x 8 x double> undef)
   ret void
 }
 
@@ -709,10 +709,10 @@ define void @log2_f16() {
   call <4 x half> @llvm.log2.v4f16(<4 x half> undef)
   call <8 x half> @llvm.log2.v8f16(<8 x half> undef)
   call <16 x half> @llvm.log2.v16f16(<16 x half> undef)
-  call <vscale x 1 x half> @llvm.log2.nvx1f16(<vscale x 1 x half> undef)
-  call <vscale x 2 x half> @llvm.log2.nvx2f16(<vscale x 2 x half> undef)
-  call <vscale x 4 x half> @llvm.log2.nvx4f16(<vscale x 4 x half> undef)
-  call <vscale x 8 x half> @llvm.log2.nvx8f16(<vscale x 8 x half> undef)
+  call <vscale x 1 x half> @llvm.log2.nxv1f16(<vscale x 1 x half> undef)
+  call <vscale x 2 x half> @llvm.log2.nxv2f16(<vscale x 2 x half> undef)
+  call <vscale x 4 x half> @llvm.log2.nxv4f16(<vscale x 4 x half> undef)
+  call <vscale x 8 x half> @llvm.log2.nxv8f16(<vscale x 8 x half> undef)
   ret void
 }
 

diff --git a/llvm/test/Analysis/CostModel/RISCV/fround.ll b/llvm/test/Analysis/CostModel/RISCV/fround.ll
index b0964983550814..a0818d487d151a 100644
--- a/llvm/test/Analysis/CostModel/RISCV/fround.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/fround.ll
@@ -40,30 +40,30 @@ define void @floor() {
   call <4 x bfloat> @llvm.floor.v4bf16(<4 x bfloat> undef)
   call <8 x bfloat> @llvm.floor.v8bf16(<8 x bfloat> undef)
   call <16 x bfloat> @llvm.floor.v16bf16(<16 x bfloat> undef)
-  call <vscale x 1 x bfloat> @llvm.floor.nvx1bf16(<vscale x 1 x bfloat> undef)
-  call <vscale x 2 x bfloat> @llvm.floor.nvx2bf16(<vscale x 2 x bfloat> undef)
-  call <vscale x 4 x bfloat> @llvm.floor.nvx4bf16(<vscale x 4 x bfloat> undef)
-  call <vscale x 8 x bfloat> @llvm.floor.nvx8bf16(<vscale x 8 x bfloat> undef)
-  call <vscale x 16 x bfloat> @llvm.floor.nvx16bf16(<vscale x 16 x bfloat> undef)
+  call <vscale x 1 x bfloat> @llvm.floor.nxv1bf16(<vscale x 1 x bfloat> undef)
+  call <vscale x 2 x bfloat> @llvm.floor.nxv2bf16(<vscale x 2 x bfloat> undef)
+  call <vscale x 4 x bfloat> @llvm.floor.nxv4bf16(<vscale x 4 x bfloat> undef)
+  call <vscale x 8 x bfloat> @llvm.floor.nxv8bf16(<vscale x 8 x bfloat> undef)
+  call <vscale x 16 x bfloat> @llvm.floor.nxv16bf16(<vscale x 16 x bfloat> undef)
   call float @llvm.floor.f32(float undef)
   call <2 x float> @llvm.floor.v2f32(<2 x float> undef)
   call <4 x float> @llvm.floor.v4f32(<4 x float> undef)
   call <8 x float> @llvm.floor.v8f32(<8 x float> undef)
   call <16 x float> @llvm.floor.v16f32(<16 x float> undef)
-  call <vscale x 1 x float> @llvm.floor.nvx1f32(<vscale x 1 x float> undef)
-  call <vscale x 2 x float> @llvm.floor.nvx2f32(<vscale x 2 x float> undef)
-  call <vscale x 4 x float> @llvm.floor.nvx4f32(<vscale x 4 x float> undef)
-  call <vscale x 8 x float> @llvm.floor.nvx8f32(<vscale x 8 x float> undef)
-  call <vscale x 16 x float> @llvm.floor.nvx16f32(<vscale x 16 x float> undef)
+  call <vscale x 1 x float> @llvm.floor.nxv1f32(<vscale x 1 x float> undef)
+  call <vscale x 2 x float> @llvm.floor.nxv2f32(<vscale x 2 x float> undef)
+  call <vscale x 4 x float> @llvm.floor.nxv4f32(<vscale x 4 x float> undef)
+  call <vscale x 8 x float> @llvm.floor.nxv8f32(<vscale x 8 x float> undef)
+  call <vscale x 16 x float> @llvm.floor.nxv16f32(<vscale x 16 x float> undef)
   call double @llvm.floor.f64(double undef)
   call <2 x double> @llvm.floor.v2f64(<2 x double> undef)
   call <4 x double> @llvm.floor.v4f64(<4 x double> undef)
   call <8 x double> @llvm.floor.v8f64(<8 x double> undef)
   call <16 x double> @llvm.floor.v16f64(<16 x double> undef)
-  call <vscale x 1 x double> @llvm.floor.nvx1f64(<vscale x 1 x double> undef)
-  call <vscale x 2 x double> @llvm.floor.nvx2f64(<vscale x 2 x double> undef)
-  call <vscale x 4 x double> @llvm.floor.nvx4f64(<vscale x 4 x double> undef)
-  call <vscale x 8 x double> @llvm.floor.nvx8f64(<vscale x 8 x double> undef)
+  call <vscale x 1 x double> @llvm.floor.nxv1f64(<vscale x 1 x double> undef)
+  call <vscale x 2 x double> @llvm.floor.nxv2f64(<vscale x 2 x double> undef)
+  call <vscale x 4 x double> @llvm.floor.nxv4f64(<vscale x 4 x double> undef)
+  call <vscale x 8 x double> @llvm.floor.nxv8f64(<vscale x 8 x double> undef)
   ret void
 }
 
@@ -99,11 +99,11 @@ define void @floor_fp16() {
   call <4 x half> @llvm.floor.v4f16(<4 x half> undef)
   call <8 x half> @llvm.floor.v8f16(<8 x half> undef)
   call <16 x half> @llvm.floor.v16f16(<16 x half> undef)
-  call <vscale x 1 x half> @llvm.floor.nvx1f16(<vscale x 1 x half> undef)
-  call <vscale x 2 x half> @llvm.floor.nvx2f16(<vscale x 2 x half> undef)
-  call <vscale x 4 x half> @llvm.floor.nvx4f16(<vscale x 4 x half> undef)
-  call <vscale x 8 x half> @llvm.floor.nvx8f16(<vscale x 8 x half> undef)
-  call <vscale x 16 x half> @llvm.floor.nvx16f16(<vscale x 16 x half> undef)
+  call <vscale x 1 x half> @llvm.floor.nxv1f16(<vscale x 1 x half> undef)
+  call <vscale x 2 x half> @llvm.floor.nxv2f16(<vscale x 2 x half> undef)
+  call <vscale x 4 x half> @llvm.floor.nxv4f16(<vscale x 4 x half> undef)
+  call <vscale x 8 x half> @llvm.floor.nxv8f16(<vscale x 8 x half> undef)
+  call <vscale x 16 x half> @llvm.floor.nxv16f16(<vscale x 16 x half> undef)
   ret void
 }
 
@@ -145,30 +145,30 @@ define void @ceil() {
   call <4 x bfloat> @llvm.ceil.v4bf16(<4 x bfloat> undef)
   call <8 x bfloat> @llvm.ceil.v8bf16(<8 x bfloat> undef)
   call <16 x bfloat> @llvm.ceil.v16bf16(<16 x bfloat> undef)
-  call <vscale x 1 x bfloat> @llvm.ceil.nvx1bf16(<vscale x 1 x bfloat> undef)
-  call <vscale x 2 x bfloat> @llvm.ceil.nvx2bf16(<vscale x 2 x bfloat> undef)
-  call <vscale x 4 x bfloat> @llvm.ceil.nvx4bf16(<vscale x 4 x bfloat> undef)
-  call <vscale x 8 x bfloat> @llvm.ceil.nvx8bf16(<vscale x 8 x bfloat> undef)
-  call <vscale x 16 x bfloat> @llvm.ceil.nvx16bf16(<vscale x 16 x bfloat> undef)
+  call <vscale x 1 x bfloat> @llvm.ceil.nxv1bf16(<vscale x 1 x bfloat> undef)
+  call <vscale x 2 x bfloat> @llvm.ceil.nxv2bf16(<vscale x 2 x bfloat> undef)
+  call <vscale x 4 x bfloat> @llvm.ceil.nxv4bf16(<vscale x 4 x bfloat> undef)
+  call <vscale x 8 x bfloat> @llvm.ceil.nxv8bf16(<vscale x 8 x bfloat> undef)
+  call <vscale x 16 x bfloat> @llvm.ceil.nxv16bf16(<vscale x 16 x bfloat> undef)
   call float @llvm.ceil.f32(float undef)
   call <2 x float> @llvm.ceil.v2f32(<2 x float> undef)
   call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)
   call <8 x float> @llvm.ceil.v8f32(<8 x float> undef)
   call <16 x float> @llvm.ceil.v16f32(<16 x float> undef)
-  call <vscale x 1 x float> @llvm.ceil.nvx1f32(<vscale x 1 x float> undef)
-  call <vscale x 2 x float> @llvm.ceil.nvx2f32(<vscale x 2 x float> undef)
-  call <vscale x 4 x float> @llvm.ceil.nvx4f32(<vscale x 4 x float> undef)
-  call <vscale x 8 x float> @llvm.ceil.nvx8f32(<vscale x 8 x float> undef)
-  call <vscale x 16 x float> @llvm.ceil.nvx16f32(<vscale x 16 x float> undef)
+  call <vscale x 1 x float> @llvm.ceil.nxv1f32(<vscale x 1 x float> undef)
+  call <vscale x 2 x float> @llvm.ceil.nxv2f32(<vscale x 2 x float> undef)
+  call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> undef)
+  call <vscale x 8 x float> @llvm.ceil.nxv8f32(<vscale x 8 x float> undef)
+  call <vscale x 16 x float> @llvm.ceil.nxv16f32(<vscale x 16 x float> undef)
   call double @llvm.ceil.f64(double undef)
   call <2 x double> @llvm.ceil.v2f64(<2 x double> undef)
   call <4 x double> @llvm.ceil.v4f64(<4 x double> undef)
   call <8 x double> @llvm.ceil.v8f64(<8 x double> undef)
   call <16 x double> @llvm.ceil.v16f64(<16 x double> undef)
-  call <vscale x 1 x double> @llvm.ceil.nvx1f64(<vscale x 1 x double> undef)
-  call <vscale x 2 x double> @llvm.ceil.nvx2f64(<vscale x 2 x double> undef)
-  call <vscale x 4 x double> @llvm.ceil.nvx4f64(<vscale x 4 x double> undef)
-  call <vscale x 8 x double> @llvm.ceil.nvx8f64(<vscale x 8 x double> undef)
+  call <vscale x 1 x double> @llvm.ceil.nxv1f64(<vscale x 1 x double> undef)
+  call <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double> undef)
+  call <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double> undef)
+  call <vscale x 8 x double> @llvm.ceil.nxv8f64(<vscale x 8 x double> undef)
   ret void
 }
 
@@ -204,11 +204,11 @@ define void @ceil_fp16() {
   call <4 x half> @llvm.ceil.v4f16(<4 x half> undef)
   call <8 x half> @llvm.ceil.v8f16(<8 x half> undef)
   call <16 x half> @llvm.ceil.v16f16(<16 x half> undef)
-  call <vscale x 1 x half> @llvm.ceil.nvx1f16(<vscale x 1 x half> undef)
-  call <vscale x 2 x half> @llvm.ceil.nvx2f16(<vscale x 2 x half> undef)
-  call <vscale x 4 x half> @llvm.ceil.nvx4f16(<vscale x 4 x half> undef)
-  call <vscale x 8 x half> @llvm.ceil.nvx8f16(<vscale x 8 x half> undef)
-  call <vscale x 16 x half> @llvm.ceil.nvx16f16(<vscale x 16 x half> undef)
+  call <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half> undef)
+  call <vscale x 2 x half> @llvm.ceil.nxv2f16(<vscale x 2 x half> undef)
+  call <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half> undef)
+  call <vscale x 8 x half> @llvm.ceil.nxv8f16(<vscale x 8 x half> undef)
+  call <vscale x 16 x half> @llvm.ceil.nxv16f16(<vscale x 16 x half> undef)
   ret void
 }
 
@@ -250,30 +250,30 @@ define void @trunc() {
   call <4 x bfloat> @llvm.trunc.v4bf16(<4 x bfloat> undef)
   call <8 x bfloat> @llvm.trunc.v8bf16(<8 x bfloat> undef)
   call <16 x bfloat> @llvm.trunc.v16bf16(<16 x bfloat> undef)
-  call <vscale x 1 x bfloat> @llvm.trunc.nvx1bf16(<vscale x 1 x bfloat> undef)
-  call <vscale x 2 x bfloat> @llvm.trunc.nvx2bf16(<vscale x 2 x bfloat> undef)
-  call <vscale x 4 x bfloat> @llvm.trunc.nvx4bf16(<vscale x 4 x bfloat> undef)
-  call <vscale x 8 x bfloat> @llvm.trunc.nvx8bf16(<vscale x 8 x bfloat> undef)
-  call <vscale x 16 x bfloat> @llvm.trunc.nvx16bf16(<vscale x 16 x bfloat> undef)
+  call <vscale x 1 x bfloat> @llvm.trunc.nxv1bf16(<vscale x 1 x bfloat> undef)
+  call <vscale x 2 x bfloat> @llvm.trunc.nxv2bf16(<vscale x 2 x bfloat> undef)
+  call <vscale x 4 x bfloat> @llvm.trunc.nxv4bf16(<vscale x 4 x bfloat> undef)
+  call <vscale x 8 x bfloat> @llvm.trunc.nxv8bf16(<vscale x 8 x bfloat> undef)
+  call <vscale x 16 x bfloat> @llvm.trunc.nxv16bf16(<vscale x 16 x bfloat> undef)
   call float @llvm.trunc.f32(float undef)
   call <2 x float> @llvm.trunc.v2f32(<2 x float> undef)
   call <4 x float> @llvm.trunc.v4f32(<4 x float> undef)
   call <8 x float> @llvm.trunc.v8f32(<8 x float> undef)
   call <16 x float> @llvm.trunc.v16f32(<16 x float> undef)
-  call <vscale x 1 x float> @llvm.trunc.nvx1f32(<vscale x 1 x float> undef)
-  call <vscale x 2 x float> @llvm.trunc.nvx2f32(<vscale x 2 x float> undef)
-  call <vscale x 4 x float> @llvm.trunc.nvx4f32(<vscale x 4 x float> undef)
-  call <vscale x 8 x float> @llvm.trunc.nvx8f32(<vscale x 8 x float> undef)
-  call <vscale x 16 x float> @llvm.trunc.nvx16f32(<vscale x 16 x float> undef)
+  call <vscale x 1 x float> @llvm.trunc.nxv1f32(<vscale x 1 x float> undef)
+  call <vscale x 2 x float> @llvm.trunc.nxv2f32(<vscale x 2 x float> undef)
+  call <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> undef)
+  call <vscale x 8 x float> @llvm.trunc.nxv8f32(<vscale x 8 x float> undef)
+  call <vscale x 16 x float> @llvm.trunc.nxv16f32(<vscale x 16 x float> undef)
   call double @llvm.trunc.f64(double undef)
   call <2 x double> @llvm.trunc.v2f64(<2 x double> undef)
   call <4 x double> @llvm.trunc.v4f64(<4 x double> undef)
   call <8 x double> @llvm.trunc.v8f64(<8 x double> undef)
   call <16 x double> @llvm.trunc.v16f64(<16 x double> undef)
-  call <vscale x 1 x double> @llvm.trunc.nvx1f64(<vscale x 1 x double> undef)
-  call <vscale x 2 x double> @llvm.trunc.nvx2f64(<vscale x 2 x double> undef)
-  call <vscale x 4 x double> @llvm.trunc.nvx4f64(<vscale x 4 x double> undef)
-  call <vscale x 8 x double> @llvm.trunc.nvx8f64(<vscale x 8 x double> undef)
+  call <vscale x 1 x double> @llvm.trunc.nxv1f64(<vscale x 1 x double> undef)
+  call <vscale x 2 x double> @llvm.trunc.nxv2f64(<vscale x 2 x double> undef)
+  call <vscale x 4 x double> @llvm.trunc.nxv4f64(<vscale x 4 x double> undef)
+  call <vscale x 8 x double> @llvm.trunc.nxv8f64(<vscale x 8 x double> undef)
   ret void
 }
 
@@ -309,11 +309,11 @@ define void @trunc_fp16() {
   call <4 x half> @llvm.trunc.v4f16(<4 x half> undef)
   call <8 x half> @llvm.trunc.v8f16(<8 x half> undef)
   call <16 x half> @llvm.trunc.v16f16(<16 x half> undef)
-  call <vscale x 1 x half> @llvm.trunc.nvx1f16(<vscale x 1 x half> undef)
-  call <vscale x 2 x half> @llvm.trunc.nvx2f16(<vscale x 2 x half> undef)
-  call <vscale x 4 x half> @llvm.trunc.nvx4f16(<vscale x 4 x half> undef)
-  call <vscale x 8 x half> @llvm.trunc.nvx8f16(<vscale x 8 x half> undef)
-  call <vscale x 16 x half> @llvm.trunc.nvx16f16(<vscale x 16 x half> undef)
+  call <vscale x 1 x half> @llvm.trunc.nxv1f16(<vscale x 1 x half> undef)
+  call <vscale x 2 x half> @llvm.trunc.nxv2f16(<vscale x 2 x half> undef)
+  call <vscale x 4 x half> @llvm.trunc.nxv4f16(<vscale x 4 x half> undef)
+  call <vscale x 8 x half> @llvm.trunc.nxv8f16(<vscale x 8 x half> undef)
+  call <vscale x 16 x half> @llvm.trunc.nxv16f16(<vscale x 16 x half> undef)
   ret void
 }
 
@@ -355,30 +355,30 @@ define void @rint() {
   call <4 x bfloat> @llvm.rint.v4bf16(<4 x bfloat> undef)
   call <8 x bfloat> @llvm.rint.v8bf16(<8 x bfloat> undef)
   call <16 x bfloat> @llvm.rint.v16bf16(<16 x bfloat> undef)
-  call <vscale x 1 x bfloat> @llvm.rint.nvx1bf16(<vscale x 1 x bfloat> undef)
-  call <vscale x 2 x bfloat> @llvm.rint.nvx2bf16(<vscale x 2 x bfloat> undef)
-  call <vscale x 4 x bfloat> @llvm.rint.nvx4bf16(<vscale x 4 x bfloat> undef)
-  call <vscale x 8 x bfloat> @llvm.rint.nvx8bf16(<vscale x 8 x bfloat> undef)
-  call <vscale x 16 x bfloat> @llvm.rint.nvx16bf16(<vscale x 16 x bfloat> undef)
+  call <vscale x 1 x bfloat> @llvm.rint.nxv1bf16(<vscale x 1 x bfloat> undef)
+  call <vscale x 2 x bfloat> @llvm.rint.nxv2bf16(<vscale x 2 x bfloat> undef)
+  call <vscale x 4 x bfloat> @llvm.rint.nxv4bf16(<vscale x 4 x bfloat> undef)
+  call <vscale x 8 x bfloat> @llvm.rint.nxv8bf16(<vscale x 8 x bfloat> undef)
+  call <vscale x 16 x bfloat> @llvm.rint.nxv16bf16(<vscale x 16 x bfloat> undef)
   call float @llvm.rint.f32(float undef)
   call <2 x float> @llvm.rint.v2f32(<2 x float> undef)
   call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
   call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
   call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
-  call <vscale x 1 x float> @llvm.rint.nvx1f32(<vscale x 1 x float> undef)
-  call <vscale x 2 x float> @llvm.rint.nvx2f32(<vscale x 2 x float> undef)
-  call <vscale x 4 x float> @llvm.rint.nvx4f32(<vscale x 4 x float> undef)
-  call <vscale x 8 x float> @llvm.rint.nvx8f32(<vscale x 8 x float> undef)
-  call <vscale x 16 x float> @llvm.rint.nvx16f32(<vscale x 16 x float> undef)
+  call <vscale x 1 x float> @llvm.rint.nxv1f32(<vscale x 1 x float> undef)
+  call <vscale x 2 x float> @llvm.rint.nxv2f32(<vscale x 2 x float> undef)
+  call <vscale x 4 x float> @llvm.rint.nxv4f32(<vscale x 4 x float> undef)
+  call <vscale x 8 x float> @llvm.rint.nxv8f32(<vscale x 8 x float> undef)
+  call <vscale x 16 x float> @llvm.rint.nxv16f32(<vscale x 16 x float> undef)
   call double @llvm.rint.f64(double undef)
   call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
   call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
   call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
   call <16 x double> @llvm.rint.v16f64(<16 x double> undef)
-  call <vscale x 1 x double> @llvm.rint.nvx1f64(<vscale x 1 x double> undef)
-  call <vscale x 2 x double> @llvm.rint.nvx2f64(<vscale x 2 x double> undef)
-  call <vscale x 4 x double> @llvm.rint.nvx4f64(<vscale x 4 x double> undef)
-  call <vscale x 8 x double> @llvm.rint.nvx8f64(<vscale x 8 x double> undef)
+  call <vscale x 1 x double> @llvm.rint.nxv1f64(<vscale x 1 x double> undef)
+  call <vscale x 2 x double> @llvm.rint.nxv2f64(<vscale x 2 x double> undef)
+  call <vscale x 4 x double> @llvm.rint.nxv4f64(<vscale x 4 x double> undef)
+  call <vscale x 8 x double> @llvm.rint.nxv8f64(<vscale x 8 x double> undef)
   ret void
 }
 
@@ -414,11 +414,11 @@ define void @rint_fp16() {
   call <4 x half> @llvm.rint.v4f16(<4 x half> undef)
   call <8 x half> @llvm.rint.v8f16(<8 x half> undef)
   call <16 x half> @llvm.rint.v16f16(<16 x half> undef)
-  call <vscale x 1 x half> @llvm.rint.nvx1f16(<vscale x 1 x half> undef)
-  call <vscale x 2 x half> @llvm.rint.nvx2f16(<vscale x 2 x half> undef)
-  call <vscale x 4 x half> @llvm.rint.nvx4f16(<vscale x 4 x half> undef)
-  call <vscale x 8 x half> @llvm.rint.nvx8f16(<vscale x 8 x half> undef)
-  call <vscale x 16 x half> @llvm.rint.nvx16f16(<vscale x 16 x half> undef)
+  call <vscale x 1 x half> @llvm.rint.nxv1f16(<vscale x 1 x half> undef)
+  call <vscale x 2 x half> @llvm.rint.nxv2f16(<vscale x 2 x half> undef)
+  call <vscale x 4 x half> @llvm.rint.nxv4f16(<vscale x 4 x half> undef)
+  call <vscale x 8 x half> @llvm.rint.nxv8f16(<vscale x 8 x half> undef)
+  call <vscale x 16 x half> @llvm.rint.nxv16f16(<vscale x 16 x half> undef)
   ret void
 }
 
@@ -460,30 +460,30 @@ define void @lrint() {
   call <4 x i64> @llvm.lrint.v4i64.v4bf16(<4 x bfloat> undef)
   call <8 x i64> @llvm.lrint.v8i64.v8bf16(<8 x bfloat> undef)
   call <16 x i64> @llvm.lrint.v16i64.v16bf16(<16 x bfloat> undef)
-  call <vscale x 1 x i64> @llvm.lrint.nvx1i64.nvx1bf16(<vscale x 1 x bfloat> undef)
-  call <vscale x 2 x i64> @llvm.lrint.nvx2i64.nvx2bf16(<vscale x 2 x bfloat> undef)
-  call <vscale x 4 x i64> @llvm.lrint.nvx4i64.nvx4bf16(<vscale x 4 x bfloat> undef)
-  call <vscale x 8 x i64> @llvm.lrint.nvx8i64.nvx8bf16(<vscale x 8 x bfloat> undef)
-  call <vscale x 16 x i64> @llvm.lrint.nvx16i64.nvx16bf16(<vscale x 16 x bfloat> undef)
+  call <vscale x 1 x i64> @llvm.lrint.nxv1i64.nxv1bf16(<vscale x 1 x bfloat> undef)
+  call <vscale x 2 x i64> @llvm.lrint.nxv2i64.nxv2bf16(<vscale x 2 x bfloat> undef)
+  call <vscale x 4 x i64> @llvm.lrint.nxv4i64.nxv4bf16(<vscale x 4 x bfloat> undef)
+  call <vscale x 8 x i64> @llvm.lrint.nxv8i64.nxv8bf16(<vscale x 8 x bfloat> undef)
+  call <vscale x 16 x i64> @llvm.lrint.nxv16i64.nxv16bf16(<vscale x 16 x bfloat> undef)
   call i64 @llvm.lrint.i64.f32(float undef)
   call <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float> undef)
   call <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float> undef)
   call <8 x i64> @llvm.lrint.v8i64.v8f32(<8 x float> undef)
   call <16 x i64> @llvm.lrint.v16i64.v16f32(<16 x float> undef)
-  call <vscale x 1 x i64> @llvm.lrint.nvx1i64.nvx1f32(<vscale x 1 x float> undef)
-  call <vscale x 2 x i64> @llvm.lrint.nvx2i64.nvx2f32(<vscale x 2 x float> undef)
-  call <vscale x 4 x i64> @llvm.lrint.nvx4i64.nvx4f32(<vscale x 4 x float> undef)
-  call <vscale x 8 x i64> @llvm.lrint.nvx8i64.nvx8f32(<vscale x 8 x float> undef)
-  call <vscale x 16 x i64> @llvm.lrint.nvx16i64.nvx16f32(<vscale x 16 x float> undef)
+  call <vscale x 1 x i64> @llvm.lrint.nxv1i64.nxv1f32(<vscale x 1 x float> undef)
+  call <vscale x 2 x i64> @llvm.lrint.nxv2i64.nxv2f32(<vscale x 2 x float> undef)
+  call <vscale x 4 x i64> @llvm.lrint.nxv4i64.nxv4f32(<vscale x 4 x float> undef)
+  call <vscale x 8 x i64> @llvm.lrint.nxv8i64.nxv8f32(<vscale x 8 x float> undef)
+  call <vscale x 16 x i64> @llvm.lrint.nxv16i64.nxv16f32(<vscale x 16 x float> undef)
   call i64 @llvm.lrint.i64.f64(double undef)
   call <2 x i64> @llvm.lrint.v2i64.v2f64(<2 x double> undef)
   call <4 x i64> @llvm.lrint.v4i64.v4f64(<4 x double> undef)
   call <8 x i64> @llvm.lrint.v8i64.v8f64(<8 x double> undef)
   call <16 x i64> @llvm.lrint.v16i64.v16f64(<16 x double> undef)
-  call <vscale x 1 x i64> @llvm.lrint.nvx1i64.nvx1f64(<vscale x 1 x double> undef)
-  call <vscale x 2 x i64> @llvm.lrint.nvx2i64.nvx2f64(<vscale x 2 x double> undef)
-  call <vscale x 4 x i64> @llvm.lrint.nvx4i64.nvx4f64(<vscale x 4 x double> undef)
-  call <vscale x 8 x i64> @llvm.lrint.nvx8i64.nvx8f64(<vscale x 8 x double> undef)
+  call <vscale x 1 x i64> @llvm.lrint.nxv1i64.nxv1f64(<vscale x 1 x double> undef)
+  call <vscale x 2 x i64> @llvm.lrint.nxv2i64.nxv2f64(<vscale x 2 x double> undef)
+  call <vscale x 4 x i64> @llvm.lrint.nxv4i64.nxv4f64(<vscale x 4 x double> undef)
+  call <vscale x 8 x i64> @llvm.lrint.nxv8i64.nxv8f64(<vscale x 8 x double> undef)
   ret void
 }
 
@@ -506,11 +506,11 @@ define void @lrint_fp16() {
   call <4 x i64> @llvm.lrint.v4f16(<4 x half> undef)
   call <8 x i64> @llvm.lrint.v8f16(<8 x half> undef)
   call <16 x i64> @llvm.lrint.v16f16(<16 x half> undef)
-  call <vscale x 1 x i64> @llvm.lrint.nvx1f16(<vscale x 1 x half> undef)
-  call <vscale x 2 x i64> @llvm.lrint.nvx2f16(<vscale x 2 x half> undef)
-  call <vscale x 4 x i64> @llvm.lrint.nvx4f16(<vscale x 4 x half> undef)
-  call <vscale x 8 x i64> @llvm.lrint.nvx8f16(<vscale x 8 x half> undef)
-  call <vscale x 16 x i64> @llvm.lrint.nvx16f16(<vscale x 16 x half> undef)
+  call <vscale x 1 x i64> @llvm.lrint.nxv1f16(<vscale x 1 x half> undef)
+  call <vscale x 2 x i64> @llvm.lrint.nxv2f16(<vscale x 2 x half> undef)
+  call <vscale x 4 x i64> @llvm.lrint.nxv4f16(<vscale x 4 x half> undef)
+  call <vscale x 8 x i64> @llvm.lrint.nxv8f16(<vscale x 8 x half> undef)
+  call <vscale x 16 x i64> @llvm.lrint.nxv16f16(<vscale x 16 x half> undef)
   ret void
 }
 
@@ -552,30 +552,30 @@ define void @llrint() {
   call <4 x i64> @llvm.llrint.v4i64.v4bf16(<4 x bfloat> undef)
   call <8 x i64> @llvm.llrint.v8i64.v8bf16(<8 x bfloat> undef)
   call <16 x i64> @llvm.llrint.v16i64.v16bf16(<16 x bfloat> undef)
-  call <vscale x 1 x i64> @llvm.llrint.nvx1i64.nvx1bf16(<vscale x 1 x bfloat> undef)
-  call <vscale x 2 x i64> @llvm.llrint.nvx2i64.nvx2bf16(<vscale x 2 x bfloat> undef)
-  call <vscale x 4 x i64> @llvm.llrint.nvx4i64.nvx4bf16(<vscale x 4 x bfloat> undef)
-  call <vscale x 8 x i64> @llvm.llrint.nvx8i64.nvx8bf16(<vscale x 8 x bfloat> undef)
-  call <vscale x 16 x i64> @llvm.llrint.nvx16i64.nvx16bf16(<vscale x 16 x bfloat> undef)
+  call <vscale x 1 x i64> @llvm.llrint.nxv1i64.nxv1bf16(<vscale x 1 x bfloat> undef)
+  call <vscale x 2 x i64> @llvm.llrint.nxv2i64.nxv2bf16(<vscale x 2 x bfloat> undef)
+  call <vscale x 4 x i64> @llvm.llrint.nxv4i64.nxv4bf16(<vscale x 4 x bfloat> undef)
+  call <vscale x 8 x i64> @llvm.llrint.nxv8i64.nxv8bf16(<vscale x 8 x bfloat> undef)
+  call <vscale x 16 x i64> @llvm.llrint.nxv16i64.nxv16bf16(<vscale x 16 x bfloat> undef)
   call i64 @llvm.llrint.i64.f32(float undef)
   call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> undef)
   call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
   call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
   call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
-  call <vscale x 1 x i64> @llvm.llrint.nvx1i64.nvx1f32(<vscale x 1 x float> undef)
-  call <vscale x 2 x i64> @llvm.llrint.nvx2i64.nvx2f32(<vscale x 2 x float> undef)
-  call <vscale x 4 x i64> @llvm.llrint.nvx4i64.nvx4f32(<vscale x 4 x float> undef)
-  call <vscale x 8 x i64> @llvm.llrint.nvx8i64.nvx8f32(<vscale x 8 x float> undef)
-  call <vscale x 16 x i64> @llvm.llrint.nvx16i64.nvx16f32(<vscale x 16 x float> undef)
+  call <vscale x 1 x i64> @llvm.llrint.nxv1i64.nxv1f32(<vscale x 1 x float> undef)
+  call <vscale x 2 x i64> @llvm.llrint.nxv2i64.nxv2f32(<vscale x 2 x float> undef)
+  call <vscale x 4 x i64> @llvm.llrint.nxv4i64.nxv4f32(<vscale x 4 x float> undef)
+  call <vscale x 8 x i64> @llvm.llrint.nxv8i64.nxv8f32(<vscale x 8 x float> undef)
+  call <vscale x 16 x i64> @llvm.llrint.nxv16i64.nxv16f32(<vscale x 16 x float> undef)
   call i64 @llvm.llrint.i64.f64(double undef)
   call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
   call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
   call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
   call <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double> undef)
-  call <vscale x 1 x i64> @llvm.llrint.nvx1i64.nvx1f64(<vscale x 1 x double> undef)
-  call <vscale x 2 x i64> @llvm.llrint.nvx2i64.nvx2f64(<vscale x 2 x double> undef)
-  call <vscale x 4 x i64> @llvm.llrint.nvx4i64.nvx4f64(<vscale x 4 x double> undef)
-  call <vscale x 8 x i64> @llvm.llrint.nvx8i64.nvx8f64(<vscale x 8 x double> undef)
+  call <vscale x 1 x i64> @llvm.llrint.nxv1i64.nxv1f64(<vscale x 1 x double> undef)
+  call <vscale x 2 x i64> @llvm.llrint.nxv2i64.nxv2f64(<vscale x 2 x double> undef)
+  call <vscale x 4 x i64> @llvm.llrint.nxv4i64.nxv4f64(<vscale x 4 x double> undef)
+  call <vscale x 8 x i64> @llvm.llrint.nxv8i64.nxv8f64(<vscale x 8 x double> undef)
   ret void
 }
 
@@ -598,11 +598,11 @@ define void @llrint_fp16() {
   call <4 x i64> @llvm.llrint.v4f16(<4 x half> undef)
   call <8 x i64> @llvm.llrint.v8f16(<8 x half> undef)
   call <16 x i64> @llvm.llrint.v16f16(<16 x half> undef)
-  call <vscale x 1 x i64> @llvm.llrint.nvx1f16(<vscale x 1 x half> undef)
-  call <vscale x 2 x i64> @llvm.llrint.nvx2f16(<vscale x 2 x half> undef)
-  call <vscale x 4 x i64> @llvm.llrint.nvx4f16(<vscale x 4 x half> undef)
-  call <vscale x 8 x i64> @llvm.llrint.nvx8f16(<vscale x 8 x half> undef)
-  call <vscale x 16 x i64> @llvm.llrint.nvx16f16(<vscale x 16 x half> undef)
+  call <vscale x 1 x i64> @llvm.llrint.nxv1f16(<vscale x 1 x half> undef)
+  call <vscale x 2 x i64> @llvm.llrint.nxv2f16(<vscale x 2 x half> undef)
+  call <vscale x 4 x i64> @llvm.llrint.nxv4f16(<vscale x 4 x half> undef)
+  call <vscale x 8 x i64> @llvm.llrint.nxv8f16(<vscale x 8 x half> undef)
+  call <vscale x 16 x i64> @llvm.llrint.nxv16f16(<vscale x 16 x half> undef)
   ret void
 }
 
@@ -644,30 +644,30 @@ define void @nearbyint() {
   call <4 x bfloat> @llvm.nearbyint.v4bf16(<4 x bfloat> undef)
   call <8 x bfloat> @llvm.nearbyint.v8bf16(<8 x bfloat> undef)
   call <16 x bfloat> @llvm.nearbyint.v16bf16(<16 x bfloat> undef)
-  call <vscale x 1 x bfloat> @llvm.nearbyint.nvx1bf16(<vscale x 1 x bfloat> undef)
-  call <vscale x 2 x bfloat> @llvm.nearbyint.nvx2bf16(<vscale x 2 x bfloat> undef)
-  call <vscale x 4 x bfloat> @llvm.nearbyint.nvx4bf16(<vscale x 4 x bfloat> undef)
-  call <vscale x 8 x bfloat> @llvm.nearbyint.nvx8bf16(<vscale x 8 x bfloat> undef)
-  call <vscale x 16 x bfloat> @llvm.nearbyint.nvx16bf16(<vscale x 16 x bfloat> undef)
+  call <vscale x 1 x bfloat> @llvm.nearbyint.nxv1bf16(<vscale x 1 x bfloat> undef)
+  call <vscale x 2 x bfloat> @llvm.nearbyint.nxv2bf16(<vscale x 2 x bfloat> undef)
+  call <vscale x 4 x bfloat> @llvm.nearbyint.nxv4bf16(<vscale x 4 x bfloat> undef)
+  call <vscale x 8 x bfloat> @llvm.nearbyint.nxv8bf16(<vscale x 8 x bfloat> undef)
+  call <vscale x 16 x bfloat> @llvm.nearbyint.nxv16bf16(<vscale x 16 x bfloat> undef)
   call float @llvm.nearbyint.f32(float undef)
   call <2 x float> @llvm.nearbyint.v2f32(<2 x float> undef)
   call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef)
   call <8 x float> @llvm.nearbyint.v8f32(<8 x float> undef)
   call <16 x float> @llvm.nearbyint.v16f32(<16 x float> undef)
-  call <vscale x 1 x float> @llvm.nearbyint.nvx1f32(<vscale x 1 x float> undef)
-  call <vscale x 2 x float> @llvm.nearbyint.nvx2f32(<vscale x 2 x float> undef)
-  call <vscale x 4 x float> @llvm.nearbyint.nvx4f32(<vscale x 4 x float> undef)
-  call <vscale x 8 x float> @llvm.nearbyint.nvx8f32(<vscale x 8 x float> undef)
-  call <vscale x 16 x float> @llvm.nearbyint.nvx16f32(<vscale x 16 x float> undef)
+  call <vscale x 1 x float> @llvm.nearbyint.nxv1f32(<vscale x 1 x float> undef)
+  call <vscale x 2 x float> @llvm.nearbyint.nxv2f32(<vscale x 2 x float> undef)
+  call <vscale x 4 x float> @llvm.nearbyint.nxv4f32(<vscale x 4 x float> undef)
+  call <vscale x 8 x float> @llvm.nearbyint.nxv8f32(<vscale x 8 x float> undef)
+  call <vscale x 16 x float> @llvm.nearbyint.nxv16f32(<vscale x 16 x float> undef)
   call double @llvm.nearbyint.f64(double undef)
   call <2 x double> @llvm.nearbyint.v2f64(<2 x double> undef)
   call <4 x double> @llvm.nearbyint.v4f64(<4 x double> undef)
   call <8 x double> @llvm.nearbyint.v8f64(<8 x double> undef)
   call <16 x double> @llvm.nearbyint.v16f64(<16 x double> undef)
-  call <vscale x 1 x double> @llvm.nearbyint.nvx1f64(<vscale x 1 x double> undef)
-  call <vscale x 2 x double> @llvm.nearbyint.nvx2f64(<vscale x 2 x double> undef)
-  call <vscale x 4 x double> @llvm.nearbyint.nvx4f64(<vscale x 4 x double> undef)
-  call <vscale x 8 x double> @llvm.nearbyint.nvx8f64(<vscale x 8 x double> undef)
+  call <vscale x 1 x double> @llvm.nearbyint.nxv1f64(<vscale x 1 x double> undef)
+  call <vscale x 2 x double> @llvm.nearbyint.nxv2f64(<vscale x 2 x double> undef)
+  call <vscale x 4 x double> @llvm.nearbyint.nxv4f64(<vscale x 4 x double> undef)
+  call <vscale x 8 x double> @llvm.nearbyint.nxv8f64(<vscale x 8 x double> undef)
   ret void
 }
 
@@ -703,11 +703,11 @@ define void @nearbyint_fp16() {
   call <4 x half> @llvm.nearbyint.v4f16(<4 x half> undef)
   call <8 x half> @llvm.nearbyint.v8f16(<8 x half> undef)
   call <16 x half> @llvm.nearbyint.v16f16(<16 x half> undef)
-  call <vscale x 1 x half> @llvm.nearbyint.nvx1f16(<vscale x 1 x half> undef)
-  call <vscale x 2 x half> @llvm.nearbyint.nvx2f16(<vscale x 2 x half> undef)
-  call <vscale x 4 x half> @llvm.nearbyint.nvx4f16(<vscale x 4 x half> undef)
-  call <vscale x 8 x half> @llvm.nearbyint.nvx8f16(<vscale x 8 x half> undef)
-  call <vscale x 16 x half> @llvm.nearbyint.nvx16f16(<vscale x 16 x half> undef)
+  call <vscale x 1 x half> @llvm.nearbyint.nxv1f16(<vscale x 1 x half> undef)
+  call <vscale x 2 x half> @llvm.nearbyint.nxv2f16(<vscale x 2 x half> undef)
+  call <vscale x 4 x half> @llvm.nearbyint.nxv4f16(<vscale x 4 x half> undef)
+  call <vscale x 8 x half> @llvm.nearbyint.nxv8f16(<vscale x 8 x half> undef)
+  call <vscale x 16 x half> @llvm.nearbyint.nxv16f16(<vscale x 16 x half> undef)
   ret void
 }
 
@@ -749,30 +749,30 @@ define void @round() {
   call <4 x bfloat> @llvm.round.v4bf16(<4 x bfloat> undef)
   call <8 x bfloat> @llvm.round.v8bf16(<8 x bfloat> undef)
   call <16 x bfloat> @llvm.round.v16bf16(<16 x bfloat> undef)
-  call <vscale x 1 x bfloat> @llvm.round.nvx1bf16(<vscale x 1 x bfloat> undef)
-  call <vscale x 2 x bfloat> @llvm.round.nvx2bf16(<vscale x 2 x bfloat> undef)
-  call <vscale x 4 x bfloat> @llvm.round.nvx4bf16(<vscale x 4 x bfloat> undef)
-  call <vscale x 8 x bfloat> @llvm.round.nvx8bf16(<vscale x 8 x bfloat> undef)
-  call <vscale x 16 x bfloat> @llvm.round.nvx16bf16(<vscale x 16 x bfloat> undef)
+  call <vscale x 1 x bfloat> @llvm.round.nxv1bf16(<vscale x 1 x bfloat> undef)
+  call <vscale x 2 x bfloat> @llvm.round.nxv2bf16(<vscale x 2 x bfloat> undef)
+  call <vscale x 4 x bfloat> @llvm.round.nxv4bf16(<vscale x 4 x bfloat> undef)
+  call <vscale x 8 x bfloat> @llvm.round.nxv8bf16(<vscale x 8 x bfloat> undef)
+  call <vscale x 16 x bfloat> @llvm.round.nxv16bf16(<vscale x 16 x bfloat> undef)
   call float @llvm.round.f32(float undef)
   call <2 x float> @llvm.round.v2f32(<2 x float> undef)
   call <4 x float> @llvm.round.v4f32(<4 x float> undef)
   call <8 x float> @llvm.round.v8f32(<8 x float> undef)
   call <16 x float> @llvm.round.v16f32(<16 x float> undef)
-  call <vscale x 1 x float> @llvm.round.nvx1f32(<vscale x 1 x float> undef)
-  call <vscale x 2 x float> @llvm.round.nvx2f32(<vscale x 2 x float> undef)
-  call <vscale x 4 x float> @llvm.round.nvx4f32(<vscale x 4 x float> undef)
-  call <vscale x 8 x float> @llvm.round.nvx8f32(<vscale x 8 x float> undef)
-  call <vscale x 16 x float> @llvm.round.nvx16f32(<vscale x 16 x float> undef)
+  call <vscale x 1 x float> @llvm.round.nxv1f32(<vscale x 1 x float> undef)
+  call <vscale x 2 x float> @llvm.round.nxv2f32(<vscale x 2 x float> undef)
+  call <vscale x 4 x float> @llvm.round.nxv4f32(<vscale x 4 x float> undef)
+  call <vscale x 8 x float> @llvm.round.nxv8f32(<vscale x 8 x float> undef)
+  call <vscale x 16 x float> @llvm.round.nxv16f32(<vscale x 16 x float> undef)
   call double @llvm.round.f64(double undef)
   call <2 x double> @llvm.round.v2f64(<2 x double> undef)
   call <4 x double> @llvm.round.v4f64(<4 x double> undef)
   call <8 x double> @llvm.round.v8f64(<8 x double> undef)
   call <16 x double> @llvm.round.v16f64(<16 x double> undef)
-  call <vscale x 1 x double> @llvm.round.nvx1f64(<vscale x 1 x double> undef)
-  call <vscale x 2 x double> @llvm.round.nvx2f64(<vscale x 2 x double> undef)
-  call <vscale x 4 x double> @llvm.round.nvx4f64(<vscale x 4 x double> undef)
-  call <vscale x 8 x double> @llvm.round.nvx8f64(<vscale x 8 x double> undef)
+  call <vscale x 1 x double> @llvm.round.nxv1f64(<vscale x 1 x double> undef)
+  call <vscale x 2 x double> @llvm.round.nxv2f64(<vscale x 2 x double> undef)
+  call <vscale x 4 x double> @llvm.round.nxv4f64(<vscale x 4 x double> undef)
+  call <vscale x 8 x double> @llvm.round.nxv8f64(<vscale x 8 x double> undef)
   ret void
 }
 
@@ -808,11 +808,11 @@ define void @round_fp16() {
   call <4 x half> @llvm.round.v4f16(<4 x half> undef)
   call <8 x half> @llvm.round.v8f16(<8 x half> undef)
   call <16 x half> @llvm.round.v16f16(<16 x half> undef)
-  call <vscale x 1 x half> @llvm.round.nvx1f16(<vscale x 1 x half> undef)
-  call <vscale x 2 x half> @llvm.round.nvx2f16(<vscale x 2 x half> undef)
-  call <vscale x 4 x half> @llvm.round.nvx4f16(<vscale x 4 x half> undef)
-  call <vscale x 8 x half> @llvm.round.nvx8f16(<vscale x 8 x half> undef)
-  call <vscale x 16 x half> @llvm.round.nvx16f16(<vscale x 16 x half> undef)
+  call <vscale x 1 x half> @llvm.round.nxv1f16(<vscale x 1 x half> undef)
+  call <vscale x 2 x half> @llvm.round.nxv2f16(<vscale x 2 x half> undef)
+  call <vscale x 4 x half> @llvm.round.nxv4f16(<vscale x 4 x half> undef)
+  call <vscale x 8 x half> @llvm.round.nxv8f16(<vscale x 8 x half> undef)
+  call <vscale x 16 x half> @llvm.round.nxv16f16(<vscale x 16 x half> undef)
   ret void
 }
 
@@ -854,30 +854,30 @@ define void @roundeven() {
   call <4 x bfloat> @llvm.roundeven.v4bf16(<4 x bfloat> undef)
   call <8 x bfloat> @llvm.roundeven.v8bf16(<8 x bfloat> undef)
   call <16 x bfloat> @llvm.roundeven.v16bf16(<16 x bfloat> undef)
-  call <vscale x 1 x bfloat> @llvm.roundeven.nvx1bf16(<vscale x 1 x bfloat> undef)
-  call <vscale x 2 x bfloat> @llvm.roundeven.nvx2bf16(<vscale x 2 x bfloat> undef)
-  call <vscale x 4 x bfloat> @llvm.roundeven.nvx4bf16(<vscale x 4 x bfloat> undef)
-  call <vscale x 8 x bfloat> @llvm.roundeven.nvx8bf16(<vscale x 8 x bfloat> undef)
-  call <vscale x 16 x bfloat> @llvm.roundeven.nvx16bf16(<vscale x 16 x bfloat> undef)
+  call <vscale x 1 x bfloat> @llvm.roundeven.nxv1bf16(<vscale x 1 x bfloat> undef)
+  call <vscale x 2 x bfloat> @llvm.roundeven.nxv2bf16(<vscale x 2 x bfloat> undef)
+  call <vscale x 4 x bfloat> @llvm.roundeven.nxv4bf16(<vscale x 4 x bfloat> undef)
+  call <vscale x 8 x bfloat> @llvm.roundeven.nxv8bf16(<vscale x 8 x bfloat> undef)
+  call <vscale x 16 x bfloat> @llvm.roundeven.nxv16bf16(<vscale x 16 x bfloat> undef)
   call float @llvm.roundeven.f32(float undef)
   call <2 x float> @llvm.roundeven.v2f32(<2 x float> undef)
   call <4 x float> @llvm.roundeven.v4f32(<4 x float> undef)
   call <8 x float> @llvm.roundeven.v8f32(<8 x float> undef)
   call <16 x float> @llvm.roundeven.v16f32(<16 x float> undef)
-  call <vscale x 1 x float> @llvm.roundeven.nvx1f32(<vscale x 1 x float> undef)
-  call <vscale x 2 x float> @llvm.roundeven.nvx2f32(<vscale x 2 x float> undef)
-  call <vscale x 4 x float> @llvm.roundeven.nvx4f32(<vscale x 4 x float> undef)
-  call <vscale x 8 x float> @llvm.roundeven.nvx8f32(<vscale x 8 x float> undef)
-  call <vscale x 16 x float> @llvm.roundeven.nvx16f32(<vscale x 16 x float> undef)
+  call <vscale x 1 x float> @llvm.roundeven.nxv1f32(<vscale x 1 x float> undef)
+  call <vscale x 2 x float> @llvm.roundeven.nxv2f32(<vscale x 2 x float> undef)
+  call <vscale x 4 x float> @llvm.roundeven.nxv4f32(<vscale x 4 x float> undef)
+  call <vscale x 8 x float> @llvm.roundeven.nxv8f32(<vscale x 8 x float> undef)
+  call <vscale x 16 x float> @llvm.roundeven.nxv16f32(<vscale x 16 x float> undef)
   call double @llvm.roundeven.f64(double undef)
   call <2 x double> @llvm.roundeven.v2f64(<2 x double> undef)
   call <4 x double> @llvm.roundeven.v4f64(<4 x double> undef)
   call <8 x double> @llvm.roundeven.v8f64(<8 x double> undef)
   call <16 x double> @llvm.roundeven.v16f64(<16 x double> undef)
-  call <vscale x 1 x double> @llvm.roundeven.nvx1f64(<vscale x 1 x double> undef)
-  call <vscale x 2 x double> @llvm.roundeven.nvx2f64(<vscale x 2 x double> undef)
-  call <vscale x 4 x double> @llvm.roundeven.nvx4f64(<vscale x 4 x double> undef)
-  call <vscale x 8 x double> @llvm.roundeven.nvx8f64(<vscale x 8 x double> undef)
+  call <vscale x 1 x double> @llvm.roundeven.nxv1f64(<vscale x 1 x double> undef)
+  call <vscale x 2 x double> @llvm.roundeven.nxv2f64(<vscale x 2 x double> undef)
+  call <vscale x 4 x double> @llvm.roundeven.nxv4f64(<vscale x 4 x double> undef)
+  call <vscale x 8 x double> @llvm.roundeven.nxv8f64(<vscale x 8 x double> undef)
   ret void
 }
 
@@ -913,11 +913,11 @@ define void @roundeven_fp16() {
   call <4 x half> @llvm.roundeven.v4f16(<4 x half> undef)
   call <8 x half> @llvm.roundeven.v8f16(<8 x half> undef)
   call <16 x half> @llvm.roundeven.v16f16(<16 x half> undef)
-  call <vscale x 1 x half> @llvm.roundeven.nvx1f16(<vscale x 1 x half> undef)
-  call <vscale x 2 x half> @llvm.roundeven.nvx2f16(<vscale x 2 x half> undef)
-  call <vscale x 4 x half> @llvm.roundeven.nvx4f16(<vscale x 4 x half> undef)
-  call <vscale x 8 x half> @llvm.roundeven.nvx8f16(<vscale x 8 x half> undef)
-  call <vscale x 16 x half> @llvm.roundeven.nvx16f16(<vscale x 16 x half> undef)
+  call <vscale x 1 x half> @llvm.roundeven.nxv1f16(<vscale x 1 x half> undef)
+  call <vscale x 2 x half> @llvm.roundeven.nxv2f16(<vscale x 2 x half> undef)
+  call <vscale x 4 x half> @llvm.roundeven.nxv4f16(<vscale x 4 x half> undef)
+  call <vscale x 8 x half> @llvm.roundeven.nxv8f16(<vscale x 8 x half> undef)
+  call <vscale x 16 x half> @llvm.roundeven.nxv16f16(<vscale x 16 x half> undef)
   ret void
 }
 
@@ -955,28 +955,28 @@ define void @vp_ceil() {
   call <4 x bfloat> @llvm.vp.ceil.v4bf16(<4 x bfloat> undef, <4 x i1> undef, i32 undef)
   call <8 x bfloat> @llvm.vp.ceil.v8bf16(<8 x bfloat> undef, <8 x i1> undef, i32 undef)
   call <16 x bfloat> @llvm.vp.ceil.v16bf16(<16 x bfloat> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x bfloat> @llvm.vp.ceil.nvx1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x bfloat> @llvm.vp.ceil.nvx2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x bfloat> @llvm.vp.ceil.nvx4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x bfloat> @llvm.vp.ceil.nvx8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
-  call <vscale x 16 x bfloat> @llvm.vp.ceil.nvx16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+  call <vscale x 1 x bfloat> @llvm.vp.ceil.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x bfloat> @llvm.vp.ceil.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x bfloat> @llvm.vp.ceil.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x bfloat> @llvm.vp.ceil.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x bfloat> @llvm.vp.ceil.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
   call <2 x float> @llvm.vp.ceil.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
   call <4 x float> @llvm.vp.ceil.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
   call <8 x float> @llvm.vp.ceil.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
   call <16 x float> @llvm.vp.ceil.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x float> @llvm.vp.ceil.nvx1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x float> @llvm.vp.ceil.nvx2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x float> @llvm.vp.ceil.nvx4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x float> @llvm.vp.ceil.nvx8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-  call <vscale x 16 x float> @llvm.vp.ceil.nvx16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+  call <vscale x 1 x float> @llvm.vp.ceil.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x float> @llvm.vp.ceil.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x float> @llvm.vp.ceil.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x float> @llvm.vp.ceil.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x float> @llvm.vp.ceil.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
   call <2 x double> @llvm.vp.ceil.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
   call <4 x double> @llvm.vp.ceil.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
   call <8 x double> @llvm.vp.ceil.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
   call <16 x double> @llvm.vp.ceil.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x double> @llvm.vp.ceil.nvx1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x double> @llvm.vp.ceil.nvx2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x double> @llvm.vp.ceil.nvx4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x double> @llvm.vp.ceil.nvx8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 1 x double> @llvm.vp.ceil.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x double> @llvm.vp.ceil.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x double> @llvm.vp.ceil.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x double> @llvm.vp.ceil.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
   ret void
 }
 
@@ -1009,11 +1009,11 @@ define void @vp_ceil_f16() {
   call <4 x half> @llvm.vp.ceil.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
   call <8 x half> @llvm.vp.ceil.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
   call <16 x half> @llvm.vp.ceil.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x half> @llvm.vp.ceil.nvx1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x half> @llvm.vp.ceil.nvx2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x half> @llvm.vp.ceil.nvx4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x half> @llvm.vp.ceil.nvx8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
-  call <vscale x 16 x half> @llvm.vp.ceil.nvx16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+  call <vscale x 1 x half> @llvm.vp.ceil.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x half> @llvm.vp.ceil.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x half> @llvm.vp.ceil.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x half> @llvm.vp.ceil.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x half> @llvm.vp.ceil.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
   ret void
 }
 
@@ -1051,28 +1051,28 @@ define void @vp_floor() {
   call <4 x bfloat> @llvm.vp.floor.v4bf16(<4 x bfloat> undef, <4 x i1> undef, i32 undef)
   call <8 x bfloat> @llvm.vp.floor.v8bf16(<8 x bfloat> undef, <8 x i1> undef, i32 undef)
   call <16 x bfloat> @llvm.vp.floor.v16bf16(<16 x bfloat> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x bfloat> @llvm.vp.floor.nvx1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x bfloat> @llvm.vp.floor.nvx2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x bfloat> @llvm.vp.floor.nvx4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x bfloat> @llvm.vp.floor.nvx8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
-  call <vscale x 16 x bfloat> @llvm.vp.floor.nvx16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+  call <vscale x 1 x bfloat> @llvm.vp.floor.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x bfloat> @llvm.vp.floor.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x bfloat> @llvm.vp.floor.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x bfloat> @llvm.vp.floor.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x bfloat> @llvm.vp.floor.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
   call <2 x float> @llvm.vp.floor.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
   call <4 x float> @llvm.vp.floor.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
   call <8 x float> @llvm.vp.floor.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
   call <16 x float> @llvm.vp.floor.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x float> @llvm.vp.floor.nvx1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x float> @llvm.vp.floor.nvx2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x float> @llvm.vp.floor.nvx4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x float> @llvm.vp.floor.nvx8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-  call <vscale x 16 x float> @llvm.vp.floor.nvx16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+  call <vscale x 1 x float> @llvm.vp.floor.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x float> @llvm.vp.floor.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x float> @llvm.vp.floor.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x float> @llvm.vp.floor.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x float> @llvm.vp.floor.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
   call <2 x double> @llvm.vp.floor.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
   call <4 x double> @llvm.vp.floor.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
   call <8 x double> @llvm.vp.floor.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
   call <16 x double> @llvm.vp.floor.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x double> @llvm.vp.floor.nvx1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x double> @llvm.vp.floor.nvx2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x double> @llvm.vp.floor.nvx4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x double> @llvm.vp.floor.nvx8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 1 x double> @llvm.vp.floor.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x double> @llvm.vp.floor.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x double> @llvm.vp.floor.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x double> @llvm.vp.floor.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
   ret void
 }
 
@@ -1105,11 +1105,11 @@ define void @vp_floor_f16() {
   call <4 x half> @llvm.vp.floor.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
   call <8 x half> @llvm.vp.floor.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
   call <16 x half> @llvm.vp.floor.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x half> @llvm.vp.floor.nvx1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x half> @llvm.vp.floor.nvx2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x half> @llvm.vp.floor.nvx4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x half> @llvm.vp.floor.nvx8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
-  call <vscale x 16 x half> @llvm.vp.floor.nvx16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+  call <vscale x 1 x half> @llvm.vp.floor.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x half> @llvm.vp.floor.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x half> @llvm.vp.floor.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x half> @llvm.vp.floor.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x half> @llvm.vp.floor.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
   ret void
 }
 
@@ -1147,28 +1147,28 @@ define void @vp_round() {
   call <4 x bfloat> @llvm.vp.round.v4bf16(<4 x bfloat> undef, <4 x i1> undef, i32 undef)
   call <8 x bfloat> @llvm.vp.round.v8bf16(<8 x bfloat> undef, <8 x i1> undef, i32 undef)
   call <16 x bfloat> @llvm.vp.round.v16bf16(<16 x bfloat> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x bfloat> @llvm.vp.round.nvx1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x bfloat> @llvm.vp.round.nvx2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x bfloat> @llvm.vp.round.nvx4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x bfloat> @llvm.vp.round.nvx8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
-  call <vscale x 16 x bfloat> @llvm.vp.round.nvx16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+  call <vscale x 1 x bfloat> @llvm.vp.round.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x bfloat> @llvm.vp.round.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x bfloat> @llvm.vp.round.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x bfloat> @llvm.vp.round.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x bfloat> @llvm.vp.round.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
   call <2 x float> @llvm.vp.round.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
   call <4 x float> @llvm.vp.round.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
   call <8 x float> @llvm.vp.round.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
   call <16 x float> @llvm.vp.round.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x float> @llvm.vp.round.nvx1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x float> @llvm.vp.round.nvx2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x float> @llvm.vp.round.nvx4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x float> @llvm.vp.round.nvx8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-  call <vscale x 16 x float> @llvm.vp.round.nvx16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+  call <vscale x 1 x float> @llvm.vp.round.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x float> @llvm.vp.round.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x float> @llvm.vp.round.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x float> @llvm.vp.round.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x float> @llvm.vp.round.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
   call <2 x double> @llvm.vp.round.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
   call <4 x double> @llvm.vp.round.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
   call <8 x double> @llvm.vp.round.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
   call <16 x double> @llvm.vp.round.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x double> @llvm.vp.round.nvx1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x double> @llvm.vp.round.nvx2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x double> @llvm.vp.round.nvx4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x double> @llvm.vp.round.nvx8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 1 x double> @llvm.vp.round.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x double> @llvm.vp.round.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x double> @llvm.vp.round.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x double> @llvm.vp.round.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
   ret void
 }
 
@@ -1201,11 +1201,11 @@ define void @vp_round_f16() {
   call <4 x half> @llvm.vp.round.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
   call <8 x half> @llvm.vp.round.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
   call <16 x half> @llvm.vp.round.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x half> @llvm.vp.round.nvx1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x half> @llvm.vp.round.nvx2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x half> @llvm.vp.round.nvx4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x half> @llvm.vp.round.nvx8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
-  call <vscale x 16 x half> @llvm.vp.round.nvx16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+  call <vscale x 1 x half> @llvm.vp.round.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x half> @llvm.vp.round.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x half> @llvm.vp.round.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x half> @llvm.vp.round.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x half> @llvm.vp.round.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
   ret void
 }
 
@@ -1243,28 +1243,28 @@ define void @vp_roundeven() {
   call <4 x bfloat> @llvm.vp.roundeven.v4bf16(<4 x bfloat> undef, <4 x i1> undef, i32 undef)
   call <8 x bfloat> @llvm.vp.roundeven.v8bf16(<8 x bfloat> undef, <8 x i1> undef, i32 undef)
   call <16 x bfloat> @llvm.vp.roundeven.v16bf16(<16 x bfloat> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x bfloat> @llvm.vp.roundeven.nvx1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x bfloat> @llvm.vp.roundeven.nvx2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x bfloat> @llvm.vp.roundeven.nvx4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x bfloat> @llvm.vp.roundeven.nvx8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
-  call <vscale x 16 x bfloat> @llvm.vp.roundeven.nvx16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+  call <vscale x 1 x bfloat> @llvm.vp.roundeven.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x bfloat> @llvm.vp.roundeven.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x bfloat> @llvm.vp.roundeven.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x bfloat> @llvm.vp.roundeven.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x bfloat> @llvm.vp.roundeven.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
   call <2 x float> @llvm.vp.roundeven.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
   call <4 x float> @llvm.vp.roundeven.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
   call <8 x float> @llvm.vp.roundeven.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
   call <16 x float> @llvm.vp.roundeven.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x float> @llvm.vp.roundeven.nvx1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x float> @llvm.vp.roundeven.nvx2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x float> @llvm.vp.roundeven.nvx4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x float> @llvm.vp.roundeven.nvx8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-  call <vscale x 16 x float> @llvm.vp.roundeven.nvx16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+  call <vscale x 1 x float> @llvm.vp.roundeven.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x float> @llvm.vp.roundeven.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x float> @llvm.vp.roundeven.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x float> @llvm.vp.roundeven.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x float> @llvm.vp.roundeven.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
   call <2 x double> @llvm.vp.roundeven.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
   call <4 x double> @llvm.vp.roundeven.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
   call <8 x double> @llvm.vp.roundeven.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
   call <16 x double> @llvm.vp.roundeven.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x double> @llvm.vp.roundeven.nvx1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x double> @llvm.vp.roundeven.nvx2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x double> @llvm.vp.roundeven.nvx4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x double> @llvm.vp.roundeven.nvx8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 1 x double> @llvm.vp.roundeven.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x double> @llvm.vp.roundeven.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x double> @llvm.vp.roundeven.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x double> @llvm.vp.roundeven.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
   ret void
 }
 
@@ -1297,11 +1297,11 @@ define void @vp_roundeven_f16() {
   call <4 x half> @llvm.vp.roundeven.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
   call <8 x half> @llvm.vp.roundeven.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
   call <16 x half> @llvm.vp.roundeven.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x half> @llvm.vp.roundeven.nvx1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x half> @llvm.vp.roundeven.nvx2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x half> @llvm.vp.roundeven.nvx4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x half> @llvm.vp.roundeven.nvx8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
-  call <vscale x 16 x half> @llvm.vp.roundeven.nvx16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+  call <vscale x 1 x half> @llvm.vp.roundeven.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x half> @llvm.vp.roundeven.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x half> @llvm.vp.roundeven.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x half> @llvm.vp.roundeven.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x half> @llvm.vp.roundeven.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
   ret void
 }
 
@@ -1339,28 +1339,28 @@ define void @vp_roundtozero() {
   call <4 x bfloat> @llvm.vp.roundtozero.v4bf16(<4 x bfloat> undef, <4 x i1> undef, i32 undef)
   call <8 x bfloat> @llvm.vp.roundtozero.v8bf16(<8 x bfloat> undef, <8 x i1> undef, i32 undef)
   call <16 x bfloat> @llvm.vp.roundtozero.v16bf16(<16 x bfloat> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x bfloat> @llvm.vp.roundtozero.nvx1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x bfloat> @llvm.vp.roundtozero.nvx2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x bfloat> @llvm.vp.roundtozero.nvx4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x bfloat> @llvm.vp.roundtozero.nvx8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
-  call <vscale x 16 x bfloat> @llvm.vp.roundtozero.nvx16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+  call <vscale x 1 x bfloat> @llvm.vp.roundtozero.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x bfloat> @llvm.vp.roundtozero.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x bfloat> @llvm.vp.roundtozero.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x bfloat> @llvm.vp.roundtozero.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x bfloat> @llvm.vp.roundtozero.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
   call <2 x float> @llvm.vp.roundtozero.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
   call <4 x float> @llvm.vp.roundtozero.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
   call <8 x float> @llvm.vp.roundtozero.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
   call <16 x float> @llvm.vp.roundtozero.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x float> @llvm.vp.roundtozero.nvx1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x float> @llvm.vp.roundtozero.nvx2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x float> @llvm.vp.roundtozero.nvx4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x float> @llvm.vp.roundtozero.nvx8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-  call <vscale x 16 x float> @llvm.vp.roundtozero.nvx16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+  call <vscale x 1 x float> @llvm.vp.roundtozero.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x float> @llvm.vp.roundtozero.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x float> @llvm.vp.roundtozero.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x float> @llvm.vp.roundtozero.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x float> @llvm.vp.roundtozero.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
   call <2 x double> @llvm.vp.roundtozero.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
   call <4 x double> @llvm.vp.roundtozero.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
   call <8 x double> @llvm.vp.roundtozero.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
   call <16 x double> @llvm.vp.roundtozero.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x double> @llvm.vp.roundtozero.nvx1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x double> @llvm.vp.roundtozero.nvx2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x double> @llvm.vp.roundtozero.nvx4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x double> @llvm.vp.roundtozero.nvx8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 1 x double> @llvm.vp.roundtozero.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x double> @llvm.vp.roundtozero.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x double> @llvm.vp.roundtozero.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x double> @llvm.vp.roundtozero.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
   ret void
 }
 
@@ -1393,11 +1393,11 @@ define void @vp_roundtozero_f16() {
   call <4 x half> @llvm.vp.roundtozero.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
   call <8 x half> @llvm.vp.roundtozero.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
   call <16 x half> @llvm.vp.roundtozero.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x half> @llvm.vp.roundtozero.nvx1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x half> @llvm.vp.roundtozero.nvx2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x half> @llvm.vp.roundtozero.nvx4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x half> @llvm.vp.roundtozero.nvx8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
-  call <vscale x 16 x half> @llvm.vp.roundtozero.nvx16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+  call <vscale x 1 x half> @llvm.vp.roundtozero.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x half> @llvm.vp.roundtozero.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x half> @llvm.vp.roundtozero.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x half> @llvm.vp.roundtozero.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x half> @llvm.vp.roundtozero.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
   ret void
 }
 
@@ -1435,28 +1435,28 @@ define void @vp_rint() {
   call <4 x bfloat> @llvm.vp.rint.v4bf16(<4 x bfloat> undef, <4 x i1> undef, i32 undef)
   call <8 x bfloat> @llvm.vp.rint.v8bf16(<8 x bfloat> undef, <8 x i1> undef, i32 undef)
   call <16 x bfloat> @llvm.vp.rint.v16bf16(<16 x bfloat> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x bfloat> @llvm.vp.rint.nvx1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x bfloat> @llvm.vp.rint.nvx2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x bfloat> @llvm.vp.rint.nvx4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x bfloat> @llvm.vp.rint.nvx8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
-  call <vscale x 16 x bfloat> @llvm.vp.rint.nvx16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+  call <vscale x 1 x bfloat> @llvm.vp.rint.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x bfloat> @llvm.vp.rint.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x bfloat> @llvm.vp.rint.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x bfloat> @llvm.vp.rint.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x bfloat> @llvm.vp.rint.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
   call <2 x float> @llvm.vp.rint.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
   call <4 x float> @llvm.vp.rint.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
   call <8 x float> @llvm.vp.rint.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
   call <16 x float> @llvm.vp.rint.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x float> @llvm.vp.rint.nvx1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x float> @llvm.vp.rint.nvx2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x float> @llvm.vp.rint.nvx4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x float> @llvm.vp.rint.nvx8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-  call <vscale x 16 x float> @llvm.vp.rint.nvx16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+  call <vscale x 1 x float> @llvm.vp.rint.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x float> @llvm.vp.rint.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x float> @llvm.vp.rint.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x float> @llvm.vp.rint.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x float> @llvm.vp.rint.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
   call <2 x double> @llvm.vp.rint.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
   call <4 x double> @llvm.vp.rint.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
   call <8 x double> @llvm.vp.rint.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
   call <16 x double> @llvm.vp.rint.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x double> @llvm.vp.rint.nvx1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x double> @llvm.vp.rint.nvx2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x double> @llvm.vp.rint.nvx4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x double> @llvm.vp.rint.nvx8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 1 x double> @llvm.vp.rint.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x double> @llvm.vp.rint.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x double> @llvm.vp.rint.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x double> @llvm.vp.rint.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
   ret void
 }
 
@@ -1489,11 +1489,11 @@ define void @vp_rint_f16() {
   call <4 x half> @llvm.vp.rint.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
   call <8 x half> @llvm.vp.rint.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
   call <16 x half> @llvm.vp.rint.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x half> @llvm.vp.rint.nvx1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x half> @llvm.vp.rint.nvx2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x half> @llvm.vp.rint.nvx4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x half> @llvm.vp.rint.nvx8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
-  call <vscale x 16 x half> @llvm.vp.rint.nvx16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+  call <vscale x 1 x half> @llvm.vp.rint.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x half> @llvm.vp.rint.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x half> @llvm.vp.rint.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x half> @llvm.vp.rint.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x half> @llvm.vp.rint.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
   ret void
 }
 
@@ -1531,28 +1531,28 @@ define void @vp_nearbyint() {
   call <4 x bfloat> @llvm.vp.nearbyint.v4bf16(<4 x bfloat> undef, <4 x i1> undef, i32 undef)
   call <8 x bfloat> @llvm.vp.nearbyint.v8bf16(<8 x bfloat> undef, <8 x i1> undef, i32 undef)
   call <16 x bfloat> @llvm.vp.nearbyint.v16bf16(<16 x bfloat> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x bfloat> @llvm.vp.nearbyint.nvx1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x bfloat> @llvm.vp.nearbyint.nvx2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x bfloat> @llvm.vp.nearbyint.nvx4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x bfloat> @llvm.vp.nearbyint.nvx8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
-  call <vscale x 16 x bfloat> @llvm.vp.nearbyint.nvx16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+  call <vscale x 1 x bfloat> @llvm.vp.nearbyint.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x bfloat> @llvm.vp.nearbyint.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x bfloat> @llvm.vp.nearbyint.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x bfloat> @llvm.vp.nearbyint.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x bfloat> @llvm.vp.nearbyint.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
   call <2 x float> @llvm.vp.nearbyint.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
   call <4 x float> @llvm.vp.nearbyint.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
   call <8 x float> @llvm.vp.nearbyint.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
   call <16 x float> @llvm.vp.nearbyint.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x float> @llvm.vp.nearbyint.nvx1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x float> @llvm.vp.nearbyint.nvx2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x float> @llvm.vp.nearbyint.nvx4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x float> @llvm.vp.nearbyint.nvx8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-  call <vscale x 16 x float> @llvm.vp.nearbyint.nvx16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+  call <vscale x 1 x float> @llvm.vp.nearbyint.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x float> @llvm.vp.nearbyint.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x float> @llvm.vp.nearbyint.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x float> @llvm.vp.nearbyint.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x float> @llvm.vp.nearbyint.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
   call <2 x double> @llvm.vp.nearbyint.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
   call <4 x double> @llvm.vp.nearbyint.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
   call <8 x double> @llvm.vp.nearbyint.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
   call <16 x double> @llvm.vp.nearbyint.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x double> @llvm.vp.nearbyint.nvx1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x double> @llvm.vp.nearbyint.nvx2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x double> @llvm.vp.nearbyint.nvx4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x double> @llvm.vp.nearbyint.nvx8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 1 x double> @llvm.vp.nearbyint.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x double> @llvm.vp.nearbyint.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x double> @llvm.vp.nearbyint.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x double> @llvm.vp.nearbyint.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
   ret void
 }
 
@@ -1585,10 +1585,10 @@ define void @vp_nearbyint_f16() {
   call <4 x half> @llvm.vp.nearbyint.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
   call <8 x half> @llvm.vp.nearbyint.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
   call <16 x half> @llvm.vp.nearbyint.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x half> @llvm.vp.nearbyint.nvx1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x half> @llvm.vp.nearbyint.nvx2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x half> @llvm.vp.nearbyint.nvx4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x half> @llvm.vp.nearbyint.nvx8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
-  call <vscale x 16 x half> @llvm.vp.nearbyint.nvx16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+  call <vscale x 1 x half> @llvm.vp.nearbyint.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x half> @llvm.vp.nearbyint.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x half> @llvm.vp.nearbyint.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x half> @llvm.vp.nearbyint.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x half> @llvm.vp.nearbyint.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
   ret void
 }

diff  --git a/llvm/test/Analysis/CostModel/RISCV/int-bit-manip.ll b/llvm/test/Analysis/CostModel/RISCV/int-bit-manip.ll
index b3b31d6f001ac4..ea05464b084086 100644
--- a/llvm/test/Analysis/CostModel/RISCV/int-bit-manip.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/int-bit-manip.ll
@@ -42,31 +42,31 @@ define void @bswap() {
   call <4 x i16> @llvm.bswap.v4i16(<4 x i16> undef)
   call <8 x i16> @llvm.bswap.v8i16(<8 x i16> undef)
   call <16 x i16> @llvm.bswap.v16i16(<16 x i16> undef)
-  call <vscale x 1 x i16> @llvm.bswap.nvx1i16(<vscale x 1 x i16> undef)
-  call <vscale x 2 x i16> @llvm.bswap.nvx2i16(<vscale x 2 x i16> undef)
-  call <vscale x 4 x i16> @llvm.bswap.nvx4i16(<vscale x 4 x i16> undef)
-  call <vscale x 8 x i16> @llvm.bswap.nvx8i16(<vscale x 8 x i16> undef)
-  call <vscale x 16 x i16> @llvm.bswap.nvx16i16(<vscale x 16 x i16> undef)
+  call <vscale x 1 x i16> @llvm.bswap.nxv1i16(<vscale x 1 x i16> undef)
+  call <vscale x 2 x i16> @llvm.bswap.nxv2i16(<vscale x 2 x i16> undef)
+  call <vscale x 4 x i16> @llvm.bswap.nxv4i16(<vscale x 4 x i16> undef)
+  call <vscale x 8 x i16> @llvm.bswap.nxv8i16(<vscale x 8 x i16> undef)
+  call <vscale x 16 x i16> @llvm.bswap.nxv16i16(<vscale x 16 x i16> undef)
   call i32 @llvm.bswap.i32(i32 undef)
   call <2 x i32> @llvm.bswap.v2i32(<2 x i32> undef)
   call <4 x i32> @llvm.bswap.v4i32(<4 x i32> undef)
   call <8 x i32> @llvm.bswap.v8i32(<8 x i32> undef)
   call <16 x i32> @llvm.bswap.v16i32(<16 x i32> undef)
-  call <vscale x 1 x i32> @llvm.bswap.nvx1i32(<vscale x 1 x i32> undef)
-  call <vscale x 2 x i32> @llvm.bswap.nvx2i32(<vscale x 2 x i32> undef)
-  call <vscale x 4 x i32> @llvm.bswap.nvx4i32(<vscale x 4 x i32> undef)
-  call <vscale x 8 x i32> @llvm.bswap.nvx8i32(<vscale x 8 x i32> undef)
-  call <vscale x 16 x i32> @llvm.bswap.nvx16i32(<vscale x 16 x i32> undef)
+  call <vscale x 1 x i32> @llvm.bswap.nxv1i32(<vscale x 1 x i32> undef)
+  call <vscale x 2 x i32> @llvm.bswap.nxv2i32(<vscale x 2 x i32> undef)
+  call <vscale x 4 x i32> @llvm.bswap.nxv4i32(<vscale x 4 x i32> undef)
+  call <vscale x 8 x i32> @llvm.bswap.nxv8i32(<vscale x 8 x i32> undef)
+  call <vscale x 16 x i32> @llvm.bswap.nxv16i32(<vscale x 16 x i32> undef)
   call i64 @llvm.bswap.i64(i64 undef)
   call <2 x i64> @llvm.bswap.v2i64(<2 x i64> undef)
   call <4 x i64> @llvm.bswap.v4i64(<4 x i64> undef)
   call <8 x i64> @llvm.bswap.v8i64(<8 x i64> undef)
   call <16 x i64> @llvm.bswap.v16i64(<16 x i64> undef)
-  call <vscale x 1 x i64> @llvm.bswap.nvx1i64(<vscale x 1 x i64> undef)
-  call <vscale x 2 x i64> @llvm.bswap.nvx2i64(<vscale x 2 x i64> undef)
-  call <vscale x 4 x i64> @llvm.bswap.nvx4i64(<vscale x 4 x i64> undef)
-  call <vscale x 8 x i64> @llvm.bswap.nvx8i64(<vscale x 8 x i64> undef)
-  call <vscale x 16 x i64> @llvm.bswap.nvx16i64(<vscale x 16 x i64> undef)
+  call <vscale x 1 x i64> @llvm.bswap.nxv1i64(<vscale x 1 x i64> undef)
+  call <vscale x 2 x i64> @llvm.bswap.nxv2i64(<vscale x 2 x i64> undef)
+  call <vscale x 4 x i64> @llvm.bswap.nxv4i64(<vscale x 4 x i64> undef)
+  call <vscale x 8 x i64> @llvm.bswap.nxv8i64(<vscale x 8 x i64> undef)
+  call <vscale x 16 x i64> @llvm.bswap.nxv16i64(<vscale x 16 x i64> undef)
   ret void
 }
 
@@ -119,41 +119,41 @@ define void @bitreverse() {
   call <4 x i8> @llvm.bitreverse.v4i8(<4 x i8> undef)
   call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> undef)
   call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> undef)
-  call <vscale x 1 x i8> @llvm.bitreverse.nvx1i8(<vscale x 1 x i8> undef)
-  call <vscale x 2 x i8> @llvm.bitreverse.nvx2i8(<vscale x 2 x i8> undef)
-  call <vscale x 4 x i8> @llvm.bitreverse.nvx4i8(<vscale x 4 x i8> undef)
-  call <vscale x 8 x i8> @llvm.bitreverse.nvx8i8(<vscale x 8 x i8> undef)
-  call <vscale x 16 x i8> @llvm.bitreverse.nvx16i8(<vscale x 16 x i8> undef)
+  call <vscale x 1 x i8> @llvm.bitreverse.nxv1i8(<vscale x 1 x i8> undef)
+  call <vscale x 2 x i8> @llvm.bitreverse.nxv2i8(<vscale x 2 x i8> undef)
+  call <vscale x 4 x i8> @llvm.bitreverse.nxv4i8(<vscale x 4 x i8> undef)
+  call <vscale x 8 x i8> @llvm.bitreverse.nxv8i8(<vscale x 8 x i8> undef)
+  call <vscale x 16 x i8> @llvm.bitreverse.nxv16i8(<vscale x 16 x i8> undef)
   call i16 @llvm.bitreverse.i16(i16 undef)
   call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> undef)
   call <4 x i16> @llvm.bitreverse.v4i16(<4 x i16> undef)
   call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> undef)
   call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> undef)
-  call <vscale x 1 x i16> @llvm.bitreverse.nvx1i16(<vscale x 1 x i16> undef)
-  call <vscale x 2 x i16> @llvm.bitreverse.nvx2i16(<vscale x 2 x i16> undef)
-  call <vscale x 4 x i16> @llvm.bitreverse.nvx4i16(<vscale x 4 x i16> undef)
-  call <vscale x 8 x i16> @llvm.bitreverse.nvx8i16(<vscale x 8 x i16> undef)
-  call <vscale x 16 x i16> @llvm.bitreverse.nvx16i16(<vscale x 16 x i16> undef)
+  call <vscale x 1 x i16> @llvm.bitreverse.nxv1i16(<vscale x 1 x i16> undef)
+  call <vscale x 2 x i16> @llvm.bitreverse.nxv2i16(<vscale x 2 x i16> undef)
+  call <vscale x 4 x i16> @llvm.bitreverse.nxv4i16(<vscale x 4 x i16> undef)
+  call <vscale x 8 x i16> @llvm.bitreverse.nxv8i16(<vscale x 8 x i16> undef)
+  call <vscale x 16 x i16> @llvm.bitreverse.nxv16i16(<vscale x 16 x i16> undef)
   call i32 @llvm.bitreverse.i32(i32 undef)
   call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> undef)
   call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> undef)
   call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> undef)
   call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> undef)
-  call <vscale x 1 x i32> @llvm.bitreverse.nvx1i32(<vscale x 1 x i32> undef)
-  call <vscale x 2 x i32> @llvm.bitreverse.nvx2i32(<vscale x 2 x i32> undef)
-  call <vscale x 4 x i32> @llvm.bitreverse.nvx4i32(<vscale x 4 x i32> undef)
-  call <vscale x 8 x i32> @llvm.bitreverse.nvx8i32(<vscale x 8 x i32> undef)
-  call <vscale x 16 x i32> @llvm.bitreverse.nvx16i32(<vscale x 16 x i32> undef)
+  call <vscale x 1 x i32> @llvm.bitreverse.nxv1i32(<vscale x 1 x i32> undef)
+  call <vscale x 2 x i32> @llvm.bitreverse.nxv2i32(<vscale x 2 x i32> undef)
+  call <vscale x 4 x i32> @llvm.bitreverse.nxv4i32(<vscale x 4 x i32> undef)
+  call <vscale x 8 x i32> @llvm.bitreverse.nxv8i32(<vscale x 8 x i32> undef)
+  call <vscale x 16 x i32> @llvm.bitreverse.nxv16i32(<vscale x 16 x i32> undef)
   call i64 @llvm.bitreverse.i64(i64 undef)
   call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> undef)
   call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> undef)
   call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> undef)
   call <16 x i64> @llvm.bitreverse.v16i64(<16 x i64> undef)
-  call <vscale x 1 x i64> @llvm.bitreverse.nvx1i64(<vscale x 1 x i64> undef)
-  call <vscale x 2 x i64> @llvm.bitreverse.nvx2i64(<vscale x 2 x i64> undef)
-  call <vscale x 4 x i64> @llvm.bitreverse.nvx4i64(<vscale x 4 x i64> undef)
-  call <vscale x 8 x i64> @llvm.bitreverse.nvx8i64(<vscale x 8 x i64> undef)
-  call <vscale x 16 x i64> @llvm.bitreverse.nvx16i64(<vscale x 16 x i64> undef)
+  call <vscale x 1 x i64> @llvm.bitreverse.nxv1i64(<vscale x 1 x i64> undef)
+  call <vscale x 2 x i64> @llvm.bitreverse.nxv2i64(<vscale x 2 x i64> undef)
+  call <vscale x 4 x i64> @llvm.bitreverse.nxv4i64(<vscale x 4 x i64> undef)
+  call <vscale x 8 x i64> @llvm.bitreverse.nxv8i64(<vscale x 8 x i64> undef)
+  call <vscale x 16 x i64> @llvm.bitreverse.nxv16i64(<vscale x 16 x i64> undef)
   ret void
 }
 
@@ -249,41 +249,41 @@ define void @ctpop() {
   call <4 x i8> @llvm.ctpop.v4i8(<4 x i8> undef)
   call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> undef)
   call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> undef)
-  call <vscale x 1 x i8> @llvm.ctpop.nvx1i8(<vscale x 1 x i8> undef)
-  call <vscale x 2 x i8> @llvm.ctpop.nvx2i8(<vscale x 2 x i8> undef)
-  call <vscale x 4 x i8> @llvm.ctpop.nvx4i8(<vscale x 4 x i8> undef)
-  call <vscale x 8 x i8> @llvm.ctpop.nvx8i8(<vscale x 8 x i8> undef)
-  call <vscale x 16 x i8> @llvm.ctpop.nvx16i8(<vscale x 16 x i8> undef)
+  call <vscale x 1 x i8> @llvm.ctpop.nxv1i8(<vscale x 1 x i8> undef)
+  call <vscale x 2 x i8> @llvm.ctpop.nxv2i8(<vscale x 2 x i8> undef)
+  call <vscale x 4 x i8> @llvm.ctpop.nxv4i8(<vscale x 4 x i8> undef)
+  call <vscale x 8 x i8> @llvm.ctpop.nxv8i8(<vscale x 8 x i8> undef)
+  call <vscale x 16 x i8> @llvm.ctpop.nxv16i8(<vscale x 16 x i8> undef)
   call i16 @llvm.ctpop.i16(i16 undef)
   call <2 x i16> @llvm.ctpop.v2i16(<2 x i16> undef)
   call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> undef)
   call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> undef)
   call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> undef)
-  call <vscale x 1 x i16> @llvm.ctpop.nvx1i16(<vscale x 1 x i16> undef)
-  call <vscale x 2 x i16> @llvm.ctpop.nvx2i16(<vscale x 2 x i16> undef)
-  call <vscale x 4 x i16> @llvm.ctpop.nvx4i16(<vscale x 4 x i16> undef)
-  call <vscale x 8 x i16> @llvm.ctpop.nvx8i16(<vscale x 8 x i16> undef)
-  call <vscale x 16 x i16> @llvm.ctpop.nvx16i16(<vscale x 16 x i16> undef)
+  call <vscale x 1 x i16> @llvm.ctpop.nxv1i16(<vscale x 1 x i16> undef)
+  call <vscale x 2 x i16> @llvm.ctpop.nxv2i16(<vscale x 2 x i16> undef)
+  call <vscale x 4 x i16> @llvm.ctpop.nxv4i16(<vscale x 4 x i16> undef)
+  call <vscale x 8 x i16> @llvm.ctpop.nxv8i16(<vscale x 8 x i16> undef)
+  call <vscale x 16 x i16> @llvm.ctpop.nxv16i16(<vscale x 16 x i16> undef)
   call i32 @llvm.ctpop.i32(i32 undef)
   call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> undef)
   call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> undef)
   call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> undef)
   call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> undef)
-  call <vscale x 1 x i32> @llvm.ctpop.nvx1i32(<vscale x 1 x i32> undef)
-  call <vscale x 2 x i32> @llvm.ctpop.nvx2i32(<vscale x 2 x i32> undef)
-  call <vscale x 4 x i32> @llvm.ctpop.nvx4i32(<vscale x 4 x i32> undef)
-  call <vscale x 8 x i32> @llvm.ctpop.nvx8i32(<vscale x 8 x i32> undef)
-  call <vscale x 16 x i32> @llvm.ctpop.nvx16i32(<vscale x 16 x i32> undef)
+  call <vscale x 1 x i32> @llvm.ctpop.nxv1i32(<vscale x 1 x i32> undef)
+  call <vscale x 2 x i32> @llvm.ctpop.nxv2i32(<vscale x 2 x i32> undef)
+  call <vscale x 4 x i32> @llvm.ctpop.nxv4i32(<vscale x 4 x i32> undef)
+  call <vscale x 8 x i32> @llvm.ctpop.nxv8i32(<vscale x 8 x i32> undef)
+  call <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32> undef)
   call i64 @llvm.ctpop.i64(i64 undef)
   call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> undef)
   call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> undef)
   call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> undef)
   call <16 x i64> @llvm.ctpop.v16i64(<16 x i64> undef)
-  call <vscale x 1 x i64> @llvm.ctpop.nvx1i64(<vscale x 1 x i64> undef)
-  call <vscale x 2 x i64> @llvm.ctpop.nvx2i64(<vscale x 2 x i64> undef)
-  call <vscale x 4 x i64> @llvm.ctpop.nvx4i64(<vscale x 4 x i64> undef)
-  call <vscale x 8 x i64> @llvm.ctpop.nvx8i64(<vscale x 8 x i64> undef)
-  call <vscale x 16 x i64> @llvm.ctpop.nvx16i64(<vscale x 16 x i64> undef)
+  call <vscale x 1 x i64> @llvm.ctpop.nxv1i64(<vscale x 1 x i64> undef)
+  call <vscale x 2 x i64> @llvm.ctpop.nxv2i64(<vscale x 2 x i64> undef)
+  call <vscale x 4 x i64> @llvm.ctpop.nxv4i64(<vscale x 4 x i64> undef)
+  call <vscale x 8 x i64> @llvm.ctpop.nxv8i64(<vscale x 8 x i64> undef)
+  call <vscale x 16 x i64> @llvm.ctpop.nxv16i64(<vscale x 16 x i64> undef)
   ret void
 }
 
@@ -322,29 +322,29 @@ define void @vp_bswap() {
   call <4 x i16> @llvm.vp.bswap.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
   call <8 x i16> @llvm.vp.bswap.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
   call <16 x i16> @llvm.vp.bswap.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x i16> @llvm.vp.bswap.nvx1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x i16> @llvm.vp.bswap.nvx2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x i16> @llvm.vp.bswap.nvx4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x i16> @llvm.vp.bswap.nvx8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
-  call <vscale x 16 x i16> @llvm.vp.bswap.nvx16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+  call <vscale x 1 x i16> @llvm.vp.bswap.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x i16> @llvm.vp.bswap.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x i16> @llvm.vp.bswap.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x i16> @llvm.vp.bswap.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x i16> @llvm.vp.bswap.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
   call <2 x i32> @llvm.vp.bswap.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
   call <4 x i32> @llvm.vp.bswap.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
   call <8 x i32> @llvm.vp.bswap.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
   call <16 x i32> @llvm.vp.bswap.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x i32> @llvm.vp.bswap.nvx1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x i32> @llvm.vp.bswap.nvx2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x i32> @llvm.vp.bswap.nvx4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x i32> @llvm.vp.bswap.nvx8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
-  call <vscale x 16 x i32> @llvm.vp.bswap.nvx16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+  call <vscale x 1 x i32> @llvm.vp.bswap.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x i32> @llvm.vp.bswap.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x i32> @llvm.vp.bswap.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x i32> @llvm.vp.bswap.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x i32> @llvm.vp.bswap.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
   call <2 x i64> @llvm.vp.bswap.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
   call <4 x i64> @llvm.vp.bswap.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
   call <8 x i64> @llvm.vp.bswap.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
   call <16 x i64> @llvm.vp.bswap.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x i64> @llvm.vp.bswap.nvx1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x i64> @llvm.vp.bswap.nvx2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x i64> @llvm.vp.bswap.nvx4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x i64> @llvm.vp.bswap.nvx8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
-  call <vscale x 16 x i64> @llvm.vp.bswap.nvx16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+  call <vscale x 1 x i64> @llvm.vp.bswap.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x i64> @llvm.vp.bswap.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x i64> @llvm.vp.bswap.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x i64> @llvm.vp.bswap.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x i64> @llvm.vp.bswap.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
   ret void
 }
 
@@ -392,38 +392,38 @@ define void @vp_ctpop() {
   call <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
   call <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
   call <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x i16> @llvm.vp.ctpop.nvx1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x i16> @llvm.vp.ctpop.nvx2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x i16> @llvm.vp.ctpop.nvx4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x i16> @llvm.vp.ctpop.nvx8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
-  call <vscale x 16 x i16> @llvm.vp.ctpop.nvx16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+  call <vscale x 1 x i16> @llvm.vp.ctpop.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x i16> @llvm.vp.ctpop.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x i16> @llvm.vp.ctpop.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x i16> @llvm.vp.ctpop.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x i16> @llvm.vp.ctpop.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
   call <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
   call <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
   call <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
   call <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x i16> @llvm.vp.ctpop.nvx1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x i16> @llvm.vp.ctpop.nvx2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x i16> @llvm.vp.ctpop.nvx4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x i16> @llvm.vp.ctpop.nvx8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
-  call <vscale x 16 x i16> @llvm.vp.ctpop.nvx16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+  call <vscale x 1 x i16> @llvm.vp.ctpop.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x i16> @llvm.vp.ctpop.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x i16> @llvm.vp.ctpop.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x i16> @llvm.vp.ctpop.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x i16> @llvm.vp.ctpop.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
   call <2 x i32> @llvm.vp.ctpop.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
   call <4 x i32> @llvm.vp.ctpop.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
   call <8 x i32> @llvm.vp.ctpop.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
   call <16 x i32> @llvm.vp.ctpop.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x i32> @llvm.vp.ctpop.nvx1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x i32> @llvm.vp.ctpop.nvx2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x i32> @llvm.vp.ctpop.nvx4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x i32> @llvm.vp.ctpop.nvx8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
-  call <vscale x 16 x i32> @llvm.vp.ctpop.nvx16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+  call <vscale x 1 x i32> @llvm.vp.ctpop.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x i32> @llvm.vp.ctpop.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x i32> @llvm.vp.ctpop.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x i32> @llvm.vp.ctpop.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x i32> @llvm.vp.ctpop.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
   call <2 x i64> @llvm.vp.ctpop.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
   call <4 x i64> @llvm.vp.ctpop.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
   call <8 x i64> @llvm.vp.ctpop.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
   call <16 x i64> @llvm.vp.ctpop.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x i64> @llvm.vp.ctpop.nvx1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x i64> @llvm.vp.ctpop.nvx2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x i64> @llvm.vp.ctpop.nvx4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x i64> @llvm.vp.ctpop.nvx8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
-  call <vscale x 16 x i64> @llvm.vp.ctpop.nvx16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+  call <vscale x 1 x i64> @llvm.vp.ctpop.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x i64> @llvm.vp.ctpop.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x i64> @llvm.vp.ctpop.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x i64> @llvm.vp.ctpop.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x i64> @llvm.vp.ctpop.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
   ret void
 }
 
@@ -484,51 +484,51 @@ define void @vp_ctlz() {
   call <4 x i8> @llvm.vp.ctlz.v4i8(<4 x i8> undef, i1 false, <4 x i1> undef, i32 undef)
   call <8 x i8> @llvm.vp.ctlz.v8i8(<8 x i8> undef, i1 false, <8 x i1> undef, i32 undef)
   call <16 x i8> @llvm.vp.ctlz.v16i8(<16 x i8> undef, i1 false, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x i8> @llvm.vp.ctlz.nvx1i8(<vscale x 1 x i8> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x i8> @llvm.vp.ctlz.nvx2i8(<vscale x 2 x i8> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x i8> @llvm.vp.ctlz.nvx4i8(<vscale x 4 x i8> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x i8> @llvm.vp.ctlz.nvx8i8(<vscale x 8 x i8> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
-  call <vscale x 16 x i8> @llvm.vp.ctlz.nvx16i8(<vscale x 16 x i8> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
-  call <vscale x 32 x i8> @llvm.vp.ctlz.nvx32i8(<vscale x 32 x i8> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
-  call <vscale x 64 x i8> @llvm.vp.ctlz.nvx64i8(<vscale x 64 x i8> undef, i1 false, <vscale x 64 x i1> undef, i32 undef)
+  call <vscale x 1 x i8> @llvm.vp.ctlz.nxv1i8(<vscale x 1 x i8> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x i8> @llvm.vp.ctlz.nxv2i8(<vscale x 2 x i8> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x i8> @llvm.vp.ctlz.nxv4i8(<vscale x 4 x i8> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x i8> @llvm.vp.ctlz.nxv8i8(<vscale x 8 x i8> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x i8> @llvm.vp.ctlz.nxv16i8(<vscale x 16 x i8> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
+  call <vscale x 32 x i8> @llvm.vp.ctlz.nxv32i8(<vscale x 32 x i8> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
+  call <vscale x 64 x i8> @llvm.vp.ctlz.nxv64i8(<vscale x 64 x i8> undef, i1 false, <vscale x 64 x i1> undef, i32 undef)
   call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> undef, i1 false, <2 x i1> undef, i32 undef)
   call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> undef, i1 false, <4 x i1> undef, i32 undef)
   call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> undef, i1 false, <8 x i1> undef, i32 undef)
   call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> undef, i1 false, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x i16> @llvm.vp.ctlz.nvx1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x i16> @llvm.vp.ctlz.nvx2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x i16> @llvm.vp.ctlz.nvx4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x i16> @llvm.vp.ctlz.nvx8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
-  call <vscale x 16 x i16> @llvm.vp.ctlz.nvx16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
-  call <vscale x 32 x i16> @llvm.vp.ctlz.nvx32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
+  call <vscale x 1 x i16> @llvm.vp.ctlz.nxv1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x i16> @llvm.vp.ctlz.nxv2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x i16> @llvm.vp.ctlz.nxv4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x i16> @llvm.vp.ctlz.nxv8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x i16> @llvm.vp.ctlz.nxv16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
+  call <vscale x 32 x i16> @llvm.vp.ctlz.nxv32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
   call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> undef, i1 false, <2 x i1> undef, i32 undef)
   call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> undef, i1 false, <4 x i1> undef, i32 undef)
   call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> undef, i1 false, <8 x i1> undef, i32 undef)
   call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> undef, i1 false, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x i16> @llvm.vp.ctlz.nvx1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x i16> @llvm.vp.ctlz.nvx2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x i16> @llvm.vp.ctlz.nvx4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x i16> @llvm.vp.ctlz.nvx8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
-  call <vscale x 16 x i16> @llvm.vp.ctlz.nvx16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
-  call <vscale x 32 x i16> @llvm.vp.ctlz.nvx32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
+  call <vscale x 1 x i16> @llvm.vp.ctlz.nxv1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x i16> @llvm.vp.ctlz.nxv2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x i16> @llvm.vp.ctlz.nxv4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x i16> @llvm.vp.ctlz.nxv8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x i16> @llvm.vp.ctlz.nxv16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
+  call <vscale x 32 x i16> @llvm.vp.ctlz.nxv32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
   call <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32> undef, i1 false, <2 x i1> undef, i32 undef)
   call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> undef, i1 false, <4 x i1> undef, i32 undef)
   call <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32> undef, i1 false, <8 x i1> undef, i32 undef)
   call <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32> undef, i1 false, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x i32> @llvm.vp.ctlz.nvx1i32(<vscale x 1 x i32> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x i32> @llvm.vp.ctlz.nvx2i32(<vscale x 2 x i32> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x i32> @llvm.vp.ctlz.nvx4i32(<vscale x 4 x i32> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x i32> @llvm.vp.ctlz.nvx8i32(<vscale x 8 x i32> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
-  call <vscale x 16 x i32> @llvm.vp.ctlz.nvx16i32(<vscale x 16 x i32> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
+  call <vscale x 1 x i32> @llvm.vp.ctlz.nxv1i32(<vscale x 1 x i32> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x i32> @llvm.vp.ctlz.nxv2i32(<vscale x 2 x i32> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x i32> @llvm.vp.ctlz.nxv4i32(<vscale x 4 x i32> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x i32> @llvm.vp.ctlz.nxv8i32(<vscale x 8 x i32> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x i32> @llvm.vp.ctlz.nxv16i32(<vscale x 16 x i32> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
   call <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64> undef, i1 false, <2 x i1> undef, i32 undef)
   call <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64> undef, i1 false, <4 x i1> undef, i32 undef)
   call <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64> undef, i1 false, <8 x i1> undef, i32 undef)
   call <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64> undef, i1 false, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x i64> @llvm.vp.ctlz.nvx1i64(<vscale x 1 x i64> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x i64> @llvm.vp.ctlz.nvx2i64(<vscale x 2 x i64> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x i64> @llvm.vp.ctlz.nvx4i64(<vscale x 4 x i64> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x i64> @llvm.vp.ctlz.nvx8i64(<vscale x 8 x i64> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
-  call <vscale x 16 x i64> @llvm.vp.ctlz.nvx16i64(<vscale x 16 x i64> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
+  call <vscale x 1 x i64> @llvm.vp.ctlz.nxv1i64(<vscale x 1 x i64> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x i64> @llvm.vp.ctlz.nxv2i64(<vscale x 2 x i64> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x i64> @llvm.vp.ctlz.nxv4i64(<vscale x 4 x i64> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x i64> @llvm.vp.ctlz.nxv8i64(<vscale x 8 x i64> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x i64> @llvm.vp.ctlz.nxv16i64(<vscale x 16 x i64> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
   ret void
 }
 
@@ -589,51 +589,51 @@ define void @vp_cttz() {
   call <4 x i8> @llvm.vp.cttz.v4i8(<4 x i8> undef, i1 false, <4 x i1> undef, i32 undef)
   call <8 x i8> @llvm.vp.cttz.v8i8(<8 x i8> undef, i1 false, <8 x i1> undef, i32 undef)
   call <16 x i8> @llvm.vp.cttz.v16i8(<16 x i8> undef, i1 false, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x i8> @llvm.vp.cttz.nvx1i8(<vscale x 1 x i8> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x i8> @llvm.vp.cttz.nvx2i8(<vscale x 2 x i8> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x i8> @llvm.vp.cttz.nvx4i8(<vscale x 4 x i8> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x i8> @llvm.vp.cttz.nvx8i8(<vscale x 8 x i8> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
-  call <vscale x 16 x i8> @llvm.vp.cttz.nvx16i8(<vscale x 16 x i8> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
-  call <vscale x 32 x i8> @llvm.vp.cttz.nvx32i8(<vscale x 32 x i8> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
-  call <vscale x 64 x i8> @llvm.vp.cttz.nvx64i8(<vscale x 64 x i8> undef, i1 false, <vscale x 64 x i1> undef, i32 undef)
+  call <vscale x 1 x i8> @llvm.vp.cttz.nxv1i8(<vscale x 1 x i8> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x i8> @llvm.vp.cttz.nxv2i8(<vscale x 2 x i8> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x i8> @llvm.vp.cttz.nxv4i8(<vscale x 4 x i8> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x i8> @llvm.vp.cttz.nxv8i8(<vscale x 8 x i8> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x i8> @llvm.vp.cttz.nxv16i8(<vscale x 16 x i8> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
+  call <vscale x 32 x i8> @llvm.vp.cttz.nxv32i8(<vscale x 32 x i8> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
+  call <vscale x 64 x i8> @llvm.vp.cttz.nxv64i8(<vscale x 64 x i8> undef, i1 false, <vscale x 64 x i1> undef, i32 undef)
   call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> undef, i1 false, <2 x i1> undef, i32 undef)
   call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> undef, i1 false, <4 x i1> undef, i32 undef)
   call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> undef, i1 false, <8 x i1> undef, i32 undef)
   call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> undef, i1 false, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x i16> @llvm.vp.cttz.nvx1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x i16> @llvm.vp.cttz.nvx2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x i16> @llvm.vp.cttz.nvx4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x i16> @llvm.vp.cttz.nvx8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
-  call <vscale x 16 x i16> @llvm.vp.cttz.nvx16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
-  call <vscale x 32 x i16> @llvm.vp.cttz.nvx32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
+  call <vscale x 1 x i16> @llvm.vp.cttz.nxv1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
+  call <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
   call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> undef, i1 false, <2 x i1> undef, i32 undef)
   call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> undef, i1 false, <4 x i1> undef, i32 undef)
   call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> undef, i1 false, <8 x i1> undef, i32 undef)
   call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> undef, i1 false, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x i16> @llvm.vp.cttz.nvx1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x i16> @llvm.vp.cttz.nvx2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x i16> @llvm.vp.cttz.nvx4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x i16> @llvm.vp.cttz.nvx8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
-  call <vscale x 16 x i16> @llvm.vp.cttz.nvx16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
-  call <vscale x 32 x i16> @llvm.vp.cttz.nvx32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
+  call <vscale x 1 x i16> @llvm.vp.cttz.nxv1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
+  call <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
   call <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32> undef, i1 false, <2 x i1> undef, i32 undef)
   call <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32> undef, i1 false, <4 x i1> undef, i32 undef)
   call <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32> undef, i1 false, <8 x i1> undef, i32 undef)
   call <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32> undef, i1 false, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x i32> @llvm.vp.cttz.nvx1i32(<vscale x 1 x i32> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x i32> @llvm.vp.cttz.nvx2i32(<vscale x 2 x i32> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x i32> @llvm.vp.cttz.nvx4i32(<vscale x 4 x i32> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x i32> @llvm.vp.cttz.nvx8i32(<vscale x 8 x i32> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
-  call <vscale x 16 x i32> @llvm.vp.cttz.nvx16i32(<vscale x 16 x i32> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
+  call <vscale x 1 x i32> @llvm.vp.cttz.nxv1i32(<vscale x 1 x i32> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x i32> @llvm.vp.cttz.nxv2i32(<vscale x 2 x i32> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x i32> @llvm.vp.cttz.nxv4i32(<vscale x 4 x i32> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x i32> @llvm.vp.cttz.nxv8i32(<vscale x 8 x i32> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x i32> @llvm.vp.cttz.nxv16i32(<vscale x 16 x i32> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
   call <2 x i64> @llvm.vp.cttz.v2i64(<2 x i64> undef, i1 false, <2 x i1> undef, i32 undef)
   call <4 x i64> @llvm.vp.cttz.v4i64(<4 x i64> undef, i1 false, <4 x i1> undef, i32 undef)
   call <8 x i64> @llvm.vp.cttz.v8i64(<8 x i64> undef, i1 false, <8 x i1> undef, i32 undef)
   call <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64> undef, i1 false, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x i64> @llvm.vp.cttz.nvx1i64(<vscale x 1 x i64> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x i64> @llvm.vp.cttz.nvx2i64(<vscale x 2 x i64> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x i64> @llvm.vp.cttz.nvx4i64(<vscale x 4 x i64> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x i64> @llvm.vp.cttz.nvx8i64(<vscale x 8 x i64> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
-  call <vscale x 16 x i64> @llvm.vp.cttz.nvx16i64(<vscale x 16 x i64> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
+  call <vscale x 1 x i64> @llvm.vp.cttz.nxv1i64(<vscale x 1 x i64> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x i64> @llvm.vp.cttz.nxv2i64(<vscale x 2 x i64> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x i64> @llvm.vp.cttz.nxv4i64(<vscale x 4 x i64> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x i64> @llvm.vp.cttz.nxv8i64(<vscale x 8 x i64> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x i64> @llvm.vp.cttz.nxv16i64(<vscale x 16 x i64> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
   ret void
 }
 
@@ -642,255 +642,255 @@ declare <2 x i16> @llvm.bswap.v2i16(<2 x i16>)
 declare <4 x i16> @llvm.bswap.v4i16(<4 x i16>)
 declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
 declare <16 x i16> @llvm.bswap.v16i16(<16 x i16>)
-declare <vscale x 1 x i16> @llvm.bswap.nvx1i16(<vscale x 1 x i16>)
-declare <vscale x 2 x i16> @llvm.bswap.nvx2i16(<vscale x 2 x i16>)
-declare <vscale x 4 x i16> @llvm.bswap.nvx4i16(<vscale x 4 x i16>)
-declare <vscale x 8 x i16> @llvm.bswap.nvx8i16(<vscale x 8 x i16>)
-declare <vscale x 16 x i16> @llvm.bswap.nvx16i16(<vscale x 16 x i16>)
+declare <vscale x 1 x i16> @llvm.bswap.nxv1i16(<vscale x 1 x i16>)
+declare <vscale x 2 x i16> @llvm.bswap.nxv2i16(<vscale x 2 x i16>)
+declare <vscale x 4 x i16> @llvm.bswap.nxv4i16(<vscale x 4 x i16>)
+declare <vscale x 8 x i16> @llvm.bswap.nxv8i16(<vscale x 8 x i16>)
+declare <vscale x 16 x i16> @llvm.bswap.nxv16i16(<vscale x 16 x i16>)
 declare i32 @llvm.bswap.i32(i32)
 declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>)
 declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
 declare <8 x i32> @llvm.bswap.v8i32(<8 x i32>)
 declare <16 x i32> @llvm.bswap.v16i32(<16 x i32>)
-declare <vscale x 1 x i32> @llvm.bswap.nvx1i32(<vscale x 1 x i32>)
-declare <vscale x 2 x i32> @llvm.bswap.nvx2i32(<vscale x 2 x i32>)
-declare <vscale x 4 x i32> @llvm.bswap.nvx4i32(<vscale x 4 x i32>)
-declare <vscale x 8 x i32> @llvm.bswap.nvx8i32(<vscale x 8 x i32>)
-declare <vscale x 16 x i32> @llvm.bswap.nvx16i32(<vscale x 16 x i32>)
+declare <vscale x 1 x i32> @llvm.bswap.nxv1i32(<vscale x 1 x i32>)
+declare <vscale x 2 x i32> @llvm.bswap.nxv2i32(<vscale x 2 x i32>)
+declare <vscale x 4 x i32> @llvm.bswap.nxv4i32(<vscale x 4 x i32>)
+declare <vscale x 8 x i32> @llvm.bswap.nxv8i32(<vscale x 8 x i32>)
+declare <vscale x 16 x i32> @llvm.bswap.nxv16i32(<vscale x 16 x i32>)
 declare i64 @llvm.bswap.i64(i64)
 declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
 declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>)
 declare <8 x i64> @llvm.bswap.v8i64(<8 x i64>)
 declare <16 x i64> @llvm.bswap.v16i64(<16 x i64>)
-declare <vscale x 1 x i64> @llvm.bswap.nvx1i64(<vscale x 1 x i64>)
-declare <vscale x 2 x i64> @llvm.bswap.nvx2i64(<vscale x 2 x i64>)
-declare <vscale x 4 x i64> @llvm.bswap.nvx4i64(<vscale x 4 x i64>)
-declare <vscale x 8 x i64> @llvm.bswap.nvx8i64(<vscale x 8 x i64>)
-declare <vscale x 16 x i64> @llvm.bswap.nvx16i64(<vscale x 16 x i64>)
+declare <vscale x 1 x i64> @llvm.bswap.nxv1i64(<vscale x 1 x i64>)
+declare <vscale x 2 x i64> @llvm.bswap.nxv2i64(<vscale x 2 x i64>)
+declare <vscale x 4 x i64> @llvm.bswap.nxv4i64(<vscale x 4 x i64>)
+declare <vscale x 8 x i64> @llvm.bswap.nxv8i64(<vscale x 8 x i64>)
+declare <vscale x 16 x i64> @llvm.bswap.nxv16i64(<vscale x 16 x i64>)
 
 declare i8 @llvm.bitreverse.i8(i8)
 declare <2 x i8> @llvm.bitreverse.v2i8(<2 x i8>)
 declare <4 x i8> @llvm.bitreverse.v4i8(<4 x i8>)
 declare <8 x i8> @llvm.bitreverse.v8i8(<8 x i8>)
 declare <16 x i8> @llvm.bitreverse.v16i8(<16 x i8>)
-declare <vscale x 1 x i8> @llvm.bitreverse.nvx1i8(<vscale x 1 x i8>)
-declare <vscale x 2 x i8> @llvm.bitreverse.nvx2i8(<vscale x 2 x i8>)
-declare <vscale x 4 x i8> @llvm.bitreverse.nvx4i8(<vscale x 4 x i8>)
-declare <vscale x 8 x i8> @llvm.bitreverse.nvx8i8(<vscale x 8 x i8>)
-declare <vscale x 16 x i8> @llvm.bitreverse.nvx16i8(<vscale x 16 x i8>)
+declare <vscale x 1 x i8> @llvm.bitreverse.nxv1i8(<vscale x 1 x i8>)
+declare <vscale x 2 x i8> @llvm.bitreverse.nxv2i8(<vscale x 2 x i8>)
+declare <vscale x 4 x i8> @llvm.bitreverse.nxv4i8(<vscale x 4 x i8>)
+declare <vscale x 8 x i8> @llvm.bitreverse.nxv8i8(<vscale x 8 x i8>)
+declare <vscale x 16 x i8> @llvm.bitreverse.nxv16i8(<vscale x 16 x i8>)
 declare i16 @llvm.bitreverse.i16(i16)
 declare <2 x i16> @llvm.bitreverse.v2i16(<2 x i16>)
 declare <4 x i16> @llvm.bitreverse.v4i16(<4 x i16>)
 declare <8 x i16> @llvm.bitreverse.v8i16(<8 x i16>)
 declare <16 x i16> @llvm.bitreverse.v16i16(<16 x i16>)
-declare <vscale x 1 x i16> @llvm.bitreverse.nvx1i16(<vscale x 1 x i16>)
-declare <vscale x 2 x i16> @llvm.bitreverse.nvx2i16(<vscale x 2 x i16>)
-declare <vscale x 4 x i16> @llvm.bitreverse.nvx4i16(<vscale x 4 x i16>)
-declare <vscale x 8 x i16> @llvm.bitreverse.nvx8i16(<vscale x 8 x i16>)
-declare <vscale x 16 x i16> @llvm.bitreverse.nvx16i16(<vscale x 16 x i16>)
+declare <vscale x 1 x i16> @llvm.bitreverse.nxv1i16(<vscale x 1 x i16>)
+declare <vscale x 2 x i16> @llvm.bitreverse.nxv2i16(<vscale x 2 x i16>)
+declare <vscale x 4 x i16> @llvm.bitreverse.nxv4i16(<vscale x 4 x i16>)
+declare <vscale x 8 x i16> @llvm.bitreverse.nxv8i16(<vscale x 8 x i16>)
+declare <vscale x 16 x i16> @llvm.bitreverse.nxv16i16(<vscale x 16 x i16>)
 declare i32 @llvm.bitreverse.i32(i32)
 declare <2 x i32> @llvm.bitreverse.v2i32(<2 x i32>)
 declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>)
 declare <8 x i32> @llvm.bitreverse.v8i32(<8 x i32>)
 declare <16 x i32> @llvm.bitreverse.v16i32(<16 x i32>)
-declare <vscale x 1 x i32> @llvm.bitreverse.nvx1i32(<vscale x 1 x i32>)
-declare <vscale x 2 x i32> @llvm.bitreverse.nvx2i32(<vscale x 2 x i32>)
-declare <vscale x 4 x i32> @llvm.bitreverse.nvx4i32(<vscale x 4 x i32>)
-declare <vscale x 8 x i32> @llvm.bitreverse.nvx8i32(<vscale x 8 x i32>)
-declare <vscale x 16 x i32> @llvm.bitreverse.nvx16i32(<vscale x 16 x i32>)
+declare <vscale x 1 x i32> @llvm.bitreverse.nxv1i32(<vscale x 1 x i32>)
+declare <vscale x 2 x i32> @llvm.bitreverse.nxv2i32(<vscale x 2 x i32>)
+declare <vscale x 4 x i32> @llvm.bitreverse.nxv4i32(<vscale x 4 x i32>)
+declare <vscale x 8 x i32> @llvm.bitreverse.nxv8i32(<vscale x 8 x i32>)
+declare <vscale x 16 x i32> @llvm.bitreverse.nxv16i32(<vscale x 16 x i32>)
 declare i64 @llvm.bitreverse.i64(i64)
 declare <2 x i64> @llvm.bitreverse.v2i64(<2 x i64>)
 declare <4 x i64> @llvm.bitreverse.v4i64(<4 x i64>)
 declare <8 x i64> @llvm.bitreverse.v8i64(<8 x i64>)
 declare <16 x i64> @llvm.bitreverse.v16i64(<16 x i64>)
-declare <vscale x 1 x i64> @llvm.bitreverse.nvx1i64(<vscale x 1 x i64>)
-declare <vscale x 2 x i64> @llvm.bitreverse.nvx2i64(<vscale x 2 x i64>)
-declare <vscale x 4 x i64> @llvm.bitreverse.nvx4i64(<vscale x 4 x i64>)
-declare <vscale x 8 x i64> @llvm.bitreverse.nvx8i64(<vscale x 8 x i64>)
-declare <vscale x 16 x i64> @llvm.bitreverse.nvx16i64(<vscale x 16 x i64>)
+declare <vscale x 1 x i64> @llvm.bitreverse.nxv1i64(<vscale x 1 x i64>)
+declare <vscale x 2 x i64> @llvm.bitreverse.nxv2i64(<vscale x 2 x i64>)
+declare <vscale x 4 x i64> @llvm.bitreverse.nxv4i64(<vscale x 4 x i64>)
+declare <vscale x 8 x i64> @llvm.bitreverse.nxv8i64(<vscale x 8 x i64>)
+declare <vscale x 16 x i64> @llvm.bitreverse.nxv16i64(<vscale x 16 x i64>)
 
 declare i8 @llvm.ctpop.i8(i8)
 declare <2 x i8> @llvm.ctpop.v2i8(<2 x i8>)
 declare <4 x i8> @llvm.ctpop.v4i8(<4 x i8>)
 declare <8 x i8> @llvm.ctpop.v8i8(<8 x i8>)
 declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>)
-declare <vscale x 1 x i8> @llvm.ctpop.nvx1i8(<vscale x 1 x i8>)
-declare <vscale x 2 x i8> @llvm.ctpop.nvx2i8(<vscale x 2 x i8>)
-declare <vscale x 4 x i8> @llvm.ctpop.nvx4i8(<vscale x 4 x i8>)
-declare <vscale x 8 x i8> @llvm.ctpop.nvx8i8(<vscale x 8 x i8>)
-declare <vscale x 16 x i8> @llvm.ctpop.nvx16i8(<vscale x 16 x i8>)
+declare <vscale x 1 x i8> @llvm.ctpop.nxv1i8(<vscale x 1 x i8>)
+declare <vscale x 2 x i8> @llvm.ctpop.nxv2i8(<vscale x 2 x i8>)
+declare <vscale x 4 x i8> @llvm.ctpop.nxv4i8(<vscale x 4 x i8>)
+declare <vscale x 8 x i8> @llvm.ctpop.nxv8i8(<vscale x 8 x i8>)
+declare <vscale x 16 x i8> @llvm.ctpop.nxv16i8(<vscale x 16 x i8>)
 declare i16 @llvm.ctpop.i16(i16)
 declare <2 x i16> @llvm.ctpop.v2i16(<2 x i16>)
 declare <4 x i16> @llvm.ctpop.v4i16(<4 x i16>)
 declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>)
 declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>)
-declare <vscale x 1 x i16> @llvm.ctpop.nvx1i16(<vscale x 1 x i16>)
-declare <vscale x 2 x i16> @llvm.ctpop.nvx2i16(<vscale x 2 x i16>)
-declare <vscale x 4 x i16> @llvm.ctpop.nvx4i16(<vscale x 4 x i16>)
-declare <vscale x 8 x i16> @llvm.ctpop.nvx8i16(<vscale x 8 x i16>)
-declare <vscale x 16 x i16> @llvm.ctpop.nvx16i16(<vscale x 16 x i16>)
+declare <vscale x 1 x i16> @llvm.ctpop.nxv1i16(<vscale x 1 x i16>)
+declare <vscale x 2 x i16> @llvm.ctpop.nxv2i16(<vscale x 2 x i16>)
+declare <vscale x 4 x i16> @llvm.ctpop.nxv4i16(<vscale x 4 x i16>)
+declare <vscale x 8 x i16> @llvm.ctpop.nxv8i16(<vscale x 8 x i16>)
+declare <vscale x 16 x i16> @llvm.ctpop.nxv16i16(<vscale x 16 x i16>)
 declare i32 @llvm.ctpop.i32(i32)
 declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)
 declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
 declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>)
 declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>)
-declare <vscale x 1 x i32> @llvm.ctpop.nvx1i32(<vscale x 1 x i32>)
-declare <vscale x 2 x i32> @llvm.ctpop.nvx2i32(<vscale x 2 x i32>)
-declare <vscale x 4 x i32> @llvm.ctpop.nvx4i32(<vscale x 4 x i32>)
-declare <vscale x 8 x i32> @llvm.ctpop.nvx8i32(<vscale x 8 x i32>)
-declare <vscale x 16 x i32> @llvm.ctpop.nvx16i32(<vscale x 16 x i32>)
+declare <vscale x 1 x i32> @llvm.ctpop.nxv1i32(<vscale x 1 x i32>)
+declare <vscale x 2 x i32> @llvm.ctpop.nxv2i32(<vscale x 2 x i32>)
+declare <vscale x 4 x i32> @llvm.ctpop.nxv4i32(<vscale x 4 x i32>)
+declare <vscale x 8 x i32> @llvm.ctpop.nxv8i32(<vscale x 8 x i32>)
+declare <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32>)
 declare i64 @llvm.ctpop.i64(i64)
 declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
 declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>)
 declare <8 x i64> @llvm.ctpop.v8i64(<8 x i64>)
 declare <16 x i64> @llvm.ctpop.v16i64(<16 x i64>)
-declare <vscale x 1 x i64> @llvm.ctpop.nvx1i64(<vscale x 1 x i64>)
-declare <vscale x 2 x i64> @llvm.ctpop.nvx2i64(<vscale x 2 x i64>)
-declare <vscale x 4 x i64> @llvm.ctpop.nvx4i64(<vscale x 4 x i64>)
-declare <vscale x 8 x i64> @llvm.ctpop.nvx8i64(<vscale x 8 x i64>)
-declare <vscale x 16 x i64> @llvm.ctpop.nvx16i64(<vscale x 16 x i64>)
+declare <vscale x 1 x i64> @llvm.ctpop.nxv1i64(<vscale x 1 x i64>)
+declare <vscale x 2 x i64> @llvm.ctpop.nxv2i64(<vscale x 2 x i64>)
+declare <vscale x 4 x i64> @llvm.ctpop.nxv4i64(<vscale x 4 x i64>)
+declare <vscale x 8 x i64> @llvm.ctpop.nxv8i64(<vscale x 8 x i64>)
+declare <vscale x 16 x i64> @llvm.ctpop.nxv16i64(<vscale x 16 x i64>)
 
 declare <2 x i16> @llvm.vp.bswap.v2i16(<2 x i16>, <2 x i1>, i32)
 declare <4 x i16> @llvm.vp.bswap.v4i16(<4 x i16>, <4 x i1>, i32)
 declare <8 x i16> @llvm.vp.bswap.v8i16(<8 x i16>, <8 x i1>, i32)
 declare <16 x i16> @llvm.vp.bswap.v16i16(<16 x i16>, <16 x i1>, i32)
-declare <vscale x 1 x i16> @llvm.vp.bswap.nvx1i16(<vscale x 1 x i16>, <vscale x 1 x i1>, i32)
-declare <vscale x 2 x i16> @llvm.vp.bswap.nvx2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i32)
-declare <vscale x 4 x i16> @llvm.vp.bswap.nvx4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i32)
-declare <vscale x 8 x i16> @llvm.vp.bswap.nvx8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i32)
-declare <vscale x 16 x i16> @llvm.vp.bswap.nvx16i16(<vscale x 16 x i16>, <vscale x 16 x i1>, i32)
+declare <vscale x 1 x i16> @llvm.vp.bswap.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i1>, i32)
+declare <vscale x 2 x i16> @llvm.vp.bswap.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i32)
+declare <vscale x 4 x i16> @llvm.vp.bswap.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i32)
+declare <vscale x 8 x i16> @llvm.vp.bswap.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i32)
+declare <vscale x 16 x i16> @llvm.vp.bswap.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i1>, i32)
 declare <2 x i32> @llvm.vp.bswap.v2i32(<2 x i32>, <2 x i1>, i32)
 declare <4 x i32> @llvm.vp.bswap.v4i32(<4 x i32>, <4 x i1>, i32)
 declare <8 x i32> @llvm.vp.bswap.v8i32(<8 x i32>, <8 x i1>, i32)
 declare <16 x i32> @llvm.vp.bswap.v16i32(<16 x i32>, <16 x i1>, i32)
-declare <vscale x 1 x i32> @llvm.vp.bswap.nvx1i32(<vscale x 1 x i32>, <vscale x 1 x i1>, i32)
-declare <vscale x 2 x i32> @llvm.vp.bswap.nvx2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32)
-declare <vscale x 4 x i32> @llvm.vp.bswap.nvx4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)
-declare <vscale x 8 x i32> @llvm.vp.bswap.nvx8i32(<vscale x 8 x i32>, <vscale x 8 x i1>, i32)
-declare <vscale x 16 x i32> @llvm.vp.bswap.nvx16i32(<vscale x 16 x i32>, <vscale x 16 x i1>, i32)
+declare <vscale x 1 x i32> @llvm.vp.bswap.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i1>, i32)
+declare <vscale x 2 x i32> @llvm.vp.bswap.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32)
+declare <vscale x 4 x i32> @llvm.vp.bswap.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)
+declare <vscale x 8 x i32> @llvm.vp.bswap.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i1>, i32)
+declare <vscale x 16 x i32> @llvm.vp.bswap.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i1>, i32)
 declare <2 x i64> @llvm.vp.bswap.v2i64(<2 x i64>, <2 x i1>, i32)
 declare <4 x i64> @llvm.vp.bswap.v4i64(<4 x i64>, <4 x i1>, i32)
 declare <8 x i64> @llvm.vp.bswap.v8i64(<8 x i64>, <8 x i1>, i32)
 declare <16 x i64> @llvm.vp.bswap.v16i64(<16 x i64>, <16 x i1>, i32)
-declare <vscale x 1 x i64> @llvm.vp.bswap.nvx1i64(<vscale x 1 x i64>, <vscale x 1 x i1>, i32)
-declare <vscale x 2 x i64> @llvm.vp.bswap.nvx2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)
-declare <vscale x 4 x i64> @llvm.vp.bswap.nvx4i64(<vscale x 4 x i64>, <vscale x 4 x i1>, i32)
-declare <vscale x 8 x i64> @llvm.vp.bswap.nvx8i64(<vscale x 8 x i64>, <vscale x 8 x i1>, i32)
-declare <vscale x 16 x i64> @llvm.vp.bswap.nvx16i64(<vscale x 16 x i64>, <vscale x 16 x i1>, i32)
+declare <vscale x 1 x i64> @llvm.vp.bswap.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+declare <vscale x 2 x i64> @llvm.vp.bswap.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)
+declare <vscale x 4 x i64> @llvm.vp.bswap.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i1>, i32)
+declare <vscale x 8 x i64> @llvm.vp.bswap.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i1>, i32)
+declare <vscale x 16 x i64> @llvm.vp.bswap.nxv16i64(<vscale x 16 x i64>, <vscale x 16 x i1>, i32)
 
 declare <2 x i8> @llvm.vp.ctpop.v2i8(<2 x i8>, <2 x i1>, i32)
 declare <4 x i8> @llvm.vp.ctpop.v4i8(<4 x i8>, <4 x i1>, i32)
 declare <8 x i8> @llvm.vp.ctpop.v8i8(<8 x i8>, <8 x i1>, i32)
 declare <16 x i8> @llvm.vp.ctpop.v16i8(<16 x i8>, <16 x i1>, i32)
-declare <vscale x 1 x i8> @llvm.vp.ctpop.nvx1i8(<vscale x 1 x i8>, <vscale x 1 x i1>, i32)
-declare <vscale x 2 x i8> @llvm.vp.ctpop.nvx2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, i32)
-declare <vscale x 4 x i8> @llvm.vp.ctpop.nvx4i8(<vscale x 4 x i8>, <vscale x 4 x i1>, i32)
-declare <vscale x 8 x i8> @llvm.vp.ctpop.nvx8i8(<vscale x 8 x i8>, <vscale x 8 x i1>, i32)
-declare <vscale x 16 x i8> @llvm.vp.ctpop.nvx16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i32)
+declare <vscale x 1 x i8> @llvm.vp.ctpop.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i1>, i32)
+declare <vscale x 2 x i8> @llvm.vp.ctpop.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, i32)
+declare <vscale x 4 x i8> @llvm.vp.ctpop.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i1>, i32)
+declare <vscale x 8 x i8> @llvm.vp.ctpop.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i1>, i32)
+declare <vscale x 16 x i8> @llvm.vp.ctpop.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i32)
 declare <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16>, <2 x i1>, i32)
 declare <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16>, <4 x i1>, i32)
 declare <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16>, <8 x i1>, i32)
 declare <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16>, <16 x i1>, i32)
-declare <vscale x 1 x i16> @llvm.vp.ctpop.nvx1i16(<vscale x 1 x i16>, <vscale x 1 x i1>, i32)
-declare <vscale x 2 x i16> @llvm.vp.ctpop.nvx2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i32)
-declare <vscale x 4 x i16> @llvm.vp.ctpop.nvx4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i32)
-declare <vscale x 8 x i16> @llvm.vp.ctpop.nvx8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i32)
-declare <vscale x 16 x i16> @llvm.vp.ctpop.nvx16i16(<vscale x 16 x i16>, <vscale x 16 x i1>, i32)
+declare <vscale x 1 x i16> @llvm.vp.ctpop.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i1>, i32)
+declare <vscale x 2 x i16> @llvm.vp.ctpop.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i32)
+declare <vscale x 4 x i16> @llvm.vp.ctpop.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i32)
+declare <vscale x 8 x i16> @llvm.vp.ctpop.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i32)
+declare <vscale x 16 x i16> @llvm.vp.ctpop.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i1>, i32)
 declare <2 x i32> @llvm.vp.ctpop.v2i32(<2 x i32>, <2 x i1>, i32)
 declare <4 x i32> @llvm.vp.ctpop.v4i32(<4 x i32>, <4 x i1>, i32)
 declare <8 x i32> @llvm.vp.ctpop.v8i32(<8 x i32>, <8 x i1>, i32)
 declare <16 x i32> @llvm.vp.ctpop.v16i32(<16 x i32>, <16 x i1>, i32)
-declare <vscale x 1 x i32> @llvm.vp.ctpop.nvx1i32(<vscale x 1 x i32>, <vscale x 1 x i1>, i32)
-declare <vscale x 2 x i32> @llvm.vp.ctpop.nvx2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32)
-declare <vscale x 4 x i32> @llvm.vp.ctpop.nvx4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)
-declare <vscale x 8 x i32> @llvm.vp.ctpop.nvx8i32(<vscale x 8 x i32>, <vscale x 8 x i1>, i32)
-declare <vscale x 16 x i32> @llvm.vp.ctpop.nvx16i32(<vscale x 16 x i32>, <vscale x 16 x i1>, i32)
+declare <vscale x 1 x i32> @llvm.vp.ctpop.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i1>, i32)
+declare <vscale x 2 x i32> @llvm.vp.ctpop.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32)
+declare <vscale x 4 x i32> @llvm.vp.ctpop.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)
+declare <vscale x 8 x i32> @llvm.vp.ctpop.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i1>, i32)
+declare <vscale x 16 x i32> @llvm.vp.ctpop.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i1>, i32)
 declare <2 x i64> @llvm.vp.ctpop.v2i64(<2 x i64>, <2 x i1>, i32)
 declare <4 x i64> @llvm.vp.ctpop.v4i64(<4 x i64>, <4 x i1>, i32)
 declare <8 x i64> @llvm.vp.ctpop.v8i64(<8 x i64>, <8 x i1>, i32)
 declare <16 x i64> @llvm.vp.ctpop.v16i64(<16 x i64>, <16 x i1>, i32)
-declare <vscale x 1 x i64> @llvm.vp.ctpop.nvx1i64(<vscale x 1 x i64>, <vscale x 1 x i1>, i32)
-declare <vscale x 2 x i64> @llvm.vp.ctpop.nvx2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)
-declare <vscale x 4 x i64> @llvm.vp.ctpop.nvx4i64(<vscale x 4 x i64>, <vscale x 4 x i1>, i32)
-declare <vscale x 8 x i64> @llvm.vp.ctpop.nvx8i64(<vscale x 8 x i64>, <vscale x 8 x i1>, i32)
-declare <vscale x 16 x i64> @llvm.vp.ctpop.nvx16i64(<vscale x 16 x i64>, <vscale x 16 x i1>, i32)
+declare <vscale x 1 x i64> @llvm.vp.ctpop.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+declare <vscale x 2 x i64> @llvm.vp.ctpop.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)
+declare <vscale x 4 x i64> @llvm.vp.ctpop.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i1>, i32)
+declare <vscale x 8 x i64> @llvm.vp.ctpop.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i1>, i32)
+declare <vscale x 16 x i64> @llvm.vp.ctpop.nxv16i64(<vscale x 16 x i64>, <vscale x 16 x i1>, i32)
 
 declare <2 x i8> @llvm.vp.ctlz.v2i8(<2 x i8>, i1 immarg, <2 x i1>, i32)
 declare <4 x i8> @llvm.vp.ctlz.v4i8(<4 x i8>, i1 immarg, <4 x i1>, i32)
 declare <8 x i8> @llvm.vp.ctlz.v8i8(<8 x i8>, i1 immarg, <8 x i1>, i32)
 declare <16 x i8> @llvm.vp.ctlz.v16i8(<16 x i8>, i1 immarg, <16 x i1>, i32)
-declare <vscale x 1 x i8> @llvm.vp.ctlz.nvx1i8(<vscale x 1 x i8>, i1 immarg, <vscale x 1 x i1>, i32)
-declare <vscale x 2 x i8> @llvm.vp.ctlz.nvx2i8(<vscale x 2 x i8>, i1 immarg, <vscale x 2 x i1>, i32)
-declare <vscale x 4 x i8> @llvm.vp.ctlz.nvx4i8(<vscale x 4 x i8>, i1 immarg, <vscale x 4 x i1>, i32)
-declare <vscale x 8 x i8> @llvm.vp.ctlz.nvx8i8(<vscale x 8 x i8>, i1 immarg, <vscale x 8 x i1>, i32)
-declare <vscale x 16 x i8> @llvm.vp.ctlz.nvx16i8(<vscale x 16 x i8>, i1 immarg, <vscale x 16 x i1>, i32)
-declare <vscale x 32 x i8> @llvm.vp.ctlz.nvx32i8(<vscale x 32 x i8>, i1 immarg, <vscale x 32 x i1>, i32)
-declare <vscale x 64 x i8> @llvm.vp.ctlz.nvx64i8(<vscale x 64 x i8>, i1 immarg, <vscale x 64 x i1>, i32)
+declare <vscale x 1 x i8> @llvm.vp.ctlz.nxv1i8(<vscale x 1 x i8>, i1 immarg, <vscale x 1 x i1>, i32)
+declare <vscale x 2 x i8> @llvm.vp.ctlz.nxv2i8(<vscale x 2 x i8>, i1 immarg, <vscale x 2 x i1>, i32)
+declare <vscale x 4 x i8> @llvm.vp.ctlz.nxv4i8(<vscale x 4 x i8>, i1 immarg, <vscale x 4 x i1>, i32)
+declare <vscale x 8 x i8> @llvm.vp.ctlz.nxv8i8(<vscale x 8 x i8>, i1 immarg, <vscale x 8 x i1>, i32)
+declare <vscale x 16 x i8> @llvm.vp.ctlz.nxv16i8(<vscale x 16 x i8>, i1 immarg, <vscale x 16 x i1>, i32)
+declare <vscale x 32 x i8> @llvm.vp.ctlz.nxv32i8(<vscale x 32 x i8>, i1 immarg, <vscale x 32 x i1>, i32)
+declare <vscale x 64 x i8> @llvm.vp.ctlz.nxv64i8(<vscale x 64 x i8>, i1 immarg, <vscale x 64 x i1>, i32)
 declare <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16>, i1 immarg, <2 x i1>, i32)
 declare <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16>, i1 immarg, <4 x i1>, i32)
 declare <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16>, i1 immarg, <8 x i1>, i32)
 declare <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16>, i1 immarg, <16 x i1>, i32)
-declare <vscale x 1 x i16> @llvm.vp.ctlz.nvx1i16(<vscale x 1 x i16>, i1 immarg, <vscale x 1 x i1>, i32)
-declare <vscale x 2 x i16> @llvm.vp.ctlz.nvx2i16(<vscale x 2 x i16>, i1 immarg, <vscale x 2 x i1>, i32)
-declare <vscale x 4 x i16> @llvm.vp.ctlz.nvx4i16(<vscale x 4 x i16>, i1 immarg, <vscale x 4 x i1>, i32)
-declare <vscale x 8 x i16> @llvm.vp.ctlz.nvx8i16(<vscale x 8 x i16>, i1 immarg, <vscale x 8 x i1>, i32)
-declare <vscale x 16 x i16> @llvm.vp.ctlz.nvx16i16(<vscale x 16 x i16>, i1 immarg, <vscale x 16 x i1>, i32)
-declare <vscale x 32 x i16> @llvm.vp.ctlz.nvx32i16(<vscale x 32 x i16>, i1 immarg, <vscale x 32 x i1>, i32)
+declare <vscale x 1 x i16> @llvm.vp.ctlz.nxv1i16(<vscale x 1 x i16>, i1 immarg, <vscale x 1 x i1>, i32)
+declare <vscale x 2 x i16> @llvm.vp.ctlz.nxv2i16(<vscale x 2 x i16>, i1 immarg, <vscale x 2 x i1>, i32)
+declare <vscale x 4 x i16> @llvm.vp.ctlz.nxv4i16(<vscale x 4 x i16>, i1 immarg, <vscale x 4 x i1>, i32)
+declare <vscale x 8 x i16> @llvm.vp.ctlz.nxv8i16(<vscale x 8 x i16>, i1 immarg, <vscale x 8 x i1>, i32)
+declare <vscale x 16 x i16> @llvm.vp.ctlz.nxv16i16(<vscale x 16 x i16>, i1 immarg, <vscale x 16 x i1>, i32)
+declare <vscale x 32 x i16> @llvm.vp.ctlz.nxv32i16(<vscale x 32 x i16>, i1 immarg, <vscale x 32 x i1>, i32)
 declare <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32>, i1 immarg, <2 x i1>, i32)
 declare <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32>, i1 immarg, <4 x i1>, i32)
 declare <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32>, i1 immarg, <8 x i1>, i32)
 declare <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32>, i1 immarg, <16 x i1>, i32)
-declare <vscale x 1 x i32> @llvm.vp.ctlz.nvx1i32(<vscale x 1 x i32>, i1 immarg, <vscale x 1 x i1>, i32)
-declare <vscale x 2 x i32> @llvm.vp.ctlz.nvx2i32(<vscale x 2 x i32>, i1 immarg, <vscale x 2 x i1>, i32)
-declare <vscale x 4 x i32> @llvm.vp.ctlz.nvx4i32(<vscale x 4 x i32>, i1 immarg, <vscale x 4 x i1>, i32)
-declare <vscale x 8 x i32> @llvm.vp.ctlz.nvx8i32(<vscale x 8 x i32>, i1 immarg, <vscale x 8 x i1>, i32)
-declare <vscale x 16 x i32> @llvm.vp.ctlz.nvx16i32(<vscale x 16 x i32>, i1 immarg, <vscale x 16 x i1>, i32)
+declare <vscale x 1 x i32> @llvm.vp.ctlz.nxv1i32(<vscale x 1 x i32>, i1 immarg, <vscale x 1 x i1>, i32)
+declare <vscale x 2 x i32> @llvm.vp.ctlz.nxv2i32(<vscale x 2 x i32>, i1 immarg, <vscale x 2 x i1>, i32)
+declare <vscale x 4 x i32> @llvm.vp.ctlz.nxv4i32(<vscale x 4 x i32>, i1 immarg, <vscale x 4 x i1>, i32)
+declare <vscale x 8 x i32> @llvm.vp.ctlz.nxv8i32(<vscale x 8 x i32>, i1 immarg, <vscale x 8 x i1>, i32)
+declare <vscale x 16 x i32> @llvm.vp.ctlz.nxv16i32(<vscale x 16 x i32>, i1 immarg, <vscale x 16 x i1>, i32)
 declare <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64>, i1 immarg, <2 x i1>, i32)
 declare <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64>, i1 immarg, <4 x i1>, i32)
 declare <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64>, i1 immarg, <8 x i1>, i32)
 declare <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64>, i1 immarg, <16 x i1>, i32)
-declare <vscale x 1 x i64> @llvm.vp.ctlz.nvx1i64(<vscale x 1 x i64>, i1 immarg, <vscale x 1 x i1>, i32)
-declare <vscale x 2 x i64> @llvm.vp.ctlz.nvx2i64(<vscale x 2 x i64>, i1 immarg, <vscale x 2 x i1>, i32)
-declare <vscale x 4 x i64> @llvm.vp.ctlz.nvx4i64(<vscale x 4 x i64>, i1 immarg, <vscale x 4 x i1>, i32)
-declare <vscale x 8 x i64> @llvm.vp.ctlz.nvx8i64(<vscale x 8 x i64>, i1 immarg, <vscale x 8 x i1>, i32)
-declare <vscale x 16 x i64> @llvm.vp.ctlz.nvx16i64(<vscale x 16 x i64>, i1 immarg, <vscale x 16 x i1>, i32)
+declare <vscale x 1 x i64> @llvm.vp.ctlz.nxv1i64(<vscale x 1 x i64>, i1 immarg, <vscale x 1 x i1>, i32)
+declare <vscale x 2 x i64> @llvm.vp.ctlz.nxv2i64(<vscale x 2 x i64>, i1 immarg, <vscale x 2 x i1>, i32)
+declare <vscale x 4 x i64> @llvm.vp.ctlz.nxv4i64(<vscale x 4 x i64>, i1 immarg, <vscale x 4 x i1>, i32)
+declare <vscale x 8 x i64> @llvm.vp.ctlz.nxv8i64(<vscale x 8 x i64>, i1 immarg, <vscale x 8 x i1>, i32)
+declare <vscale x 16 x i64> @llvm.vp.ctlz.nxv16i64(<vscale x 16 x i64>, i1 immarg, <vscale x 16 x i1>, i32)
 
 declare <2 x i8> @llvm.vp.cttz.v2i8(<2 x i8>, i1 immarg, <2 x i1>, i32)
 declare <4 x i8> @llvm.vp.cttz.v4i8(<4 x i8>, i1 immarg, <4 x i1>, i32)
 declare <8 x i8> @llvm.vp.cttz.v8i8(<8 x i8>, i1 immarg, <8 x i1>, i32)
 declare <16 x i8> @llvm.vp.cttz.v16i8(<16 x i8>, i1 immarg, <16 x i1>, i32)
-declare <vscale x 1 x i8> @llvm.vp.cttz.nvx1i8(<vscale x 1 x i8>, i1 immarg, <vscale x 1 x i1>, i32)
-declare <vscale x 2 x i8> @llvm.vp.cttz.nvx2i8(<vscale x 2 x i8>, i1 immarg, <vscale x 2 x i1>, i32)
-declare <vscale x 4 x i8> @llvm.vp.cttz.nvx4i8(<vscale x 4 x i8>, i1 immarg, <vscale x 4 x i1>, i32)
-declare <vscale x 8 x i8> @llvm.vp.cttz.nvx8i8(<vscale x 8 x i8>, i1 immarg, <vscale x 8 x i1>, i32)
-declare <vscale x 16 x i8> @llvm.vp.cttz.nvx16i8(<vscale x 16 x i8>, i1 immarg, <vscale x 16 x i1>, i32)
-declare <vscale x 32 x i8> @llvm.vp.cttz.nvx32i8(<vscale x 32 x i8>, i1 immarg, <vscale x 32 x i1>, i32)
-declare <vscale x 64 x i8> @llvm.vp.cttz.nvx64i8(<vscale x 64 x i8>, i1 immarg, <vscale x 64 x i1>, i32)
+declare <vscale x 1 x i8> @llvm.vp.cttz.nxv1i8(<vscale x 1 x i8>, i1 immarg, <vscale x 1 x i1>, i32)
+declare <vscale x 2 x i8> @llvm.vp.cttz.nxv2i8(<vscale x 2 x i8>, i1 immarg, <vscale x 2 x i1>, i32)
+declare <vscale x 4 x i8> @llvm.vp.cttz.nxv4i8(<vscale x 4 x i8>, i1 immarg, <vscale x 4 x i1>, i32)
+declare <vscale x 8 x i8> @llvm.vp.cttz.nxv8i8(<vscale x 8 x i8>, i1 immarg, <vscale x 8 x i1>, i32)
+declare <vscale x 16 x i8> @llvm.vp.cttz.nxv16i8(<vscale x 16 x i8>, i1 immarg, <vscale x 16 x i1>, i32)
+declare <vscale x 32 x i8> @llvm.vp.cttz.nxv32i8(<vscale x 32 x i8>, i1 immarg, <vscale x 32 x i1>, i32)
+declare <vscale x 64 x i8> @llvm.vp.cttz.nxv64i8(<vscale x 64 x i8>, i1 immarg, <vscale x 64 x i1>, i32)
 declare <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16>, i1 immarg, <2 x i1>, i32)
 declare <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16>, i1 immarg, <4 x i1>, i32)
 declare <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16>, i1 immarg, <8 x i1>, i32)
 declare <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16>, i1 immarg, <16 x i1>, i32)
-declare <vscale x 1 x i16> @llvm.vp.cttz.nvx1i16(<vscale x 1 x i16>, i1 immarg, <vscale x 1 x i1>, i32)
-declare <vscale x 2 x i16> @llvm.vp.cttz.nvx2i16(<vscale x 2 x i16>, i1 immarg, <vscale x 2 x i1>, i32)
-declare <vscale x 4 x i16> @llvm.vp.cttz.nvx4i16(<vscale x 4 x i16>, i1 immarg, <vscale x 4 x i1>, i32)
-declare <vscale x 8 x i16> @llvm.vp.cttz.nvx8i16(<vscale x 8 x i16>, i1 immarg, <vscale x 8 x i1>, i32)
-declare <vscale x 16 x i16> @llvm.vp.cttz.nvx16i16(<vscale x 16 x i16>, i1 immarg, <vscale x 16 x i1>, i32)
-declare <vscale x 32 x i16> @llvm.vp.cttz.nvx32i16(<vscale x 32 x i16>, i1 immarg, <vscale x 32 x i1>, i32)
+declare <vscale x 1 x i16> @llvm.vp.cttz.nxv1i16(<vscale x 1 x i16>, i1 immarg, <vscale x 1 x i1>, i32)
+declare <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16>, i1 immarg, <vscale x 2 x i1>, i32)
+declare <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16>, i1 immarg, <vscale x 4 x i1>, i32)
+declare <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16>, i1 immarg, <vscale x 8 x i1>, i32)
+declare <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16>, i1 immarg, <vscale x 16 x i1>, i32)
+declare <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16>, i1 immarg, <vscale x 32 x i1>, i32)
 declare <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32>, i1 immarg, <2 x i1>, i32)
 declare <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32>, i1 immarg, <4 x i1>, i32)
 declare <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32>, i1 immarg, <8 x i1>, i32)
 declare <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32>, i1 immarg, <16 x i1>, i32)
-declare <vscale x 1 x i32> @llvm.vp.cttz.nvx1i32(<vscale x 1 x i32>, i1 immarg, <vscale x 1 x i1>, i32)
-declare <vscale x 2 x i32> @llvm.vp.cttz.nvx2i32(<vscale x 2 x i32>, i1 immarg, <vscale x 2 x i1>, i32)
-declare <vscale x 4 x i32> @llvm.vp.cttz.nvx4i32(<vscale x 4 x i32>, i1 immarg, <vscale x 4 x i1>, i32)
-declare <vscale x 8 x i32> @llvm.vp.cttz.nvx8i32(<vscale x 8 x i32>, i1 immarg, <vscale x 8 x i1>, i32)
-declare <vscale x 16 x i32> @llvm.vp.cttz.nvx16i32(<vscale x 16 x i32>, i1 immarg, <vscale x 16 x i1>, i32)
+declare <vscale x 1 x i32> @llvm.vp.cttz.nxv1i32(<vscale x 1 x i32>, i1 immarg, <vscale x 1 x i1>, i32)
+declare <vscale x 2 x i32> @llvm.vp.cttz.nxv2i32(<vscale x 2 x i32>, i1 immarg, <vscale x 2 x i1>, i32)
+declare <vscale x 4 x i32> @llvm.vp.cttz.nxv4i32(<vscale x 4 x i32>, i1 immarg, <vscale x 4 x i1>, i32)
+declare <vscale x 8 x i32> @llvm.vp.cttz.nxv8i32(<vscale x 8 x i32>, i1 immarg, <vscale x 8 x i1>, i32)
+declare <vscale x 16 x i32> @llvm.vp.cttz.nxv16i32(<vscale x 16 x i32>, i1 immarg, <vscale x 16 x i1>, i32)
 declare <2 x i64> @llvm.vp.cttz.v2i64(<2 x i64>, i1 immarg, <2 x i1>, i32)
 declare <4 x i64> @llvm.vp.cttz.v4i64(<4 x i64>, i1 immarg, <4 x i1>, i32)
 declare <8 x i64> @llvm.vp.cttz.v8i64(<8 x i64>, i1 immarg, <8 x i1>, i32)
 declare <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64>, i1 immarg, <16 x i1>, i32)
-declare <vscale x 1 x i64> @llvm.vp.cttz.nvx1i64(<vscale x 1 x i64>, i1 immarg, <vscale x 1 x i1>, i32)
-declare <vscale x 2 x i64> @llvm.vp.cttz.nvx2i64(<vscale x 2 x i64>, i1 immarg, <vscale x 2 x i1>, i32)
-declare <vscale x 4 x i64> @llvm.vp.cttz.nvx4i64(<vscale x 4 x i64>, i1 immarg, <vscale x 4 x i1>, i32)
-declare <vscale x 8 x i64> @llvm.vp.cttz.nvx8i64(<vscale x 8 x i64>, i1 immarg, <vscale x 8 x i1>, i32)
-declare <vscale x 16 x i64> @llvm.vp.cttz.nvx16i64(<vscale x 16 x i64>, i1 immarg, <vscale x 16 x i1>, i32)
+declare <vscale x 1 x i64> @llvm.vp.cttz.nxv1i64(<vscale x 1 x i64>, i1 immarg, <vscale x 1 x i1>, i32)
+declare <vscale x 2 x i64> @llvm.vp.cttz.nxv2i64(<vscale x 2 x i64>, i1 immarg, <vscale x 2 x i1>, i32)
+declare <vscale x 4 x i64> @llvm.vp.cttz.nxv4i64(<vscale x 4 x i64>, i1 immarg, <vscale x 4 x i1>, i32)
+declare <vscale x 8 x i64> @llvm.vp.cttz.nxv8i64(<vscale x 8 x i64>, i1 immarg, <vscale x 8 x i1>, i32)
+declare <vscale x 16 x i64> @llvm.vp.cttz.nxv16i64(<vscale x 16 x i64>, i1 immarg, <vscale x 16 x i1>, i32)

diff --git a/llvm/test/Analysis/CostModel/RISCV/int-min-max.ll b/llvm/test/Analysis/CostModel/RISCV/int-min-max.ll
index 730b7ffb53d60e..10474d227851f9 100644
--- a/llvm/test/Analysis/CostModel/RISCV/int-min-max.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/int-min-max.ll
@@ -49,40 +49,40 @@ define void @smax() {
   call <4 x i8> @llvm.smax.v4i8(<4 x i8> undef, <4 x i8> undef)
   call <8 x i8> @llvm.smax.v8i8(<8 x i8> undef, <8 x i8> undef)
   call <16 x i8> @llvm.smax.v16i8(<16 x i8> undef, <16 x i8> undef)
-  call <vscale x 1 x i8> @llvm.smax.nvx1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef)
-  call <vscale x 2 x i8> @llvm.smax.nvx2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef)
-  call <vscale x 4 x i8> @llvm.smax.nvx4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef)
-  call <vscale x 8 x i8> @llvm.smax.nvx8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef)
-  call <vscale x 16 x i8> @llvm.smax.nvx16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
+  call <vscale x 1 x i8> @llvm.smax.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef)
+  call <vscale x 2 x i8> @llvm.smax.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef)
+  call <vscale x 4 x i8> @llvm.smax.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef)
+  call <vscale x 8 x i8> @llvm.smax.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef)
+  call <vscale x 16 x i8> @llvm.smax.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
   call i16 @llvm.smax.i16(i16 undef, i16 undef)
   call <2 x i16> @llvm.smax.v2i16(<2 x i16> undef, <2 x i16> undef)
   call <4 x i16> @llvm.smax.v4i16(<4 x i16> undef, <4 x i16> undef)
   call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef)
   call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef)
-  call <vscale x 1 x i16> @llvm.smax.nvx1i16(<vscale x 1 x i16> undef, <vscale x 1 x i16> undef)
-  call <vscale x 2 x i16> @llvm.smax.nvx2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef)
-  call <vscale x 4 x i16> @llvm.smax.nvx4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
-  call <vscale x 8 x i16> @llvm.smax.nvx8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
-  call <vscale x 16 x i16> @llvm.smax.nvx16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
+  call <vscale x 1 x i16> @llvm.smax.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i16> undef)
+  call <vscale x 2 x i16> @llvm.smax.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef)
+  call <vscale x 4 x i16> @llvm.smax.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
+  call <vscale x 8 x i16> @llvm.smax.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
+  call <vscale x 16 x i16> @llvm.smax.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
   call i32 @llvm.smax.i32(i32 undef, i32 undef)
   call <2 x i32> @llvm.smax.v2i32(<2 x i32> undef, <2 x i32> undef)
   call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef)
   call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef)
   call <16 x i32> @llvm.smax.v16i32(<16 x i32> undef, <16 x i32> undef)
-  call <vscale x 1 x i32> @llvm.smax.nvx1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef)
-  call <vscale x 2 x i32> @llvm.smax.nvx2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
-  call <vscale x 4 x i32> @llvm.smax.nvx4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
-  call <vscale x 8 x i32> @llvm.smax.nvx8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
-  call <vscale x 16 x i32> @llvm.smax.nvx16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef)
+  call <vscale x 1 x i32> @llvm.smax.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef)
+  call <vscale x 2 x i32> @llvm.smax.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
+  call <vscale x 4 x i32> @llvm.smax.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
+  call <vscale x 8 x i32> @llvm.smax.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
+  call <vscale x 16 x i32> @llvm.smax.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef)
   call i64 @llvm.smax.i64(i64 undef, i64 undef)
   call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef)
   call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef)
   call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef)
   call <16 x i64> @llvm.smax.v16i64(<16 x i64> undef, <16 x i64> undef)
-  call <vscale x 1 x i64> @llvm.smax.nvx1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> undef)
-  call <vscale x 2 x i64> @llvm.smax.nvx2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
-  call <vscale x 4 x i64> @llvm.smax.nvx4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef)
-  call <vscale x 8 x i64> @llvm.smax.nvx8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef)
+  call <vscale x 1 x i64> @llvm.smax.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> undef)
+  call <vscale x 2 x i64> @llvm.smax.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
+  call <vscale x 4 x i64> @llvm.smax.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef)
+  call <vscale x 8 x i64> @llvm.smax.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef)
   ret void
 }
 
@@ -134,40 +134,40 @@ define void @smin() {
   call <4 x i8> @llvm.smin.v4i8(<4 x i8> undef, <4 x i8> undef)
   call <8 x i8> @llvm.smin.v8i8(<8 x i8> undef, <8 x i8> undef)
   call <16 x i8> @llvm.smin.v16i8(<16 x i8> undef, <16 x i8> undef)
-  call <vscale x 1 x i8> @llvm.smin.nvx1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef)
-  call <vscale x 2 x i8> @llvm.smin.nvx2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef)
-  call <vscale x 4 x i8> @llvm.smin.nvx4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef)
-  call <vscale x 8 x i8> @llvm.smin.nvx8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef)
-  call <vscale x 16 x i8> @llvm.smin.nvx16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
+  call <vscale x 1 x i8> @llvm.smin.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef)
+  call <vscale x 2 x i8> @llvm.smin.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef)
+  call <vscale x 4 x i8> @llvm.smin.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef)
+  call <vscale x 8 x i8> @llvm.smin.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef)
+  call <vscale x 16 x i8> @llvm.smin.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
   call i16 @llvm.smin.i16(i16 undef, i16 undef)
   call <2 x i16> @llvm.smin.v2i16(<2 x i16> undef, <2 x i16> undef)
   call <4 x i16> @llvm.smin.v4i16(<4 x i16> undef, <4 x i16> undef)
   call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef)
   call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef)
-  call <vscale x 1 x i16> @llvm.smin.nvx1i16(<vscale x 1 x i16> undef, <vscale x 1 x i16> undef)
-  call <vscale x 2 x i16> @llvm.smin.nvx2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef)
-  call <vscale x 4 x i16> @llvm.smin.nvx4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
-  call <vscale x 8 x i16> @llvm.smin.nvx8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
-  call <vscale x 16 x i16> @llvm.smin.nvx16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
+  call <vscale x 1 x i16> @llvm.smin.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i16> undef)
+  call <vscale x 2 x i16> @llvm.smin.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef)
+  call <vscale x 4 x i16> @llvm.smin.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
+  call <vscale x 8 x i16> @llvm.smin.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
+  call <vscale x 16 x i16> @llvm.smin.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
   call i32 @llvm.smin.i32(i32 undef, i32 undef)
   call <2 x i32> @llvm.smin.v2i32(<2 x i32> undef, <2 x i32> undef)
   call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef)
   call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef)
   call <16 x i32> @llvm.smin.v16i32(<16 x i32> undef, <16 x i32> undef)
-  call <vscale x 1 x i32> @llvm.smin.nvx1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef)
-  call <vscale x 2 x i32> @llvm.smin.nvx2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
-  call <vscale x 4 x i32> @llvm.smin.nvx4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
-  call <vscale x 8 x i32> @llvm.smin.nvx8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
-  call <vscale x 16 x i32> @llvm.smin.nvx16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef)
+  call <vscale x 1 x i32> @llvm.smin.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef)
+  call <vscale x 2 x i32> @llvm.smin.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
+  call <vscale x 4 x i32> @llvm.smin.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
+  call <vscale x 8 x i32> @llvm.smin.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
+  call <vscale x 16 x i32> @llvm.smin.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef)
   call i64 @llvm.smin.i64(i64 undef, i64 undef)
   call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef)
   call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef)
   call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef)
   call <16 x i64> @llvm.smin.v16i64(<16 x i64> undef, <16 x i64> undef)
-  call <vscale x 1 x i64> @llvm.smin.nvx1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> undef)
-  call <vscale x 2 x i64> @llvm.smin.nvx2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
-  call <vscale x 4 x i64> @llvm.smin.nvx4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef)
-  call <vscale x 8 x i64> @llvm.smin.nvx8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef)
+  call <vscale x 1 x i64> @llvm.smin.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> undef)
+  call <vscale x 2 x i64> @llvm.smin.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
+  call <vscale x 4 x i64> @llvm.smin.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef)
+  call <vscale x 8 x i64> @llvm.smin.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef)
   ret void
 }
 
@@ -219,40 +219,40 @@ define void @umax() {
   call <4 x i8> @llvm.umax.v4i8(<4 x i8> undef, <4 x i8> undef)
   call <8 x i8> @llvm.umax.v8i8(<8 x i8> undef, <8 x i8> undef)
   call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef)
-  call <vscale x 1 x i8> @llvm.umax.nvx1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef)
-  call <vscale x 2 x i8> @llvm.umax.nvx2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef)
-  call <vscale x 4 x i8> @llvm.umax.nvx4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef)
-  call <vscale x 8 x i8> @llvm.umax.nvx8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef)
-  call <vscale x 16 x i8> @llvm.umax.nvx16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
+  call <vscale x 1 x i8> @llvm.umax.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef)
+  call <vscale x 2 x i8> @llvm.umax.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef)
+  call <vscale x 4 x i8> @llvm.umax.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef)
+  call <vscale x 8 x i8> @llvm.umax.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef)
+  call <vscale x 16 x i8> @llvm.umax.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
   call i16 @llvm.umax.i16(i16 undef, i16 undef)
   call <2 x i16> @llvm.umax.v2i16(<2 x i16> undef, <2 x i16> undef)
   call <4 x i16> @llvm.umax.v4i16(<4 x i16> undef, <4 x i16> undef)
   call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef)
   call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef)
-  call <vscale x 1 x i16> @llvm.umax.nvx1i16(<vscale x 1 x i16> undef, <vscale x 1 x i16> undef)
-  call <vscale x 2 x i16> @llvm.umax.nvx2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef)
-  call <vscale x 4 x i16> @llvm.umax.nvx4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
-  call <vscale x 8 x i16> @llvm.umax.nvx8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
-  call <vscale x 16 x i16> @llvm.umax.nvx16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
+  call <vscale x 1 x i16> @llvm.umax.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i16> undef)
+  call <vscale x 2 x i16> @llvm.umax.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef)
+  call <vscale x 4 x i16> @llvm.umax.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
+  call <vscale x 8 x i16> @llvm.umax.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
+  call <vscale x 16 x i16> @llvm.umax.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
   call i32 @llvm.umax.i32(i32 undef, i32 undef)
   call <2 x i32> @llvm.umax.v2i32(<2 x i32> undef, <2 x i32> undef)
   call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef)
   call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef)
   call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef)
-  call <vscale x 1 x i32> @llvm.umax.nvx1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef)
-  call <vscale x 2 x i32> @llvm.umax.nvx2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
-  call <vscale x 4 x i32> @llvm.umax.nvx4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
-  call <vscale x 8 x i32> @llvm.umax.nvx8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
-  call <vscale x 16 x i32> @llvm.umax.nvx16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef)
+  call <vscale x 1 x i32> @llvm.umax.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef)
+  call <vscale x 2 x i32> @llvm.umax.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
+  call <vscale x 4 x i32> @llvm.umax.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
+  call <vscale x 8 x i32> @llvm.umax.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
+  call <vscale x 16 x i32> @llvm.umax.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef)
   call i64 @llvm.umax.i64(i64 undef, i64 undef)
   call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef)
   call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef)
   call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef)
   call <16 x i64> @llvm.umax.v16i64(<16 x i64> undef, <16 x i64> undef)
-  call <vscale x 1 x i64> @llvm.umax.nvx1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> undef)
-  call <vscale x 2 x i64> @llvm.umax.nvx2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
-  call <vscale x 4 x i64> @llvm.umax.nvx4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef)
-  call <vscale x 8 x i64> @llvm.umax.nvx8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef)
+  call <vscale x 1 x i64> @llvm.umax.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> undef)
+  call <vscale x 2 x i64> @llvm.umax.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
+  call <vscale x 4 x i64> @llvm.umax.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef)
+  call <vscale x 8 x i64> @llvm.umax.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef)
   ret void
 }
 
@@ -304,40 +304,40 @@ define void @umin() {
   call <4 x i8> @llvm.umin.v4i8(<4 x i8> undef, <4 x i8> undef)
   call <8 x i8> @llvm.umin.v8i8(<8 x i8> undef, <8 x i8> undef)
   call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef)
-  call <vscale x 1 x i8> @llvm.umin.nvx1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef)
-  call <vscale x 2 x i8> @llvm.umin.nvx2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef)
-  call <vscale x 4 x i8> @llvm.umin.nvx4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef)
-  call <vscale x 8 x i8> @llvm.umin.nvx8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef)
-  call <vscale x 16 x i8> @llvm.umin.nvx16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
+  call <vscale x 1 x i8> @llvm.umin.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef)
+  call <vscale x 2 x i8> @llvm.umin.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef)
+  call <vscale x 4 x i8> @llvm.umin.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef)
+  call <vscale x 8 x i8> @llvm.umin.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef)
+  call <vscale x 16 x i8> @llvm.umin.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
   call i16 @llvm.umin.i16(i16 undef, i16 undef)
   call <2 x i16> @llvm.umin.v2i16(<2 x i16> undef, <2 x i16> undef)
   call <4 x i16> @llvm.umin.v4i16(<4 x i16> undef, <4 x i16> undef)
   call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef)
   call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef)
-  call <vscale x 1 x i16> @llvm.umin.nvx1i16(<vscale x 1 x i16> undef, <vscale x 1 x i16> undef)
-  call <vscale x 2 x i16> @llvm.umin.nvx2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef)
-  call <vscale x 4 x i16> @llvm.umin.nvx4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
-  call <vscale x 8 x i16> @llvm.umin.nvx8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
-  call <vscale x 16 x i16> @llvm.umin.nvx16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
+  call <vscale x 1 x i16> @llvm.umin.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i16> undef)
+  call <vscale x 2 x i16> @llvm.umin.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef)
+  call <vscale x 4 x i16> @llvm.umin.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
+  call <vscale x 8 x i16> @llvm.umin.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
+  call <vscale x 16 x i16> @llvm.umin.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
   call i32 @llvm.umin.i32(i32 undef, i32 undef)
   call <2 x i32> @llvm.umin.v2i32(<2 x i32> undef, <2 x i32> undef)
   call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef)
   call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef)
   call <16 x i32> @llvm.umin.v16i32(<16 x i32> undef, <16 x i32> undef)
-  call <vscale x 1 x i32> @llvm.umin.nvx1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef)
-  call <vscale x 2 x i32> @llvm.umin.nvx2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
-  call <vscale x 4 x i32> @llvm.umin.nvx4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
-  call <vscale x 8 x i32> @llvm.umin.nvx8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
-  call <vscale x 16 x i32> @llvm.umin.nvx16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef)
+  call <vscale x 1 x i32> @llvm.umin.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef)
+  call <vscale x 2 x i32> @llvm.umin.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
+  call <vscale x 4 x i32> @llvm.umin.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
+  call <vscale x 8 x i32> @llvm.umin.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
+  call <vscale x 16 x i32> @llvm.umin.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef)
   call i64 @llvm.umin.i64(i64 undef, i64 undef)
   call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef)
   call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef)
   call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef)
   call <16 x i64> @llvm.umin.v16i64(<16 x i64> undef, <16 x i64> undef)
-  call <vscale x 1 x i64> @llvm.umin.nvx1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> undef)
-  call <vscale x 2 x i64> @llvm.umin.nvx2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
-  call <vscale x 4 x i64> @llvm.umin.nvx4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef)
-  call <vscale x 8 x i64> @llvm.umin.nvx8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef)
+  call <vscale x 1 x i64> @llvm.umin.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> undef)
+  call <vscale x 2 x i64> @llvm.umin.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
+  call <vscale x 4 x i64> @llvm.umin.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef)
+  call <vscale x 8 x i64> @llvm.umin.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef)
   ret void
 }
 
@@ -346,157 +346,157 @@ declare <2 x i8> @llvm.smax.v2i8(<2 x i8>, <2 x i8>)
 declare <4 x i8> @llvm.smax.v4i8(<4 x i8>, <4 x i8>)
 declare <8 x i8> @llvm.smax.v8i8(<8 x i8>, <8 x i8>)
 declare <16 x i8> @llvm.smax.v16i8(<16 x i8>, <16 x i8>)
-declare <vscale x 1 x i8> @llvm.smax.nvx1i8(<vscale x 1 x i8>, <vscale x 1 x i8>)
-declare <vscale x 2 x i8> @llvm.smax.nvx2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)
-declare <vscale x 4 x i8> @llvm.smax.nvx4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
-declare <vscale x 8 x i8> @llvm.smax.nvx8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
-declare <vscale x 16 x i8> @llvm.smax.nvx16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 1 x i8> @llvm.smax.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>)
+declare <vscale x 2 x i8> @llvm.smax.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)
+declare <vscale x 4 x i8> @llvm.smax.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
+declare <vscale x 8 x i8> @llvm.smax.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
+declare <vscale x 16 x i8> @llvm.smax.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
 declare i16 @llvm.smax.i16(i16, i16)
 declare <2 x i16> @llvm.smax.v2i16(<2 x i16>, <2 x i16>)
 declare <4 x i16> @llvm.smax.v4i16(<4 x i16>, <4 x i16>)
 declare <8 x i16> @llvm.smax.v8i16(<8 x i16>, <8 x i16>)
 declare <16 x i16> @llvm.smax.v16i16(<16 x i16>, <16 x i16>)
-declare <vscale x 1 x i16> @llvm.smax.nvx1i16(<vscale x 1 x i16>, <vscale x 1 x i16>)
-declare <vscale x 2 x i16> @llvm.smax.nvx2i16(<vscale x 2 x i16>, <vscale x 2 x i16>)
-declare <vscale x 4 x i16> @llvm.smax.nvx4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
-declare <vscale x 8 x i16> @llvm.smax.nvx8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 16 x i16> @llvm.smax.nvx16i16(<vscale x 16 x i16>, <vscale x 16 x i16>)
+declare <vscale x 1 x i16> @llvm.smax.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i16>)
+declare <vscale x 2 x i16> @llvm.smax.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>)
+declare <vscale x 4 x i16> @llvm.smax.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
+declare <vscale x 8 x i16> @llvm.smax.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 16 x i16> @llvm.smax.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>)
 declare i32 @llvm.smax.i32(i32, i32)
 declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>)
 declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)
 declare <8 x i32> @llvm.smax.v8i32(<8 x i32>, <8 x i32>)
 declare <16 x i32> @llvm.smax.v16i32(<16 x i32>, <16 x i32>)
-declare <vscale x 1 x i32> @llvm.smax.nvx1i32(<vscale x 1 x i32>, <vscale x 1 x i32>)
-declare <vscale x 2 x i32> @llvm.smax.nvx2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
-declare <vscale x 4 x i32> @llvm.smax.nvx4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 8 x i32> @llvm.smax.nvx8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
-declare <vscale x 16 x i32> @llvm.smax.nvx16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)
+declare <vscale x 1 x i32> @llvm.smax.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>)
+declare <vscale x 2 x i32> @llvm.smax.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
+declare <vscale x 4 x i32> @llvm.smax.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 8 x i32> @llvm.smax.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
+declare <vscale x 16 x i32> @llvm.smax.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)
 declare i64 @llvm.smax.i64(i64, i64)
 declare <2 x i64> @llvm.smax.v2i64(<2 x i64>, <2 x i64>)
 declare <4 x i64> @llvm.smax.v4i64(<4 x i64>, <4 x i64>)
 declare <8 x i64> @llvm.smax.v8i64(<8 x i64>, <8 x i64>)
 declare <16 x i64> @llvm.smax.v16i64(<16 x i64>, <16 x i64>)
-declare <vscale x 1 x i64> @llvm.smax.nvx1i64(<vscale x 1 x i64>, <vscale x 1 x i64>)
-declare <vscale x 2 x i64> @llvm.smax.nvx2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
-declare <vscale x 4 x i64> @llvm.smax.nvx4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
-declare <vscale x 8 x i64> @llvm.smax.nvx8i64(<vscale x 8 x i64>, <vscale x 8 x i64>)
+declare <vscale x 1 x i64> @llvm.smax.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>)
+declare <vscale x 2 x i64> @llvm.smax.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 4 x i64> @llvm.smax.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
+declare <vscale x 8 x i64> @llvm.smax.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>)
 
 declare i8 @llvm.smin.i8(i8, i8)
 declare <2 x i8> @llvm.smin.v2i8(<2 x i8>, <2 x i8>)
 declare <4 x i8> @llvm.smin.v4i8(<4 x i8>, <4 x i8>)
 declare <8 x i8> @llvm.smin.v8i8(<8 x i8>, <8 x i8>)
 declare <16 x i8> @llvm.smin.v16i8(<16 x i8>, <16 x i8>)
-declare <vscale x 1 x i8> @llvm.smin.nvx1i8(<vscale x 1 x i8>, <vscale x 1 x i8>)
-declare <vscale x 2 x i8> @llvm.smin.nvx2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)
-declare <vscale x 4 x i8> @llvm.smin.nvx4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
-declare <vscale x 8 x i8> @llvm.smin.nvx8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
-declare <vscale x 16 x i8> @llvm.smin.nvx16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 1 x i8> @llvm.smin.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>)
+declare <vscale x 2 x i8> @llvm.smin.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)
+declare <vscale x 4 x i8> @llvm.smin.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
+declare <vscale x 8 x i8> @llvm.smin.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
+declare <vscale x 16 x i8> @llvm.smin.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
 declare i16 @llvm.smin.i16(i16, i16)
 declare <2 x i16> @llvm.smin.v2i16(<2 x i16>, <2 x i16>)
 declare <4 x i16> @llvm.smin.v4i16(<4 x i16>, <4 x i16>)
 declare <8 x i16> @llvm.smin.v8i16(<8 x i16>, <8 x i16>)
 declare <16 x i16> @llvm.smin.v16i16(<16 x i16>, <16 x i16>)
-declare <vscale x 1 x i16> @llvm.smin.nvx1i16(<vscale x 1 x i16>, <vscale x 1 x i16>)
-declare <vscale x 2 x i16> @llvm.smin.nvx2i16(<vscale x 2 x i16>, <vscale x 2 x i16>)
-declare <vscale x 4 x i16> @llvm.smin.nvx4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
-declare <vscale x 8 x i16> @llvm.smin.nvx8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 16 x i16> @llvm.smin.nvx16i16(<vscale x 16 x i16>, <vscale x 16 x i16>)
+declare <vscale x 1 x i16> @llvm.smin.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i16>)
+declare <vscale x 2 x i16> @llvm.smin.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>)
+declare <vscale x 4 x i16> @llvm.smin.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
+declare <vscale x 8 x i16> @llvm.smin.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 16 x i16> @llvm.smin.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>)
 declare i32 @llvm.smin.i32(i32, i32)
 declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>)
 declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)
 declare <8 x i32> @llvm.smin.v8i32(<8 x i32>, <8 x i32>)
 declare <16 x i32> @llvm.smin.v16i32(<16 x i32>, <16 x i32>)
-declare <vscale x 1 x i32> @llvm.smin.nvx1i32(<vscale x 1 x i32>, <vscale x 1 x i32>)
-declare <vscale x 2 x i32> @llvm.smin.nvx2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
-declare <vscale x 4 x i32> @llvm.smin.nvx4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 8 x i32> @llvm.smin.nvx8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
-declare <vscale x 16 x i32> @llvm.smin.nvx16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)
+declare <vscale x 1 x i32> @llvm.smin.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>)
+declare <vscale x 2 x i32> @llvm.smin.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
+declare <vscale x 4 x i32> @llvm.smin.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 8 x i32> @llvm.smin.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
+declare <vscale x 16 x i32> @llvm.smin.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)
 declare i64 @llvm.smin.i64(i64, i64)
 declare <2 x i64> @llvm.smin.v2i64(<2 x i64>, <2 x i64>)
 declare <4 x i64> @llvm.smin.v4i64(<4 x i64>, <4 x i64>)
 declare <8 x i64> @llvm.smin.v8i64(<8 x i64>, <8 x i64>)
 declare <16 x i64> @llvm.smin.v16i64(<16 x i64>, <16 x i64>)
-declare <vscale x 1 x i64> @llvm.smin.nvx1i64(<vscale x 1 x i64>, <vscale x 1 x i64>)
-declare <vscale x 2 x i64> @llvm.smin.nvx2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
-declare <vscale x 4 x i64> @llvm.smin.nvx4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
-declare <vscale x 8 x i64> @llvm.smin.nvx8i64(<vscale x 8 x i64>, <vscale x 8 x i64>)
+declare <vscale x 1 x i64> @llvm.smin.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>)
+declare <vscale x 2 x i64> @llvm.smin.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 4 x i64> @llvm.smin.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
+declare <vscale x 8 x i64> @llvm.smin.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>)
 
 declare i8 @llvm.umax.i8(i8, i8)
 declare <2 x i8> @llvm.umax.v2i8(<2 x i8>, <2 x i8>)
 declare <4 x i8> @llvm.umax.v4i8(<4 x i8>, <4 x i8>)
 declare <8 x i8> @llvm.umax.v8i8(<8 x i8>, <8 x i8>)
 declare <16 x i8> @llvm.umax.v16i8(<16 x i8>, <16 x i8>)
-declare <vscale x 1 x i8> @llvm.umax.nvx1i8(<vscale x 1 x i8>, <vscale x 1 x i8>)
-declare <vscale x 2 x i8> @llvm.umax.nvx2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)
-declare <vscale x 4 x i8> @llvm.umax.nvx4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
-declare <vscale x 8 x i8> @llvm.umax.nvx8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
-declare <vscale x 16 x i8> @llvm.umax.nvx16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 1 x i8> @llvm.umax.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>)
+declare <vscale x 2 x i8> @llvm.umax.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)
+declare <vscale x 4 x i8> @llvm.umax.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
+declare <vscale x 8 x i8> @llvm.umax.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
+declare <vscale x 16 x i8> @llvm.umax.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
 declare i16 @llvm.umax.i16(i16, i16)
 declare <2 x i16> @llvm.umax.v2i16(<2 x i16>, <2 x i16>)
 declare <4 x i16> @llvm.umax.v4i16(<4 x i16>, <4 x i16>)
 declare <8 x i16> @llvm.umax.v8i16(<8 x i16>, <8 x i16>)
 declare <16 x i16> @llvm.umax.v16i16(<16 x i16>, <16 x i16>)
-declare <vscale x 1 x i16> @llvm.umax.nvx1i16(<vscale x 1 x i16>, <vscale x 1 x i16>)
-declare <vscale x 2 x i16> @llvm.umax.nvx2i16(<vscale x 2 x i16>, <vscale x 2 x i16>)
-declare <vscale x 4 x i16> @llvm.umax.nvx4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
-declare <vscale x 8 x i16> @llvm.umax.nvx8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 16 x i16> @llvm.umax.nvx16i16(<vscale x 16 x i16>, <vscale x 16 x i16>)
+declare <vscale x 1 x i16> @llvm.umax.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i16>)
+declare <vscale x 2 x i16> @llvm.umax.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>)
+declare <vscale x 4 x i16> @llvm.umax.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
+declare <vscale x 8 x i16> @llvm.umax.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 16 x i16> @llvm.umax.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>)
 declare i32 @llvm.umax.i32(i32, i32)
 declare <2 x i32> @llvm.umax.v2i32(<2 x i32>, <2 x i32>)
 declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>)
 declare <8 x i32> @llvm.umax.v8i32(<8 x i32>, <8 x i32>)
 declare <16 x i32> @llvm.umax.v16i32(<16 x i32>, <16 x i32>)
-declare <vscale x 1 x i32> @llvm.umax.nvx1i32(<vscale x 1 x i32>, <vscale x 1 x i32>)
-declare <vscale x 2 x i32> @llvm.umax.nvx2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
-declare <vscale x 4 x i32> @llvm.umax.nvx4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 8 x i32> @llvm.umax.nvx8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
-declare <vscale x 16 x i32> @llvm.umax.nvx16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)
+declare <vscale x 1 x i32> @llvm.umax.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>)
+declare <vscale x 2 x i32> @llvm.umax.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
+declare <vscale x 4 x i32> @llvm.umax.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 8 x i32> @llvm.umax.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
+declare <vscale x 16 x i32> @llvm.umax.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)
 declare i64 @llvm.umax.i64(i64, i64)
 declare <2 x i64> @llvm.umax.v2i64(<2 x i64>, <2 x i64>)
 declare <4 x i64> @llvm.umax.v4i64(<4 x i64>, <4 x i64>)
 declare <8 x i64> @llvm.umax.v8i64(<8 x i64>, <8 x i64>)
 declare <16 x i64> @llvm.umax.v16i64(<16 x i64>, <16 x i64>)
-declare <vscale x 1 x i64> @llvm.umax.nvx1i64(<vscale x 1 x i64>, <vscale x 1 x i64>)
-declare <vscale x 2 x i64> @llvm.umax.nvx2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
-declare <vscale x 4 x i64> @llvm.umax.nvx4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
-declare <vscale x 8 x i64> @llvm.umax.nvx8i64(<vscale x 8 x i64>, <vscale x 8 x i64>)
+declare <vscale x 1 x i64> @llvm.umax.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>)
+declare <vscale x 2 x i64> @llvm.umax.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 4 x i64> @llvm.umax.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
+declare <vscale x 8 x i64> @llvm.umax.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>)
 
 declare i8 @llvm.umin.i8(i8, i8)
 declare <2 x i8> @llvm.umin.v2i8(<2 x i8>, <2 x i8>)
 declare <4 x i8> @llvm.umin.v4i8(<4 x i8>, <4 x i8>)
 declare <8 x i8> @llvm.umin.v8i8(<8 x i8>, <8 x i8>)
 declare <16 x i8> @llvm.umin.v16i8(<16 x i8>, <16 x i8>)
-declare <vscale x 1 x i8> @llvm.umin.nvx1i8(<vscale x 1 x i8>, <vscale x 1 x i8>)
-declare <vscale x 2 x i8> @llvm.umin.nvx2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)
-declare <vscale x 4 x i8> @llvm.umin.nvx4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
-declare <vscale x 8 x i8> @llvm.umin.nvx8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
-declare <vscale x 16 x i8> @llvm.umin.nvx16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 1 x i8> @llvm.umin.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>)
+declare <vscale x 2 x i8> @llvm.umin.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)
+declare <vscale x 4 x i8> @llvm.umin.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
+declare <vscale x 8 x i8> @llvm.umin.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
+declare <vscale x 16 x i8> @llvm.umin.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
 declare i16 @llvm.umin.i16(i16, i16)
 declare <2 x i16> @llvm.umin.v2i16(<2 x i16>, <2 x i16>)
 declare <4 x i16> @llvm.umin.v4i16(<4 x i16>, <4 x i16>)
 declare <8 x i16> @llvm.umin.v8i16(<8 x i16>, <8 x i16>)
 declare <16 x i16> @llvm.umin.v16i16(<16 x i16>, <16 x i16>)
-declare <vscale x 1 x i16> @llvm.umin.nvx1i16(<vscale x 1 x i16>, <vscale x 1 x i16>)
-declare <vscale x 2 x i16> @llvm.umin.nvx2i16(<vscale x 2 x i16>, <vscale x 2 x i16>)
-declare <vscale x 4 x i16> @llvm.umin.nvx4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
-declare <vscale x 8 x i16> @llvm.umin.nvx8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 16 x i16> @llvm.umin.nvx16i16(<vscale x 16 x i16>, <vscale x 16 x i16>)
+declare <vscale x 1 x i16> @llvm.umin.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i16>)
+declare <vscale x 2 x i16> @llvm.umin.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>)
+declare <vscale x 4 x i16> @llvm.umin.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
+declare <vscale x 8 x i16> @llvm.umin.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 16 x i16> @llvm.umin.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>)
 declare i32 @llvm.umin.i32(i32, i32)
 declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>)
 declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>)
 declare <8 x i32> @llvm.umin.v8i32(<8 x i32>, <8 x i32>)
 declare <16 x i32> @llvm.umin.v16i32(<16 x i32>, <16 x i32>)
-declare <vscale x 1 x i32> @llvm.umin.nvx1i32(<vscale x 1 x i32>, <vscale x 1 x i32>)
-declare <vscale x 2 x i32> @llvm.umin.nvx2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
-declare <vscale x 4 x i32> @llvm.umin.nvx4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 8 x i32> @llvm.umin.nvx8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
-declare <vscale x 16 x i32> @llvm.umin.nvx16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)
+declare <vscale x 1 x i32> @llvm.umin.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>)
+declare <vscale x 2 x i32> @llvm.umin.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
+declare <vscale x 4 x i32> @llvm.umin.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 8 x i32> @llvm.umin.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
+declare <vscale x 16 x i32> @llvm.umin.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)
 declare i64 @llvm.umin.i64(i64, i64)
 declare <2 x i64> @llvm.umin.v2i64(<2 x i64>, <2 x i64>)
 declare <4 x i64> @llvm.umin.v4i64(<4 x i64>, <4 x i64>)
 declare <8 x i64> @llvm.umin.v8i64(<8 x i64>, <8 x i64>)
 declare <16 x i64> @llvm.umin.v16i64(<16 x i64>, <16 x i64>)
-declare <vscale x 1 x i64> @llvm.umin.nvx1i64(<vscale x 1 x i64>, <vscale x 1 x i64>)
-declare <vscale x 2 x i64> @llvm.umin.nvx2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
-declare <vscale x 4 x i64> @llvm.umin.nvx4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
-declare <vscale x 8 x i64> @llvm.umin.nvx8i64(<vscale x 8 x i64>, <vscale x 8 x i64>)
+declare <vscale x 1 x i64> @llvm.umin.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>)
+declare <vscale x 2 x i64> @llvm.umin.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 4 x i64> @llvm.umin.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
+declare <vscale x 8 x i64> @llvm.umin.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>)

diff --git a/llvm/test/Analysis/CostModel/RISCV/int-sat-math.ll b/llvm/test/Analysis/CostModel/RISCV/int-sat-math.ll
index 185fcc9ce8b33c..be6b7c57d22523 100644
--- a/llvm/test/Analysis/CostModel/RISCV/int-sat-math.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/int-sat-math.ll
@@ -45,36 +45,36 @@ define void @sadd.sat() {
   call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
   call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
   call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-  call <vscale x 2 x i8> @llvm.sadd.sat.nvx2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef)
-  call <vscale x 4 x i8> @llvm.sadd.sat.nvx4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef)
-  call <vscale x 8 x i8> @llvm.sadd.sat.nvx8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef)
-  call <vscale x 16 x i8> @llvm.sadd.sat.nvx16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
+  call <vscale x 2 x i8> @llvm.sadd.sat.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef)
+  call <vscale x 4 x i8> @llvm.sadd.sat.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef)
+  call <vscale x 8 x i8> @llvm.sadd.sat.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef)
+  call <vscale x 16 x i8> @llvm.sadd.sat.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
   call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
   call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
   call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
   call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
   call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-  call <vscale x 2 x i16> @llvm.sadd.sat.nvx2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef)
-  call <vscale x 4 x i16> @llvm.sadd.sat.nvx4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
-  call <vscale x 8 x i16> @llvm.sadd.sat.nvx8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
-  call <vscale x 16 x i16> @llvm.sadd.sat.nvx16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
+  call <vscale x 2 x i16> @llvm.sadd.sat.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef)
+  call <vscale x 4 x i16> @llvm.sadd.sat.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
+  call <vscale x 8 x i16> @llvm.sadd.sat.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
+  call <vscale x 16 x i16> @llvm.sadd.sat.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
   call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)
   call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
   call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
   call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
   call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-  call <vscale x 2 x i32> @llvm.sadd.sat.nvx2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
-  call <vscale x 4 x i32> @llvm.sadd.sat.nvx4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
-  call <vscale x 8 x i32> @llvm.sadd.sat.nvx8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
-  call <vscale x 16 x i32> @llvm.sadd.sat.nvx16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef)
+  call <vscale x 2 x i32> @llvm.sadd.sat.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
+  call <vscale x 4 x i32> @llvm.sadd.sat.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
+  call <vscale x 8 x i32> @llvm.sadd.sat.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
+  call <vscale x 16 x i32> @llvm.sadd.sat.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef)
   call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
   call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
   call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
   call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
   call <16 x i64> @llvm.sadd.sat.v16i64(<16 x i64> undef, <16 x i64> undef)
-  call <vscale x 2 x i64> @llvm.sadd.sat.nvx2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
-  call <vscale x 4 x i64> @llvm.sadd.sat.nvx4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef)
-  call <vscale x 8 x i64> @llvm.sadd.sat.nvx8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef)
+  call <vscale x 2 x i64> @llvm.sadd.sat.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
+  call <vscale x 4 x i64> @llvm.sadd.sat.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef)
+  call <vscale x 8 x i64> @llvm.sadd.sat.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef)
   ret void
 }
 
@@ -122,36 +122,36 @@ define void @uadd.sat() {
   call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
   call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
   call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-  call <vscale x 2 x i8> @llvm.uadd.sat.nvx2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef)
-  call <vscale x 4 x i8> @llvm.uadd.sat.nvx4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef)
-  call <vscale x 8 x i8> @llvm.uadd.sat.nvx8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef)
-  call <vscale x 16 x i8> @llvm.uadd.sat.nvx16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
+  call <vscale x 2 x i8> @llvm.uadd.sat.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef)
+  call <vscale x 4 x i8> @llvm.uadd.sat.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef)
+  call <vscale x 8 x i8> @llvm.uadd.sat.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef)
+  call <vscale x 16 x i8> @llvm.uadd.sat.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
   call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef)
   call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
   call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
   call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
   call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-  call <vscale x 2 x i16> @llvm.uadd.sat.nvx2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef)
-  call <vscale x 4 x i16> @llvm.uadd.sat.nvx4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
-  call <vscale x 8 x i16> @llvm.uadd.sat.nvx8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
-  call <vscale x 16 x i16> @llvm.uadd.sat.nvx16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
+  call <vscale x 2 x i16> @llvm.uadd.sat.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef)
+  call <vscale x 4 x i16> @llvm.uadd.sat.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
+  call <vscale x 8 x i16> @llvm.uadd.sat.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
+  call <vscale x 16 x i16> @llvm.uadd.sat.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
   call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef)
   call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
   call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
   call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
   call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-  call <vscale x 2 x i32> @llvm.uadd.sat.nvx2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
-  call <vscale x 4 x i32> @llvm.uadd.sat.nvx4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
-  call <vscale x 8 x i32> @llvm.uadd.sat.nvx8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
-  call <vscale x 16 x i32> @llvm.uadd.sat.nvx16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef)
+  call <vscale x 2 x i32> @llvm.uadd.sat.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
+  call <vscale x 4 x i32> @llvm.uadd.sat.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
+  call <vscale x 8 x i32> @llvm.uadd.sat.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
+  call <vscale x 16 x i32> @llvm.uadd.sat.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef)
   call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef)
   call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
   call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
   call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
   call <16 x i64> @llvm.uadd.sat.v16i64(<16 x i64> undef, <16 x i64> undef)
-  call <vscale x 2 x i64> @llvm.uadd.sat.nvx2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
-  call <vscale x 4 x i64> @llvm.uadd.sat.nvx4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef)
-  call <vscale x 8 x i64> @llvm.uadd.sat.nvx8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef)
+  call <vscale x 2 x i64> @llvm.uadd.sat.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
+  call <vscale x 4 x i64> @llvm.uadd.sat.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef)
+  call <vscale x 8 x i64> @llvm.uadd.sat.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef)
   ret void
 }
 
@@ -199,36 +199,36 @@ define void @usub.sat() {
   call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
   call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
   call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-  call <vscale x 2 x i8> @llvm.usub.sat.nvx2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef)
-  call <vscale x 4 x i8> @llvm.usub.sat.nvx4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef)
-  call <vscale x 8 x i8> @llvm.usub.sat.nvx8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef)
-  call <vscale x 16 x i8> @llvm.usub.sat.nvx16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
+  call <vscale x 2 x i8> @llvm.usub.sat.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef)
+  call <vscale x 4 x i8> @llvm.usub.sat.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef)
+  call <vscale x 8 x i8> @llvm.usub.sat.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef)
+  call <vscale x 16 x i8> @llvm.usub.sat.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
   call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)
   call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
   call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
   call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
   call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-  call <vscale x 2 x i16> @llvm.usub.sat.nvx2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef)
-  call <vscale x 4 x i16> @llvm.usub.sat.nvx4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
-  call <vscale x 8 x i16> @llvm.usub.sat.nvx8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
-  call <vscale x 16 x i16> @llvm.usub.sat.nvx16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
+  call <vscale x 2 x i16> @llvm.usub.sat.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef)
+  call <vscale x 4 x i16> @llvm.usub.sat.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
+  call <vscale x 8 x i16> @llvm.usub.sat.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
+  call <vscale x 16 x i16> @llvm.usub.sat.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
   call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)
   call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
   call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
   call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
   call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-  call <vscale x 2 x i32> @llvm.usub.sat.nvx2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
-  call <vscale x 4 x i32> @llvm.usub.sat.nvx4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
-  call <vscale x 8 x i32> @llvm.usub.sat.nvx8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
-  call <vscale x 16 x i32> @llvm.usub.sat.nvx16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef)
+  call <vscale x 2 x i32> @llvm.usub.sat.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
+  call <vscale x 4 x i32> @llvm.usub.sat.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
+  call <vscale x 8 x i32> @llvm.usub.sat.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
+  call <vscale x 16 x i32> @llvm.usub.sat.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef)
   call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)
   call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
   call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
   call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
   call <16 x i64> @llvm.usub.sat.v16i64(<16 x i64> undef, <16 x i64> undef)
-  call <vscale x 2 x i64> @llvm.usub.sat.nvx2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
-  call <vscale x 4 x i64> @llvm.usub.sat.nvx4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef)
-  call <vscale x 8 x i64> @llvm.usub.sat.nvx8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef)
+  call <vscale x 2 x i64> @llvm.usub.sat.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
+  call <vscale x 4 x i64> @llvm.usub.sat.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef)
+  call <vscale x 8 x i64> @llvm.usub.sat.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef)
   ret void
 }
 
@@ -276,36 +276,36 @@ define void @ssub.sat() {
   call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
   call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
   call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-  call <vscale x 2 x i8> @llvm.ssub.sat.nvx2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef)
-  call <vscale x 4 x i8> @llvm.ssub.sat.nvx4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef)
-  call <vscale x 8 x i8> @llvm.ssub.sat.nvx8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef)
-  call <vscale x 16 x i8> @llvm.ssub.sat.nvx16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
+  call <vscale x 2 x i8> @llvm.ssub.sat.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef)
+  call <vscale x 4 x i8> @llvm.ssub.sat.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef)
+  call <vscale x 8 x i8> @llvm.ssub.sat.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef)
+  call <vscale x 16 x i8> @llvm.ssub.sat.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
   call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef)
   call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
   call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
   call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
   call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-  call <vscale x 2 x i16> @llvm.ssub.sat.nvx2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef)
-  call <vscale x 4 x i16> @llvm.ssub.sat.nvx4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
-  call <vscale x 8 x i16> @llvm.ssub.sat.nvx8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
-  call <vscale x 16 x i16> @llvm.ssub.sat.nvx16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
+  call <vscale x 2 x i16> @llvm.ssub.sat.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef)
+  call <vscale x 4 x i16> @llvm.ssub.sat.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
+  call <vscale x 8 x i16> @llvm.ssub.sat.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
+  call <vscale x 16 x i16> @llvm.ssub.sat.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
   call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef)
   call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
   call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
   call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
   call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-  call <vscale x 2 x i32> @llvm.ssub.sat.nvx2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
-  call <vscale x 4 x i32> @llvm.ssub.sat.nvx4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
-  call <vscale x 8 x i32> @llvm.ssub.sat.nvx8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
-  call <vscale x 16 x i32> @llvm.ssub.sat.nvx16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef)
+  call <vscale x 2 x i32> @llvm.ssub.sat.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
+  call <vscale x 4 x i32> @llvm.ssub.sat.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
+  call <vscale x 8 x i32> @llvm.ssub.sat.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
+  call <vscale x 16 x i32> @llvm.ssub.sat.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef)
   call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef)
   call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
   call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
   call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
   call <16 x i64> @llvm.ssub.sat.v16i64(<16 x i64> undef, <16 x i64> undef)
-  call <vscale x 2 x i64> @llvm.ssub.sat.nvx2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
-  call <vscale x 4 x i64> @llvm.ssub.sat.nvx4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef)
-  call <vscale x 8 x i64> @llvm.ssub.sat.nvx8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef)
+  call <vscale x 2 x i64> @llvm.ssub.sat.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
+  call <vscale x 4 x i64> @llvm.ssub.sat.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef)
+  call <vscale x 8 x i64> @llvm.ssub.sat.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef)
   ret void
 }
 
@@ -353,36 +353,36 @@ define void @ushl.sat() {
   call <4 x i8> @llvm.ushl.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
   call <8 x i8> @llvm.ushl.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
   call <16 x i8> @llvm.ushl.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-  call <vscale x 2 x i8> @llvm.ushl.sat.nvx2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef)
-  call <vscale x 4 x i8> @llvm.ushl.sat.nvx4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef)
-  call <vscale x 8 x i8> @llvm.ushl.sat.nvx8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef)
-  call <vscale x 16 x i8> @llvm.ushl.sat.nvx16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
+  call <vscale x 2 x i8> @llvm.ushl.sat.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef)
+  call <vscale x 4 x i8> @llvm.ushl.sat.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef)
+  call <vscale x 8 x i8> @llvm.ushl.sat.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef)
+  call <vscale x 16 x i8> @llvm.ushl.sat.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
   call i16 @llvm.ushl.sat.i16(i16 undef, i16 undef)
   call <2 x i16> @llvm.ushl.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
   call <4 x i16> @llvm.ushl.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
   call <8 x i16> @llvm.ushl.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
   call <16 x i16> @llvm.ushl.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-  call <vscale x 2 x i16> @llvm.ushl.sat.nvx2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef)
-  call <vscale x 4 x i16> @llvm.ushl.sat.nvx4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
-  call <vscale x 8 x i16> @llvm.ushl.sat.nvx8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
-  call <vscale x 16 x i16> @llvm.ushl.sat.nvx16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
+  call <vscale x 2 x i16> @llvm.ushl.sat.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef)
+  call <vscale x 4 x i16> @llvm.ushl.sat.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
+  call <vscale x 8 x i16> @llvm.ushl.sat.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
+  call <vscale x 16 x i16> @llvm.ushl.sat.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
   call i32 @llvm.ushl.sat.i32(i32 undef, i32 undef)
   call <2 x i32> @llvm.ushl.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
   call <4 x i32> @llvm.ushl.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
   call <8 x i32> @llvm.ushl.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
   call <16 x i32> @llvm.ushl.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-  call <vscale x 2 x i32> @llvm.ushl.sat.nvx2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
-  call <vscale x 4 x i32> @llvm.ushl.sat.nvx4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
-  call <vscale x 8 x i32> @llvm.ushl.sat.nvx8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
-  call <vscale x 16 x i32> @llvm.ushl.sat.nvx16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef)
+  call <vscale x 2 x i32> @llvm.ushl.sat.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
+  call <vscale x 4 x i32> @llvm.ushl.sat.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
+  call <vscale x 8 x i32> @llvm.ushl.sat.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
+  call <vscale x 16 x i32> @llvm.ushl.sat.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef)
   call i64 @llvm.ushl.sat.i64(i64 undef, i64 undef)
   call <2 x i64> @llvm.ushl.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
   call <4 x i64> @llvm.ushl.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
   call <8 x i64> @llvm.ushl.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
   call <16 x i64> @llvm.ushl.sat.v16i64(<16 x i64> undef, <16 x i64> undef)
-  call <vscale x 2 x i64> @llvm.ushl.sat.nvx2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
-  call <vscale x 4 x i64> @llvm.ushl.sat.nvx4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef)
-  call <vscale x 8 x i64> @llvm.ushl.sat.nvx8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef)
+  call <vscale x 2 x i64> @llvm.ushl.sat.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
+  call <vscale x 4 x i64> @llvm.ushl.sat.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef)
+  call <vscale x 8 x i64> @llvm.ushl.sat.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef)
   ret void
 }
 
@@ -430,36 +430,36 @@ define void @sshl.sat() {
   call <4 x i8> @llvm.sshl.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
   call <8 x i8> @llvm.sshl.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
   call <16 x i8> @llvm.sshl.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-  call <vscale x 2 x i8> @llvm.sshl.sat.nvx2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef)
-  call <vscale x 4 x i8> @llvm.sshl.sat.nvx4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef)
-  call <vscale x 8 x i8> @llvm.sshl.sat.nvx8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef)
-  call <vscale x 16 x i8> @llvm.sshl.sat.nvx16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
+  call <vscale x 2 x i8> @llvm.sshl.sat.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef)
+  call <vscale x 4 x i8> @llvm.sshl.sat.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef)
+  call <vscale x 8 x i8> @llvm.sshl.sat.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef)
+  call <vscale x 16 x i8> @llvm.sshl.sat.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
   call i16 @llvm.sshl.sat.i16(i16 undef, i16 undef)
   call <2 x i16> @llvm.sshl.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
   call <4 x i16> @llvm.sshl.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
   call <8 x i16> @llvm.sshl.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
   call <16 x i16> @llvm.sshl.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-  call <vscale x 2 x i16> @llvm.sshl.sat.nvx2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef)
-  call <vscale x 4 x i16> @llvm.sshl.sat.nvx4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
-  call <vscale x 8 x i16> @llvm.sshl.sat.nvx8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
-  call <vscale x 16 x i16> @llvm.sshl.sat.nvx16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
+  call <vscale x 2 x i16> @llvm.sshl.sat.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef)
+  call <vscale x 4 x i16> @llvm.sshl.sat.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
+  call <vscale x 8 x i16> @llvm.sshl.sat.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
+  call <vscale x 16 x i16> @llvm.sshl.sat.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
   call i32 @llvm.sshl.sat.i32(i32 undef, i32 undef)
   call <2 x i32> @llvm.sshl.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
   call <4 x i32> @llvm.sshl.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
   call <8 x i32> @llvm.sshl.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
   call <16 x i32> @llvm.sshl.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-  call <vscale x 2 x i32> @llvm.sshl.sat.nvx2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
-  call <vscale x 4 x i32> @llvm.sshl.sat.nvx4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
-  call <vscale x 8 x i32> @llvm.sshl.sat.nvx8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
-  call <vscale x 16 x i32> @llvm.sshl.sat.nvx16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef)
+  call <vscale x 2 x i32> @llvm.sshl.sat.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
+  call <vscale x 4 x i32> @llvm.sshl.sat.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
+  call <vscale x 8 x i32> @llvm.sshl.sat.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
+  call <vscale x 16 x i32> @llvm.sshl.sat.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef)
   call i64 @llvm.sshl.sat.i64(i64 undef, i64 undef)
   call <2 x i64> @llvm.sshl.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
   call <4 x i64> @llvm.sshl.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
   call <8 x i64> @llvm.sshl.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
   call <16 x i64> @llvm.sshl.sat.v16i64(<16 x i64> undef, <16 x i64> undef)
-  call <vscale x 2 x i64> @llvm.sshl.sat.nvx2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
-  call <vscale x 4 x i64> @llvm.sshl.sat.nvx4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef)
-  call <vscale x 8 x i64> @llvm.sshl.sat.nvx8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef)
+  call <vscale x 2 x i64> @llvm.sshl.sat.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
+  call <vscale x 4 x i64> @llvm.sshl.sat.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef)
+  call <vscale x 8 x i64> @llvm.sshl.sat.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef)
   ret void
 }
 
@@ -468,213 +468,213 @@ declare <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8>, <2 x i8>)
 declare <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8>, <4 x i8>)
 declare <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8>, <8 x i8>)
 declare <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8>, <16 x i8>)
-declare <vscale x 2 x i8> @llvm.sadd.sat.nvx2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)
-declare <vscale x 4 x i8> @llvm.sadd.sat.nvx4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
-declare <vscale x 8 x i8> @llvm.sadd.sat.nvx8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
-declare <vscale x 16 x i8> @llvm.sadd.sat.nvx16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 2 x i8> @llvm.sadd.sat.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)
+declare <vscale x 4 x i8> @llvm.sadd.sat.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
+declare <vscale x 8 x i8> @llvm.sadd.sat.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
+declare <vscale x 16 x i8> @llvm.sadd.sat.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
 declare i16 @llvm.sadd.sat.i16(i16, i16)
 declare <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16>, <2 x i16>)
 declare <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16>, <4 x i16>)
 declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>)
 declare <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16>, <16 x i16>)
-declare <vscale x 2 x i16> @llvm.sadd.sat.nvx2i16(<vscale x 2 x i16>, <vscale x 2 x i16>)
-declare <vscale x 4 x i16> @llvm.sadd.sat.nvx4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
-declare <vscale x 8 x i16> @llvm.sadd.sat.nvx8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 16 x i16> @llvm.sadd.sat.nvx16i16(<vscale x 16 x i16>, <vscale x 16 x i16>)
+declare <vscale x 2 x i16> @llvm.sadd.sat.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>)
+declare <vscale x 4 x i16> @llvm.sadd.sat.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
+declare <vscale x 8 x i16> @llvm.sadd.sat.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 16 x i16> @llvm.sadd.sat.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>)
 declare i32 @llvm.sadd.sat.i32(i32, i32)
 declare <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32>, <2 x i32>)
 declare <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32>, <4 x i32>)
 declare <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32>, <8 x i32>)
 declare <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32>, <16 x i32>)
-declare <vscale x 2 x i32> @llvm.sadd.sat.nvx2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
-declare <vscale x 4 x i32> @llvm.sadd.sat.nvx4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 8 x i32> @llvm.sadd.sat.nvx8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
-declare <vscale x 16 x i32> @llvm.sadd.sat.nvx16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)
+declare <vscale x 2 x i32> @llvm.sadd.sat.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
+declare <vscale x 4 x i32> @llvm.sadd.sat.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 8 x i32> @llvm.sadd.sat.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
+declare <vscale x 16 x i32> @llvm.sadd.sat.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)
 declare i64 @llvm.sadd.sat.i64(i64, i64)
 declare <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64>, <2 x i64>)
 declare <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64>, <4 x i64>)
 declare <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64>, <8 x i64>)
 declare <16 x i64> @llvm.sadd.sat.v16i64(<16 x i64>, <16 x i64>)
-declare <vscale x 2 x i64> @llvm.sadd.sat.nvx2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
-declare <vscale x 4 x i64> @llvm.sadd.sat.nvx4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
-declare <vscale x 8 x i64> @llvm.sadd.sat.nvx8i64(<vscale x 8 x i64>, <vscale x 8 x i64>)
+declare <vscale x 2 x i64> @llvm.sadd.sat.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 4 x i64> @llvm.sadd.sat.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
+declare <vscale x 8 x i64> @llvm.sadd.sat.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>)
 
 declare i8 @llvm.uadd.sat.i8(i8, i8)
 declare <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8>, <2 x i8>)
 declare <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8>, <4 x i8>)
 declare <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8>, <8 x i8>)
 declare <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8>, <16 x i8>)
-declare <vscale x 2 x i8> @llvm.uadd.sat.nvx2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)
-declare <vscale x 4 x i8> @llvm.uadd.sat.nvx4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
-declare <vscale x 8 x i8> @llvm.uadd.sat.nvx8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
-declare <vscale x 16 x i8> @llvm.uadd.sat.nvx16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 2 x i8> @llvm.uadd.sat.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)
+declare <vscale x 4 x i8> @llvm.uadd.sat.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
+declare <vscale x 8 x i8> @llvm.uadd.sat.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
+declare <vscale x 16 x i8> @llvm.uadd.sat.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
 declare i16 @llvm.uadd.sat.i16(i16, i16)
 declare <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16>, <2 x i16>)
 declare <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16>, <4 x i16>)
 declare <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16>, <8 x i16>)
 declare <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16>, <16 x i16>)
-declare <vscale x 2 x i16> @llvm.uadd.sat.nvx2i16(<vscale x 2 x i16>, <vscale x 2 x i16>)
-declare <vscale x 4 x i16> @llvm.uadd.sat.nvx4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
-declare <vscale x 8 x i16> @llvm.uadd.sat.nvx8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 16 x i16> @llvm.uadd.sat.nvx16i16(<vscale x 16 x i16>, <vscale x 16 x i16>)
+declare <vscale x 2 x i16> @llvm.uadd.sat.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>)
+declare <vscale x 4 x i16> @llvm.uadd.sat.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
+declare <vscale x 8 x i16> @llvm.uadd.sat.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 16 x i16> @llvm.uadd.sat.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>)
 declare i32 @llvm.uadd.sat.i32(i32, i32)
 declare <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32>, <2 x i32>)
 declare <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32>, <4 x i32>)
 declare <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32>, <8 x i32>)
 declare <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32>, <16 x i32>)
-declare <vscale x 2 x i32> @llvm.uadd.sat.nvx2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
-declare <vscale x 4 x i32> @llvm.uadd.sat.nvx4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 8 x i32> @llvm.uadd.sat.nvx8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
-declare <vscale x 16 x i32> @llvm.uadd.sat.nvx16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)
+declare <vscale x 2 x i32> @llvm.uadd.sat.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
+declare <vscale x 4 x i32> @llvm.uadd.sat.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 8 x i32> @llvm.uadd.sat.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
+declare <vscale x 16 x i32> @llvm.uadd.sat.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)
 declare i64 @llvm.uadd.sat.i64(i64, i64)
 declare <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64>, <2 x i64>)
 declare <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64>, <4 x i64>)
 declare <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64>, <8 x i64>)
 declare <16 x i64> @llvm.uadd.sat.v16i64(<16 x i64>, <16 x i64>)
-declare <vscale x 2 x i64> @llvm.uadd.sat.nvx2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
-declare <vscale x 4 x i64> @llvm.uadd.sat.nvx4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
-declare <vscale x 8 x i64> @llvm.uadd.sat.nvx8i64(<vscale x 8 x i64>, <vscale x 8 x i64>)
+declare <vscale x 2 x i64> @llvm.uadd.sat.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 4 x i64> @llvm.uadd.sat.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
+declare <vscale x 8 x i64> @llvm.uadd.sat.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>)
 
 declare i8 @llvm.usub.sat.i8(i8, i8)
 declare <2 x i8> @llvm.usub.sat.v2i8(<2 x i8>, <2 x i8>)
 declare <4 x i8> @llvm.usub.sat.v4i8(<4 x i8>, <4 x i8>)
 declare <8 x i8> @llvm.usub.sat.v8i8(<8 x i8>, <8 x i8>)
 declare <16 x i8> @llvm.usub.sat.v16i8(<16 x i8>, <16 x i8>)
-declare <vscale x 2 x i8> @llvm.usub.sat.nvx2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)
-declare <vscale x 4 x i8> @llvm.usub.sat.nvx4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
-declare <vscale x 8 x i8> @llvm.usub.sat.nvx8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
-declare <vscale x 16 x i8> @llvm.usub.sat.nvx16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 2 x i8> @llvm.usub.sat.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)
+declare <vscale x 4 x i8> @llvm.usub.sat.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
+declare <vscale x 8 x i8> @llvm.usub.sat.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
+declare <vscale x 16 x i8> @llvm.usub.sat.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
 declare i16 @llvm.usub.sat.i16(i16, i16)
 declare <2 x i16> @llvm.usub.sat.v2i16(<2 x i16>, <2 x i16>)
 declare <4 x i16> @llvm.usub.sat.v4i16(<4 x i16>, <4 x i16>)
 declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16>, <8 x i16>)
 declare <16 x i16> @llvm.usub.sat.v16i16(<16 x i16>, <16 x i16>)
-declare <vscale x 2 x i16> @llvm.usub.sat.nvx2i16(<vscale x 2 x i16>, <vscale x 2 x i16>)
-declare <vscale x 4 x i16> @llvm.usub.sat.nvx4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
-declare <vscale x 8 x i16> @llvm.usub.sat.nvx8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 16 x i16> @llvm.usub.sat.nvx16i16(<vscale x 16 x i16>, <vscale x 16 x i16>)
+declare <vscale x 2 x i16> @llvm.usub.sat.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>)
+declare <vscale x 4 x i16> @llvm.usub.sat.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
+declare <vscale x 8 x i16> @llvm.usub.sat.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 16 x i16> @llvm.usub.sat.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>)
 declare i32 @llvm.usub.sat.i32(i32, i32)
 declare <2 x i32> @llvm.usub.sat.v2i32(<2 x i32>, <2 x i32>)
 declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32>, <4 x i32>)
 declare <8 x i32> @llvm.usub.sat.v8i32(<8 x i32>, <8 x i32>)
 declare <16 x i32> @llvm.usub.sat.v16i32(<16 x i32>, <16 x i32>)
-declare <vscale x 2 x i32> @llvm.usub.sat.nvx2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
-declare <vscale x 4 x i32> @llvm.usub.sat.nvx4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 8 x i32> @llvm.usub.sat.nvx8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
-declare <vscale x 16 x i32> @llvm.usub.sat.nvx16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)
+declare <vscale x 2 x i32> @llvm.usub.sat.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
+declare <vscale x 4 x i32> @llvm.usub.sat.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 8 x i32> @llvm.usub.sat.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
+declare <vscale x 16 x i32> @llvm.usub.sat.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)
 declare i64 @llvm.usub.sat.i64(i64, i64)
 declare <2 x i64> @llvm.usub.sat.v2i64(<2 x i64>, <2 x i64>)
 declare <4 x i64> @llvm.usub.sat.v4i64(<4 x i64>, <4 x i64>)
 declare <8 x i64> @llvm.usub.sat.v8i64(<8 x i64>, <8 x i64>)
 declare <16 x i64> @llvm.usub.sat.v16i64(<16 x i64>, <16 x i64>)
-declare <vscale x 2 x i64> @llvm.usub.sat.nvx2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
-declare <vscale x 4 x i64> @llvm.usub.sat.nvx4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
-declare <vscale x 8 x i64> @llvm.usub.sat.nvx8i64(<vscale x 8 x i64>, <vscale x 8 x i64>)
+declare <vscale x 2 x i64> @llvm.usub.sat.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 4 x i64> @llvm.usub.sat.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
+declare <vscale x 8 x i64> @llvm.usub.sat.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>)
 
 declare i8 @llvm.ssub.sat.i8(i8, i8)
 declare <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8>, <2 x i8>)
 declare <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8>, <4 x i8>)
 declare <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8>, <8 x i8>)
 declare <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8>, <16 x i8>)
-declare <vscale x 2 x i8> @llvm.ssub.sat.nvx2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)
-declare <vscale x 4 x i8> @llvm.ssub.sat.nvx4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
-declare <vscale x 8 x i8> @llvm.ssub.sat.nvx8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
-declare <vscale x 16 x i8> @llvm.ssub.sat.nvx16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 2 x i8> @llvm.ssub.sat.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)
+declare <vscale x 4 x i8> @llvm.ssub.sat.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
+declare <vscale x 8 x i8> @llvm.ssub.sat.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
+declare <vscale x 16 x i8> @llvm.ssub.sat.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
 declare i16 @llvm.ssub.sat.i16(i16, i16)
 declare <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16>, <2 x i16>)
 declare <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16>, <4 x i16>)
 declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>)
 declare <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16>, <16 x i16>)
-declare <vscale x 2 x i16> @llvm.ssub.sat.nvx2i16(<vscale x 2 x i16>, <vscale x 2 x i16>)
-declare <vscale x 4 x i16> @llvm.ssub.sat.nvx4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
-declare <vscale x 8 x i16> @llvm.ssub.sat.nvx8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 16 x i16> @llvm.ssub.sat.nvx16i16(<vscale x 16 x i16>, <vscale x 16 x i16>)
+declare <vscale x 2 x i16> @llvm.ssub.sat.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>)
+declare <vscale x 4 x i16> @llvm.ssub.sat.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
+declare <vscale x 8 x i16> @llvm.ssub.sat.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 16 x i16> @llvm.ssub.sat.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>)
 declare i32 @llvm.ssub.sat.i32(i32, i32)
 declare <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32>, <2 x i32>)
 declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>)
 declare <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32>, <8 x i32>)
 declare <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32>, <16 x i32>)
-declare <vscale x 2 x i32> @llvm.ssub.sat.nvx2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
-declare <vscale x 4 x i32> @llvm.ssub.sat.nvx4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 8 x i32> @llvm.ssub.sat.nvx8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
-declare <vscale x 16 x i32> @llvm.ssub.sat.nvx16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)
+declare <vscale x 2 x i32> @llvm.ssub.sat.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
+declare <vscale x 4 x i32> @llvm.ssub.sat.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 8 x i32> @llvm.ssub.sat.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
+declare <vscale x 16 x i32> @llvm.ssub.sat.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)
 declare i64 @llvm.ssub.sat.i64(i64, i64)
 declare <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64>, <2 x i64>)
 declare <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64>, <4 x i64>)
 declare <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64>, <8 x i64>)
 declare <16 x i64> @llvm.ssub.sat.v16i64(<16 x i64>, <16 x i64>)
-declare <vscale x 2 x i64> @llvm.ssub.sat.nvx2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
-declare <vscale x 4 x i64> @llvm.ssub.sat.nvx4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
-declare <vscale x 8 x i64> @llvm.ssub.sat.nvx8i64(<vscale x 8 x i64>, <vscale x 8 x i64>)
+declare <vscale x 2 x i64> @llvm.ssub.sat.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 4 x i64> @llvm.ssub.sat.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
+declare <vscale x 8 x i64> @llvm.ssub.sat.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>)
 
 declare i8 @llvm.ushl.sat.i8(i8, i8)
 declare <2 x i8> @llvm.ushl.sat.v2i8(<2 x i8>, <2 x i8>)
 declare <4 x i8> @llvm.ushl.sat.v4i8(<4 x i8>, <4 x i8>)
 declare <8 x i8> @llvm.ushl.sat.v8i8(<8 x i8>, <8 x i8>)
 declare <16 x i8> @llvm.ushl.sat.v16i8(<16 x i8>, <16 x i8>)
-declare <vscale x 2 x i8> @llvm.ushl.sat.nvx2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)
-declare <vscale x 4 x i8> @llvm.ushl.sat.nvx4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
-declare <vscale x 8 x i8> @llvm.ushl.sat.nvx8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
-declare <vscale x 16 x i8> @llvm.ushl.sat.nvx16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 2 x i8> @llvm.ushl.sat.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)
+declare <vscale x 4 x i8> @llvm.ushl.sat.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
+declare <vscale x 8 x i8> @llvm.ushl.sat.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
+declare <vscale x 16 x i8> @llvm.ushl.sat.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
 declare i16 @llvm.ushl.sat.i16(i16, i16)
 declare <2 x i16> @llvm.ushl.sat.v2i16(<2 x i16>, <2 x i16>)
 declare <4 x i16> @llvm.ushl.sat.v4i16(<4 x i16>, <4 x i16>)
 declare <8 x i16> @llvm.ushl.sat.v8i16(<8 x i16>, <8 x i16>)
 declare <16 x i16> @llvm.ushl.sat.v16i16(<16 x i16>, <16 x i16>)
-declare <vscale x 2 x i16> @llvm.ushl.sat.nvx2i16(<vscale x 2 x i16>, <vscale x 2 x i16>)
-declare <vscale x 4 x i16> @llvm.ushl.sat.nvx4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
-declare <vscale x 8 x i16> @llvm.ushl.sat.nvx8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 16 x i16> @llvm.ushl.sat.nvx16i16(<vscale x 16 x i16>, <vscale x 16 x i16>)
+declare <vscale x 2 x i16> @llvm.ushl.sat.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>)
+declare <vscale x 4 x i16> @llvm.ushl.sat.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
+declare <vscale x 8 x i16> @llvm.ushl.sat.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 16 x i16> @llvm.ushl.sat.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>)
 declare i32 @llvm.ushl.sat.i32(i32, i32)
 declare <2 x i32> @llvm.ushl.sat.v2i32(<2 x i32>, <2 x i32>)
 declare <4 x i32> @llvm.ushl.sat.v4i32(<4 x i32>, <4 x i32>)
 declare <8 x i32> @llvm.ushl.sat.v8i32(<8 x i32>, <8 x i32>)
 declare <16 x i32> @llvm.ushl.sat.v16i32(<16 x i32>, <16 x i32>)
-declare <vscale x 2 x i32> @llvm.ushl.sat.nvx2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
-declare <vscale x 4 x i32> @llvm.ushl.sat.nvx4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 8 x i32> @llvm.ushl.sat.nvx8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
-declare <vscale x 16 x i32> @llvm.ushl.sat.nvx16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)
+declare <vscale x 2 x i32> @llvm.ushl.sat.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
+declare <vscale x 4 x i32> @llvm.ushl.sat.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 8 x i32> @llvm.ushl.sat.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
+declare <vscale x 16 x i32> @llvm.ushl.sat.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)
 declare i64 @llvm.ushl.sat.i64(i64, i64)
 declare <2 x i64> @llvm.ushl.sat.v2i64(<2 x i64>, <2 x i64>)
 declare <4 x i64> @llvm.ushl.sat.v4i64(<4 x i64>, <4 x i64>)
 declare <8 x i64> @llvm.ushl.sat.v8i64(<8 x i64>, <8 x i64>)
 declare <16 x i64> @llvm.ushl.sat.v16i64(<16 x i64>, <16 x i64>)
-declare <vscale x 2 x i64> @llvm.ushl.sat.nvx2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
-declare <vscale x 4 x i64> @llvm.ushl.sat.nvx4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
-declare <vscale x 8 x i64> @llvm.ushl.sat.nvx8i64(<vscale x 8 x i64>, <vscale x 8 x i64>)
+declare <vscale x 2 x i64> @llvm.ushl.sat.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 4 x i64> @llvm.ushl.sat.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
+declare <vscale x 8 x i64> @llvm.ushl.sat.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>)
 
 declare i8 @llvm.sshl.sat.i8(i8, i8)
 declare <2 x i8> @llvm.sshl.sat.v2i8(<2 x i8>, <2 x i8>)
 declare <4 x i8> @llvm.sshl.sat.v4i8(<4 x i8>, <4 x i8>)
 declare <8 x i8> @llvm.sshl.sat.v8i8(<8 x i8>, <8 x i8>)
 declare <16 x i8> @llvm.sshl.sat.v16i8(<16 x i8>, <16 x i8>)
-declare <vscale x 2 x i8> @llvm.sshl.sat.nvx2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)
-declare <vscale x 4 x i8> @llvm.sshl.sat.nvx4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
-declare <vscale x 8 x i8> @llvm.sshl.sat.nvx8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
-declare <vscale x 16 x i8> @llvm.sshl.sat.nvx16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 2 x i8> @llvm.sshl.sat.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)
+declare <vscale x 4 x i8> @llvm.sshl.sat.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
+declare <vscale x 8 x i8> @llvm.sshl.sat.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
+declare <vscale x 16 x i8> @llvm.sshl.sat.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
 declare i16 @llvm.sshl.sat.i16(i16, i16)
 declare <2 x i16> @llvm.sshl.sat.v2i16(<2 x i16>, <2 x i16>)
 declare <4 x i16> @llvm.sshl.sat.v4i16(<4 x i16>, <4 x i16>)
 declare <8 x i16> @llvm.sshl.sat.v8i16(<8 x i16>, <8 x i16>)
 declare <16 x i16> @llvm.sshl.sat.v16i16(<16 x i16>, <16 x i16>)
-declare <vscale x 2 x i16> @llvm.sshl.sat.nvx2i16(<vscale x 2 x i16>, <vscale x 2 x i16>)
-declare <vscale x 4 x i16> @llvm.sshl.sat.nvx4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
-declare <vscale x 8 x i16> @llvm.sshl.sat.nvx8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 16 x i16> @llvm.sshl.sat.nvx16i16(<vscale x 16 x i16>, <vscale x 16 x i16>)
+declare <vscale x 2 x i16> @llvm.sshl.sat.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>)
+declare <vscale x 4 x i16> @llvm.sshl.sat.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
+declare <vscale x 8 x i16> @llvm.sshl.sat.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 16 x i16> @llvm.sshl.sat.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>)
 declare i32 @llvm.sshl.sat.i32(i32, i32)
 declare <2 x i32> @llvm.sshl.sat.v2i32(<2 x i32>, <2 x i32>)
 declare <4 x i32> @llvm.sshl.sat.v4i32(<4 x i32>, <4 x i32>)
 declare <8 x i32> @llvm.sshl.sat.v8i32(<8 x i32>, <8 x i32>)
 declare <16 x i32> @llvm.sshl.sat.v16i32(<16 x i32>, <16 x i32>)
-declare <vscale x 2 x i32> @llvm.sshl.sat.nvx2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
-declare <vscale x 4 x i32> @llvm.sshl.sat.nvx4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 8 x i32> @llvm.sshl.sat.nvx8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
-declare <vscale x 16 x i32> @llvm.sshl.sat.nvx16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)
+declare <vscale x 2 x i32> @llvm.sshl.sat.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
+declare <vscale x 4 x i32> @llvm.sshl.sat.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 8 x i32> @llvm.sshl.sat.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
+declare <vscale x 16 x i32> @llvm.sshl.sat.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)
 declare i64 @llvm.sshl.sat.i64(i64, i64)
 declare <2 x i64> @llvm.sshl.sat.v2i64(<2 x i64>, <2 x i64>)
 declare <4 x i64> @llvm.sshl.sat.v4i64(<4 x i64>, <4 x i64>)
 declare <8 x i64> @llvm.sshl.sat.v8i64(<8 x i64>, <8 x i64>)
 declare <16 x i64> @llvm.sshl.sat.v16i64(<16 x i64>, <16 x i64>)
-declare <vscale x 2 x i64> @llvm.sshl.sat.nvx2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
-declare <vscale x 4 x i64> @llvm.sshl.sat.nvx4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
-declare <vscale x 8 x i64> @llvm.sshl.sat.nvx8i64(<vscale x 8 x i64>, <vscale x 8 x i64>)
+declare <vscale x 2 x i64> @llvm.sshl.sat.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 4 x i64> @llvm.sshl.sat.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
+declare <vscale x 8 x i64> @llvm.sshl.sat.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>)
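
For reference, the new suffixes follow LLVM's standard type mangling for scalable
vectors, where nxv<N><elt> corresponds to the operand type <vscale x N x elt>.
As a minimal illustrative sketch (not part of this patch; the function name
usub_sat_example is hypothetical), a call through one of the renamed
declarations would look like:

  define <vscale x 4 x i32> @usub_sat_example(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
    ; The .nxv4i32 suffix must match the <vscale x 4 x i32> operand/result type.
    %r = call <vscale x 4 x i32> @llvm.usub.sat.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
    ret <vscale x 4 x i32> %r
  }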


        

