[llvm-branch-commits] [llvm] 6da0033 - [RISCV] Define vsext/vzext intrinsics.
Zakk Chen via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Dec 29 17:07:06 PST 2020
Author: Zakk Chen
Date: 2020-12-29T16:50:53-08:00
New Revision: 6da00336248ca725f5f817337e8486c234ace187
URL: https://github.com/llvm/llvm-project/commit/6da00336248ca725f5f817337e8486c234ace187
DIFF: https://github.com/llvm/llvm-project/commit/6da00336248ca725f5f817337e8486c234ace187.diff
LOG: [RISCV] Define vsext/vzext intrinsics.
Define vsext/vzext intrinsics.and lower to V instructions.
Define new fraction register class fields in LMULInfo and a
NoReg to present invalid LMUL register classes.
Authored-by: ShihPo Hung <shihpo.hung at sifive.com>
Co-Authored-by: Zakk Chen <zakk.chen at sifive.com>
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D93893
Added:
llvm/test/CodeGen/RISCV/rvv/vsext-rv32.ll
llvm/test/CodeGen/RISCV/rvv/vsext-rv64.ll
llvm/test/CodeGen/RISCV/rvv/vzext-rv32.ll
llvm/test/CodeGen/RISCV/rvv/vzext-rv64.ll
Modified:
llvm/include/llvm/IR/IntrinsicsRISCV.td
llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index d72dc5a4dd59..8e3d6f2ed675 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -389,10 +389,24 @@ let TargetPrefix = "riscv" in {
: Intrinsic<[llvm_anyint_ty],
[llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<0>],
[IntrNoMem]>, RISCVVIntrinsic;
- // For mask unary operations with mask type in/out without mask
- // Output: (mask type output)
- // Input: (mask type vector_in, vl)
- class RISCVMaskUnaryMOutNoMask
+ // For destination vector type is NOT the same as source vector.
+ // Input: (vector_in, vl)
+ class RISCVUnaryABNoMask
+ : Intrinsic<[llvm_anyvector_ty],
+ [llvm_anyvector_ty, llvm_anyint_ty],
+ [IntrNoMem]>, RISCVVIntrinsic;
+ // For destination vector type is NOT the same as source vector (with mask).
+ // Input: (maskedoff, vector_in, mask, vl)
+ class RISCVUnaryABMask
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<1, llvm_i1_ty>,
+ llvm_anyint_ty],
+ [IntrNoMem]>, RISCVVIntrinsic;
+ // For unary operations with the same vector type in/out without mask
+ // Output: (vector)
+ // Input: (vector_in, vl)
+ class RISCVUnaryNoMask
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_anyint_ty],
[IntrNoMem]>, RISCVVIntrinsic;
@@ -490,8 +504,12 @@ let TargetPrefix = "riscv" in {
def "int_riscv_" # NAME : RISCVMaskUnarySOutNoMask;
def "int_riscv_" # NAME # "_mask" : RISCVMaskUnarySOutMask;
}
+ multiclass RISCVUnaryAB {
+ def "int_riscv_" # NAME : RISCVUnaryABNoMask;
+ def "int_riscv_" # NAME # "_mask" : RISCVUnaryABMask;
+ }
multiclass RISCVMaskUnaryMOut {
- def "int_riscv_" # NAME : RISCVMaskUnaryMOutNoMask;
+ def "int_riscv_" # NAME : RISCVUnaryNoMask;
def "int_riscv_" # NAME # "_mask" : RISCVMaskUnaryMOutMask;
}
@@ -517,6 +535,9 @@ let TargetPrefix = "riscv" in {
defm vwsubu_w : RISCVBinaryAAX;
defm vwsub_w : RISCVBinaryAAX;
+ defm vzext : RISCVUnaryAB;
+ defm vsext : RISCVUnaryAB;
+
defm vadc : RISCVBinaryWithV0;
defm vmadc_carry_in : RISCVBinaryMaskOutWithV0;
defm vmadc : RISCVBinaryMaskOut;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index e6e3846d7c9a..b0e3b455339f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -40,22 +40,26 @@ def NoX0 : SDNodeXForm<undef,
//===----------------------------------------------------------------------===//
// This class describes information associated to the LMUL.
-class LMULInfo<int lmul, VReg regclass, VReg wregclass, string mx> {
+class LMULInfo<int lmul, VReg regclass, VReg wregclass,
+ VReg f2regclass, VReg f4regclass, VReg f8regclass, string mx> {
bits<3> value = lmul; // This is encoded as the vlmul field of vtype.
VReg vrclass = regclass;
VReg wvrclass = wregclass;
+ VReg f8vrclass = f8regclass;
+ VReg f4vrclass = f4regclass;
+ VReg f2vrclass = f2regclass;
string MX = mx;
}
// Associate LMUL with tablegen records of register classes.
-def V_M1 : LMULInfo<0b000, VR, VRM2, "M1">;
-def V_M2 : LMULInfo<0b001, VRM2, VRM4, "M2">;
-def V_M4 : LMULInfo<0b010, VRM4, VRM8, "M4">;
-def V_M8 : LMULInfo<0b011, VRM8, VR, "M8">;
+def V_M1 : LMULInfo<0b000, VR, VRM2, VR, VR, VR, "M1">;
+def V_M2 : LMULInfo<0b001, VRM2, VRM4, VR, VR, VR, "M2">;
+def V_M4 : LMULInfo<0b010, VRM4, VRM8, VRM2, VR, VR, "M4">;
+def V_M8 : LMULInfo<0b011, VRM8,/*NoVReg*/VR, VRM4, VRM2, VR, "M8">;
-def V_MF8 : LMULInfo<0b101, VR, VR, "MF8">;
-def V_MF4 : LMULInfo<0b110, VR, VR, "MF4">;
-def V_MF2 : LMULInfo<0b111, VR, VR, "MF2">;
+def V_MF8 : LMULInfo<0b101, VR, VR,/*NoVReg*/VR,/*NoVReg*/VR,/*NoVReg*/VR, "MF8">;
+def V_MF4 : LMULInfo<0b110, VR, VR, VR,/*NoVReg*/VR,/*NoVReg*/VR, "MF4">;
+def V_MF2 : LMULInfo<0b111, VR, VR, VR, VR,/*NoVReg*/VR, "MF2">;
// Used to iterate over all possible LMULs.
def MxList {
@@ -214,6 +218,12 @@ class VTypeInfoToWide<VTypeInfo vti, VTypeInfo wti>
VTypeInfo Wti = wti;
}
+class VTypeInfoToFraction<VTypeInfo vti, VTypeInfo fti>
+{
+ VTypeInfo Vti = vti;
+ VTypeInfo Fti = fti;
+}
+
defset list<VTypeInfoToWide> AllWidenableIntVectors = {
def : VTypeInfoToWide<VI8MF8, VI16MF4>;
def : VTypeInfoToWide<VI8MF4, VI16MF2>;
@@ -247,6 +257,43 @@ defset list<VTypeInfoToWide> AllWidenableFloatVectors = {
def : VTypeInfoToWide<VF32M4, VF64M8>;
}
+defset list<VTypeInfoToFraction> AllFractionableVF2IntVectors = {
+ def : VTypeInfoToFraction<VI16MF4, VI8MF8>;
+ def : VTypeInfoToFraction<VI16MF2, VI8MF4>;
+ def : VTypeInfoToFraction<VI16M1, VI8MF2>;
+ def : VTypeInfoToFraction<VI16M2, VI8M1>;
+ def : VTypeInfoToFraction<VI16M4, VI8M2>;
+ def : VTypeInfoToFraction<VI16M8, VI8M4>;
+ def : VTypeInfoToFraction<VI32MF2, VI16MF4>;
+ def : VTypeInfoToFraction<VI32M1, VI16MF2>;
+ def : VTypeInfoToFraction<VI32M2, VI16M1>;
+ def : VTypeInfoToFraction<VI32M4, VI16M2>;
+ def : VTypeInfoToFraction<VI32M8, VI16M4>;
+ def : VTypeInfoToFraction<VI64M1, VI32MF2>;
+ def : VTypeInfoToFraction<VI64M2, VI32M1>;
+ def : VTypeInfoToFraction<VI64M4, VI32M2>;
+ def : VTypeInfoToFraction<VI64M8, VI32M4>;
+}
+
+defset list<VTypeInfoToFraction> AllFractionableVF4IntVectors = {
+ def : VTypeInfoToFraction<VI32MF2, VI8MF8>;
+ def : VTypeInfoToFraction<VI32M1, VI8MF4>;
+ def : VTypeInfoToFraction<VI32M2, VI8MF2>;
+ def : VTypeInfoToFraction<VI32M4, VI8M1>;
+ def : VTypeInfoToFraction<VI32M8, VI8M2>;
+ def : VTypeInfoToFraction<VI64M1, VI16MF4>;
+ def : VTypeInfoToFraction<VI64M2, VI16MF2>;
+ def : VTypeInfoToFraction<VI64M4, VI16M1>;
+ def : VTypeInfoToFraction<VI64M8, VI16M2>;
+}
+
+defset list<VTypeInfoToFraction> AllFractionableVF8IntVectors = {
+ def : VTypeInfoToFraction<VI64M1, VI8MF8>;
+ def : VTypeInfoToFraction<VI64M2, VI8MF4>;
+ def : VTypeInfoToFraction<VI64M4, VI8MF2>;
+ def : VTypeInfoToFraction<VI64M8, VI8M1>;
+}
+
// This class holds the record of the RISCVVPseudoTable below.
// This represents the information we need in codegen for each pseudo.
// The definition should be consistent with `struct PseudoInfo` in
@@ -997,6 +1044,42 @@ multiclass VPseudoUnaryV_F_NoDummyMask {
}
}
+multiclass PseudoUnaryV_VF2 {
+ defvar constraints = "@earlyclobber $rd";
+ foreach m = MxList.m[1-6] in
+ {
+ let VLMul = m.value in {
+ def "_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.f2vrclass, constraints>;
+ def "_" # m.MX # "_MASK" : VPseudoUnaryMask<m.vrclass, m.f2vrclass,
+ constraints>;
+ }
+ }
+}
+
+multiclass PseudoUnaryV_VF4 {
+ defvar constraints = "@earlyclobber $rd";
+ foreach m = MxList.m[2-6] in
+ {
+ let VLMul = m.value in {
+ def "_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.f4vrclass, constraints>;
+ def "_" # m.MX # "_MASK" : VPseudoUnaryMask<m.vrclass, m.f4vrclass,
+ constraints>;
+ }
+ }
+}
+
+multiclass PseudoUnaryV_VF8 {
+ defvar constraints = "@earlyclobber $rd";
+ foreach m = MxList.m[3-6] in
+ {
+ let VLMul = m.value in {
+ def "_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.f8vrclass, constraints>;
+ def "_" # m.MX # "_MASK" : VPseudoUnaryMask<m.vrclass, m.f8vrclass,
+ constraints>;
+ }
+ }
+}
+
// The destination EEW is 1.
// The source EEW is 8, 16, 32, or 64.
// When the destination EEW is
diff erent from source EEW, we need to use
@@ -1477,6 +1560,22 @@ multiclass VPatUnaryV_M<string intrinsic, string instruction>
}
}
+multiclass VPatUnaryV_VF<string intrinsic, string instruction, string suffix,
+ list<VTypeInfoToFraction> fractionList>
+{
+ foreach vtiTofti = fractionList in
+ {
+ defvar vti = vtiTofti.Vti;
+ defvar fti = vtiTofti.Fti;
+ def : VPatUnaryNoMask<intrinsic, instruction, suffix,
+ vti.Vector, fti.Vector,
+ vti.SEW, vti.LMul, fti.RegClass>;
+ def : VPatUnaryMask<intrinsic, instruction, suffix,
+ vti.Vector, fti.Vector, vti.Mask,
+ vti.SEW, vti.LMul, vti.RegClass, fti.RegClass>;
+ }
+}
+
multiclass VPatNullaryV<string intrinsic, string instruction>
{
foreach vti = AllIntegerVectors in {
@@ -2140,6 +2239,16 @@ defm PseudoVWSUBU : VPseudoBinaryW_WV_WX;
defm PseudoVWADD : VPseudoBinaryW_WV_WX;
defm PseudoVWSUB : VPseudoBinaryW_WV_WX;
+//===----------------------------------------------------------------------===//
+// 12.3. Vector Integer Extension
+//===----------------------------------------------------------------------===//
+defm PseudoVZEXT_VF2 : PseudoUnaryV_VF2;
+defm PseudoVZEXT_VF4 : PseudoUnaryV_VF4;
+defm PseudoVZEXT_VF8 : PseudoUnaryV_VF8;
+defm PseudoVSEXT_VF2 : PseudoUnaryV_VF2;
+defm PseudoVSEXT_VF4 : PseudoUnaryV_VF4;
+defm PseudoVSEXT_VF8 : PseudoUnaryV_VF8;
+
//===----------------------------------------------------------------------===//
// 12.4. Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
//===----------------------------------------------------------------------===//
@@ -2648,6 +2757,22 @@ defm "" : VPatBinaryW_WV_WX<"int_riscv_vwsubu_w", "PseudoVWSUBU", AllWidenableIn
defm "" : VPatBinaryW_WV_WX<"int_riscv_vwadd_w", "PseudoVWADD", AllWidenableIntVectors>;
defm "" : VPatBinaryW_WV_WX<"int_riscv_vwsub_w", "PseudoVWSUB", AllWidenableIntVectors>;
+//===----------------------------------------------------------------------===//
+// 12.3. Vector Integer Extension
+//===----------------------------------------------------------------------===//
+defm "" : VPatUnaryV_VF<"int_riscv_vzext", "PseudoVZEXT", "VF2",
+ AllFractionableVF2IntVectors>;
+defm "" : VPatUnaryV_VF<"int_riscv_vzext", "PseudoVZEXT", "VF4",
+ AllFractionableVF4IntVectors>;
+defm "" : VPatUnaryV_VF<"int_riscv_vzext", "PseudoVZEXT", "VF8",
+ AllFractionableVF8IntVectors>;
+defm "" : VPatUnaryV_VF<"int_riscv_vsext", "PseudoVSEXT", "VF2",
+ AllFractionableVF2IntVectors>;
+defm "" : VPatUnaryV_VF<"int_riscv_vsext", "PseudoVSEXT", "VF4",
+ AllFractionableVF4IntVectors>;
+defm "" : VPatUnaryV_VF<"int_riscv_vsext", "PseudoVSEXT", "VF8",
+ AllFractionableVF8IntVectors>;
+
//===----------------------------------------------------------------------===//
// 12.4. Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsext-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsext-rv32.ll
new file mode 100644
index 000000000000..d4ed1471fdbc
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vsext-rv32.ll
@@ -0,0 +1,664 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \
+; RUN: --riscv-no-aliases < %s | FileCheck %s
+declare <vscale x 1 x i32> @llvm.riscv.vsext.nxv1i32.nxv1i8(
+ <vscale x 1 x i8>,
+ i32);
+
+define <vscale x 1 x i32> @intrinsic_vsext_vf4_nxv1i32(<vscale x 1 x i8> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsext_vf4_nxv1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vsext.vf4 v25, v16
+; CHECK-NEXT: vmv1r.v v16, v25
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.vsext.nxv1i32.nxv1i8(
+ <vscale x 1 x i8> %0,
+ i32 %1)
+
+ ret <vscale x 1 x i32> %a
+}
+
+declare <vscale x 1 x i32> @llvm.riscv.vsext.mask.nxv1i32.nxv1i8(
+ <vscale x 1 x i32>,
+ <vscale x 1 x i8>,
+ <vscale x 1 x i1>,
+ i32);
+
+define <vscale x 1 x i32> @intrinsic_vsext_mask_vf4_nxv1i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, <vscale x 1 x i8> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsext_mask_vf4_nxv1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,mf2,tu,mu
+; CHECK-NEXT: vsext.vf4 v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.vsext.mask.nxv1i32.nxv1i8(
+ <vscale x 1 x i32> %1,
+ <vscale x 1 x i8> %2,
+ <vscale x 1 x i1> %0,
+ i32 %3)
+
+ ret <vscale x 1 x i32> %a
+}
+
+declare <vscale x 2 x i32> @llvm.riscv.vsext.nxv2i32.nxv2i8(
+ <vscale x 2 x i8>,
+ i32);
+
+define <vscale x 2 x i32> @intrinsic_vsext_vf4_nxv2i32(<vscale x 2 x i8> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsext_vf4_nxv2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vsext.vf4 v25, v16
+; CHECK-NEXT: vmv1r.v v16, v25
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.vsext.nxv2i32.nxv2i8(
+ <vscale x 2 x i8> %0,
+ i32 %1)
+
+ ret <vscale x 2 x i32> %a
+}
+
+declare <vscale x 2 x i32> @llvm.riscv.vsext.mask.nxv2i32.nxv2i8(
+ <vscale x 2 x i32>,
+ <vscale x 2 x i8>,
+ <vscale x 2 x i1>,
+ i32);
+
+define <vscale x 2 x i32> @intrinsic_vsext_mask_vf4_nxv2i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, <vscale x 2 x i8> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsext_mask_vf4_nxv2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,tu,mu
+; CHECK-NEXT: vsext.vf4 v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.vsext.mask.nxv2i32.nxv2i8(
+ <vscale x 2 x i32> %1,
+ <vscale x 2 x i8> %2,
+ <vscale x 2 x i1> %0,
+ i32 %3)
+
+ ret <vscale x 2 x i32> %a
+}
+
+declare <vscale x 4 x i32> @llvm.riscv.vsext.nxv4i32.nxv4i8(
+ <vscale x 4 x i8>,
+ i32);
+
+define <vscale x 4 x i32> @intrinsic_vsext_vf4_nxv4i32(<vscale x 4 x i8> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsext_vf4_nxv4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
+; CHECK-NEXT: vsext.vf4 v26, v16
+; CHECK-NEXT: vmv2r.v v16, v26
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.vsext.nxv4i32.nxv4i8(
+ <vscale x 4 x i8> %0,
+ i32 %1)
+
+ ret <vscale x 4 x i32> %a
+}
+
+declare <vscale x 4 x i32> @llvm.riscv.vsext.mask.nxv4i32.nxv4i8(
+ <vscale x 4 x i32>,
+ <vscale x 4 x i8>,
+ <vscale x 4 x i1>,
+ i32);
+
+define <vscale x 4 x i32> @intrinsic_vsext_mask_vf4_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i8> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsext_mask_vf4_nxv4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,tu,mu
+; CHECK-NEXT: vsext.vf4 v16, v18, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.vsext.mask.nxv4i32.nxv4i8(
+ <vscale x 4 x i32> %1,
+ <vscale x 4 x i8> %2,
+ <vscale x 4 x i1> %0,
+ i32 %3)
+
+ ret <vscale x 4 x i32> %a
+}
+
+declare <vscale x 8 x i32> @llvm.riscv.vsext.nxv8i32.nxv8i8(
+ <vscale x 8 x i8>,
+ i32);
+
+define <vscale x 8 x i32> @intrinsic_vsext_vf4_nxv8i32(<vscale x 8 x i8> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsext_vf4_nxv8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu
+; CHECK-NEXT: vsext.vf4 v28, v16
+; CHECK-NEXT: vmv4r.v v16, v28
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.vsext.nxv8i32.nxv8i8(
+ <vscale x 8 x i8> %0,
+ i32 %1)
+
+ ret <vscale x 8 x i32> %a
+}
+
+declare <vscale x 8 x i32> @llvm.riscv.vsext.mask.nxv8i32.nxv8i8(
+ <vscale x 8 x i32>,
+ <vscale x 8 x i8>,
+ <vscale x 8 x i1>,
+ i32);
+
+define <vscale x 8 x i32> @intrinsic_vsext_mask_vf4_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i8> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsext_mask_vf4_nxv8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,tu,mu
+; CHECK-NEXT: vsext.vf4 v16, v20, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.vsext.mask.nxv8i32.nxv8i8(
+ <vscale x 8 x i32> %1,
+ <vscale x 8 x i8> %2,
+ <vscale x 8 x i1> %0,
+ i32 %3)
+
+ ret <vscale x 8 x i32> %a
+}
+
+declare <vscale x 16 x i32> @llvm.riscv.vsext.nxv16i32.nxv16i8(
+ <vscale x 16 x i8>,
+ i32);
+
+define <vscale x 16 x i32> @intrinsic_vsext_vf4_nxv16i32(<vscale x 16 x i8> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsext_vf4_nxv16i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m8,ta,mu
+; CHECK-NEXT: vsext.vf4 v8, v16
+; CHECK-NEXT: vmv8r.v v16, v8
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 16 x i32> @llvm.riscv.vsext.nxv16i32.nxv16i8(
+ <vscale x 16 x i8> %0,
+ i32 %1)
+
+ ret <vscale x 16 x i32> %a
+}
+
+declare <vscale x 16 x i32> @llvm.riscv.vsext.mask.nxv16i32.nxv16i8(
+ <vscale x 16 x i32>,
+ <vscale x 16 x i8>,
+ <vscale x 16 x i1>,
+ i32);
+
+define <vscale x 16 x i32> @intrinsic_vsext_mask_vf4_nxv16i32(<vscale x 16 x i1> %0, <vscale x 16 x i32> %1, <vscale x 16 x i8> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsext_mask_vf4_nxv16i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a2, zero, e8,m2,ta,mu
+; CHECK-NEXT: vle8.v v26, (a0)
+; CHECK-NEXT: vsetvli a0, a1, e32,m8,tu,mu
+; CHECK-NEXT: vsext.vf4 v16, v26, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 16 x i32> @llvm.riscv.vsext.mask.nxv16i32.nxv16i8(
+ <vscale x 16 x i32> %1,
+ <vscale x 16 x i8> %2,
+ <vscale x 16 x i1> %0,
+ i32 %3)
+
+ ret <vscale x 16 x i32> %a
+}
+
+declare <vscale x 1 x i32> @llvm.riscv.vsext.nxv1i32.nxv1i16(
+ <vscale x 1 x i16>,
+ i32);
+
+define <vscale x 1 x i32> @intrinsic_vsext_vf2_nxv1i32(<vscale x 1 x i16> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsext_vf2_nxv1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vsext.vf2 v25, v16
+; CHECK-NEXT: vmv1r.v v16, v25
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.vsext.nxv1i32.nxv1i16(
+ <vscale x 1 x i16> %0,
+ i32 %1)
+
+ ret <vscale x 1 x i32> %a
+}
+
+declare <vscale x 1 x i32> @llvm.riscv.vsext.mask.nxv1i32.nxv1i16(
+ <vscale x 1 x i32>,
+ <vscale x 1 x i16>,
+ <vscale x 1 x i1>,
+ i32);
+
+define <vscale x 1 x i32> @intrinsic_vsext_mask_vf2_nxv1i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, <vscale x 1 x i16> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsext_mask_vf2_nxv1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,mf2,tu,mu
+; CHECK-NEXT: vsext.vf2 v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.vsext.mask.nxv1i32.nxv1i16(
+ <vscale x 1 x i32> %1,
+ <vscale x 1 x i16> %2,
+ <vscale x 1 x i1> %0,
+ i32 %3)
+
+ ret <vscale x 1 x i32> %a
+}
+
+declare <vscale x 2 x i32> @llvm.riscv.vsext.nxv2i32.nxv2i16(
+ <vscale x 2 x i16>,
+ i32);
+
+define <vscale x 2 x i32> @intrinsic_vsext_vf2_nxv2i32(<vscale x 2 x i16> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsext_vf2_nxv2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vsext.vf2 v25, v16
+; CHECK-NEXT: vmv1r.v v16, v25
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.vsext.nxv2i32.nxv2i16(
+ <vscale x 2 x i16> %0,
+ i32 %1)
+
+ ret <vscale x 2 x i32> %a
+}
+
+declare <vscale x 2 x i32> @llvm.riscv.vsext.mask.nxv2i32.nxv2i16(
+ <vscale x 2 x i32>,
+ <vscale x 2 x i16>,
+ <vscale x 2 x i1>,
+ i32);
+
+define <vscale x 2 x i32> @intrinsic_vsext_mask_vf2_nxv2i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, <vscale x 2 x i16> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsext_mask_vf2_nxv2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,tu,mu
+; CHECK-NEXT: vsext.vf2 v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.vsext.mask.nxv2i32.nxv2i16(
+ <vscale x 2 x i32> %1,
+ <vscale x 2 x i16> %2,
+ <vscale x 2 x i1> %0,
+ i32 %3)
+
+ ret <vscale x 2 x i32> %a
+}
+
+declare <vscale x 4 x i32> @llvm.riscv.vsext.nxv4i32.nxv4i16(
+ <vscale x 4 x i16>,
+ i32);
+
+define <vscale x 4 x i32> @intrinsic_vsext_vf2_nxv4i32(<vscale x 4 x i16> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsext_vf2_nxv4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
+; CHECK-NEXT: vsext.vf2 v26, v16
+; CHECK-NEXT: vmv2r.v v16, v26
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.vsext.nxv4i32.nxv4i16(
+ <vscale x 4 x i16> %0,
+ i32 %1)
+
+ ret <vscale x 4 x i32> %a
+}
+
+declare <vscale x 4 x i32> @llvm.riscv.vsext.mask.nxv4i32.nxv4i16(
+ <vscale x 4 x i32>,
+ <vscale x 4 x i16>,
+ <vscale x 4 x i1>,
+ i32);
+
+define <vscale x 4 x i32> @intrinsic_vsext_mask_vf2_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i16> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsext_mask_vf2_nxv4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,tu,mu
+; CHECK-NEXT: vsext.vf2 v16, v18, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.vsext.mask.nxv4i32.nxv4i16(
+ <vscale x 4 x i32> %1,
+ <vscale x 4 x i16> %2,
+ <vscale x 4 x i1> %0,
+ i32 %3)
+
+ ret <vscale x 4 x i32> %a
+}
+
+declare <vscale x 8 x i32> @llvm.riscv.vsext.nxv8i32.nxv8i16(
+ <vscale x 8 x i16>,
+ i32);
+
+define <vscale x 8 x i32> @intrinsic_vsext_vf2_nxv8i32(<vscale x 8 x i16> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsext_vf2_nxv8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu
+; CHECK-NEXT: vsext.vf2 v28, v16
+; CHECK-NEXT: vmv4r.v v16, v28
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.vsext.nxv8i32.nxv8i16(
+ <vscale x 8 x i16> %0,
+ i32 %1)
+
+ ret <vscale x 8 x i32> %a
+}
+
+declare <vscale x 8 x i32> @llvm.riscv.vsext.mask.nxv8i32.nxv8i16(
+ <vscale x 8 x i32>,
+ <vscale x 8 x i16>,
+ <vscale x 8 x i1>,
+ i32);
+
+define <vscale x 8 x i32> @intrinsic_vsext_mask_vf2_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i16> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsext_mask_vf2_nxv8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,tu,mu
+; CHECK-NEXT: vsext.vf2 v16, v20, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.vsext.mask.nxv8i32.nxv8i16(
+ <vscale x 8 x i32> %1,
+ <vscale x 8 x i16> %2,
+ <vscale x 8 x i1> %0,
+ i32 %3)
+
+ ret <vscale x 8 x i32> %a
+}
+
+declare <vscale x 16 x i32> @llvm.riscv.vsext.nxv16i32.nxv16i16(
+ <vscale x 16 x i16>,
+ i32);
+
+define <vscale x 16 x i32> @intrinsic_vsext_vf2_nxv16i32(<vscale x 16 x i16> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsext_vf2_nxv16i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m8,ta,mu
+; CHECK-NEXT: vsext.vf2 v8, v16
+; CHECK-NEXT: vmv8r.v v16, v8
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 16 x i32> @llvm.riscv.vsext.nxv16i32.nxv16i16(
+ <vscale x 16 x i16> %0,
+ i32 %1)
+
+ ret <vscale x 16 x i32> %a
+}
+
+declare <vscale x 16 x i32> @llvm.riscv.vsext.mask.nxv16i32.nxv16i16(
+ <vscale x 16 x i32>,
+ <vscale x 16 x i16>,
+ <vscale x 16 x i1>,
+ i32);
+
+define <vscale x 16 x i32> @intrinsic_vsext_mask_vf2_nxv16i32(<vscale x 16 x i1> %0, <vscale x 16 x i32> %1, <vscale x 16 x i16> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsext_mask_vf2_nxv16i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a2, zero, e16,m4,ta,mu
+; CHECK-NEXT: vle16.v v28, (a0)
+; CHECK-NEXT: vsetvli a0, a1, e32,m8,tu,mu
+; CHECK-NEXT: vsext.vf2 v16, v28, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 16 x i32> @llvm.riscv.vsext.mask.nxv16i32.nxv16i16(
+ <vscale x 16 x i32> %1,
+ <vscale x 16 x i16> %2,
+ <vscale x 16 x i1> %0,
+ i32 %3)
+
+ ret <vscale x 16 x i32> %a
+}
+
+declare <vscale x 1 x i16> @llvm.riscv.vsext.nxv1i16.nxv1i8(
+ <vscale x 1 x i8>,
+ i32);
+
+define <vscale x 1 x i16> @intrinsic_vsext_vf2_nxv1i16(<vscale x 1 x i8> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsext_vf2_nxv1i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu
+; CHECK-NEXT: vsext.vf2 v25, v16
+; CHECK-NEXT: vmv1r.v v16, v25
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i16> @llvm.riscv.vsext.nxv1i16.nxv1i8(
+ <vscale x 1 x i8> %0,
+ i32 %1)
+
+ ret <vscale x 1 x i16> %a
+}
+
+declare <vscale x 1 x i16> @llvm.riscv.vsext.mask.nxv1i16.nxv1i8(
+ <vscale x 1 x i16>,
+ <vscale x 1 x i8>,
+ <vscale x 1 x i1>,
+ i32);
+
+define <vscale x 1 x i16> @intrinsic_vsext_mask_vf2_nxv1i16(<vscale x 1 x i1> %0, <vscale x 1 x i16> %1, <vscale x 1 x i8> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsext_mask_vf2_nxv1i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,mf4,tu,mu
+; CHECK-NEXT: vsext.vf2 v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i16> @llvm.riscv.vsext.mask.nxv1i16.nxv1i8(
+ <vscale x 1 x i16> %1,
+ <vscale x 1 x i8> %2,
+ <vscale x 1 x i1> %0,
+ i32 %3)
+
+ ret <vscale x 1 x i16> %a
+}
+
+declare <vscale x 2 x i16> @llvm.riscv.vsext.nxv2i16.nxv2i8(
+ <vscale x 2 x i8>,
+ i32);
+
+define <vscale x 2 x i16> @intrinsic_vsext_vf2_nxv2i16(<vscale x 2 x i8> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsext_vf2_nxv2i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu
+; CHECK-NEXT: vsext.vf2 v25, v16
+; CHECK-NEXT: vmv1r.v v16, v25
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i16> @llvm.riscv.vsext.nxv2i16.nxv2i8(
+ <vscale x 2 x i8> %0,
+ i32 %1)
+
+ ret <vscale x 2 x i16> %a
+}
+
+declare <vscale x 2 x i16> @llvm.riscv.vsext.mask.nxv2i16.nxv2i8(
+ <vscale x 2 x i16>,
+ <vscale x 2 x i8>,
+ <vscale x 2 x i1>,
+ i32);
+
+define <vscale x 2 x i16> @intrinsic_vsext_mask_vf2_nxv2i16(<vscale x 2 x i1> %0, <vscale x 2 x i16> %1, <vscale x 2 x i8> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsext_mask_vf2_nxv2i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,mf2,tu,mu
+; CHECK-NEXT: vsext.vf2 v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i16> @llvm.riscv.vsext.mask.nxv2i16.nxv2i8(
+ <vscale x 2 x i16> %1,
+ <vscale x 2 x i8> %2,
+ <vscale x 2 x i1> %0,
+ i32 %3)
+
+ ret <vscale x 2 x i16> %a
+}
+
+declare <vscale x 4 x i16> @llvm.riscv.vsext.nxv4i16.nxv4i8(
+ <vscale x 4 x i8>,
+ i32);
+
+define <vscale x 4 x i16> @intrinsic_vsext_vf2_nxv4i16(<vscale x 4 x i8> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsext_vf2_nxv4i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vsext.vf2 v25, v16
+; CHECK-NEXT: vmv1r.v v16, v25
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i16> @llvm.riscv.vsext.nxv4i16.nxv4i8(
+ <vscale x 4 x i8> %0,
+ i32 %1)
+
+ ret <vscale x 4 x i16> %a
+}
+
+declare <vscale x 4 x i16> @llvm.riscv.vsext.mask.nxv4i16.nxv4i8(
+ <vscale x 4 x i16>,
+ <vscale x 4 x i8>,
+ <vscale x 4 x i1>,
+ i32);
+
+define <vscale x 4 x i16> @intrinsic_vsext_mask_vf2_nxv4i16(<vscale x 4 x i1> %0, <vscale x 4 x i16> %1, <vscale x 4 x i8> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsext_mask_vf2_nxv4i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,tu,mu
+; CHECK-NEXT: vsext.vf2 v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i16> @llvm.riscv.vsext.mask.nxv4i16.nxv4i8(
+ <vscale x 4 x i16> %1,
+ <vscale x 4 x i8> %2,
+ <vscale x 4 x i1> %0,
+ i32 %3)
+
+ ret <vscale x 4 x i16> %a
+}
+
+declare <vscale x 8 x i16> @llvm.riscv.vsext.nxv8i16.nxv8i8(
+ <vscale x 8 x i8>,
+ i32);
+
+define <vscale x 8 x i16> @intrinsic_vsext_vf2_nxv8i16(<vscale x 8 x i8> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsext_vf2_nxv8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu
+; CHECK-NEXT: vsext.vf2 v26, v16
+; CHECK-NEXT: vmv2r.v v16, v26
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i16> @llvm.riscv.vsext.nxv8i16.nxv8i8(
+ <vscale x 8 x i8> %0,
+ i32 %1)
+
+ ret <vscale x 8 x i16> %a
+}
+
+declare <vscale x 8 x i16> @llvm.riscv.vsext.mask.nxv8i16.nxv8i8(
+ <vscale x 8 x i16>,
+ <vscale x 8 x i8>,
+ <vscale x 8 x i1>,
+ i32);
+
+define <vscale x 8 x i16> @intrinsic_vsext_mask_vf2_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i8> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsext_mask_vf2_nxv8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m2,tu,mu
+; CHECK-NEXT: vsext.vf2 v16, v18, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i16> @llvm.riscv.vsext.mask.nxv8i16.nxv8i8(
+ <vscale x 8 x i16> %1,
+ <vscale x 8 x i8> %2,
+ <vscale x 8 x i1> %0,
+ i32 %3)
+
+ ret <vscale x 8 x i16> %a
+}
+
+declare <vscale x 16 x i16> @llvm.riscv.vsext.nxv16i16.nxv16i8(
+ <vscale x 16 x i8>,
+ i32);
+
+define <vscale x 16 x i16> @intrinsic_vsext_vf2_nxv16i16(<vscale x 16 x i8> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsext_vf2_nxv16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m4,ta,mu
+; CHECK-NEXT: vsext.vf2 v28, v16
+; CHECK-NEXT: vmv4r.v v16, v28
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 16 x i16> @llvm.riscv.vsext.nxv16i16.nxv16i8(
+ <vscale x 16 x i8> %0,
+ i32 %1)
+
+ ret <vscale x 16 x i16> %a
+}
+
+declare <vscale x 16 x i16> @llvm.riscv.vsext.mask.nxv16i16.nxv16i8(
+ <vscale x 16 x i16>,
+ <vscale x 16 x i8>,
+ <vscale x 16 x i1>,
+ i32);
+
+define <vscale x 16 x i16> @intrinsic_vsext_mask_vf2_nxv16i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i8> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsext_mask_vf2_nxv16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m4,tu,mu
+; CHECK-NEXT: vsext.vf2 v16, v20, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 16 x i16> @llvm.riscv.vsext.mask.nxv16i16.nxv16i8(
+ <vscale x 16 x i16> %1,
+ <vscale x 16 x i8> %2,
+ <vscale x 16 x i1> %0,
+ i32 %3)
+
+ ret <vscale x 16 x i16> %a
+}
+
+declare <vscale x 32 x i16> @llvm.riscv.vsext.nxv32i16.nxv32i8(
+ <vscale x 32 x i8>,
+ i32);
+
+define <vscale x 32 x i16> @intrinsic_vsext_vf2_nxv32i16(<vscale x 32 x i8> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsext_vf2_nxv32i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m8,ta,mu
+; CHECK-NEXT: vsext.vf2 v8, v16
+; CHECK-NEXT: vmv8r.v v16, v8
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 32 x i16> @llvm.riscv.vsext.nxv32i16.nxv32i8(
+ <vscale x 32 x i8> %0,
+ i32 %1)
+
+ ret <vscale x 32 x i16> %a
+}
+
+declare <vscale x 32 x i16> @llvm.riscv.vsext.mask.nxv32i16.nxv32i8(
+ <vscale x 32 x i16>,
+ <vscale x 32 x i8>,
+ <vscale x 32 x i1>,
+ i32);
+
+define <vscale x 32 x i16> @intrinsic_vsext_mask_vf2_nxv32i16(<vscale x 32 x i1> %0, <vscale x 32 x i16> %1, <vscale x 32 x i8> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsext_mask_vf2_nxv32i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a2, zero, e8,m4,ta,mu
+; CHECK-NEXT: vle8.v v28, (a0)
+; CHECK-NEXT: vsetvli a0, a1, e16,m8,tu,mu
+; CHECK-NEXT: vsext.vf2 v16, v28, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 32 x i16> @llvm.riscv.vsext.mask.nxv32i16.nxv32i8(
+ <vscale x 32 x i16> %1,
+ <vscale x 32 x i8> %2,
+ <vscale x 32 x i1> %0,
+ i32 %3)
+
+ ret <vscale x 32 x i16> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsext-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsext-rv64.ll
new file mode 100644
index 000000000000..499625e479ff
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vsext-rv64.ll
@@ -0,0 +1,1162 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: --riscv-no-aliases < %s | FileCheck %s
+declare <vscale x 1 x i64> @llvm.riscv.vsext.nxv1i64.nxv1i8(
+ <vscale x 1 x i8>,
+ i64);
+
+define <vscale x 1 x i64> @intrinsic_vsext_vf8_nxv1i64(<vscale x 1 x i8> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsext_vf8_nxv1i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT: vsext.vf8 v25, v16
+; CHECK-NEXT: vmv1r.v v16, v25
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i64> @llvm.riscv.vsext.nxv1i64.nxv1i8(
+ <vscale x 1 x i8> %0,
+ i64 %1)
+
+ ret <vscale x 1 x i64> %a
+}
+
+declare <vscale x 1 x i64> @llvm.riscv.vsext.mask.nxv1i64.nxv1i8(
+ <vscale x 1 x i64>,
+ <vscale x 1 x i8>,
+ <vscale x 1 x i1>,
+ i64);
+
+define <vscale x 1 x i64> @intrinsic_vsext_mask_vf8_nxv1i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, <vscale x 1 x i8> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsext_mask_vf8_nxv1i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m1,tu,mu
+; CHECK-NEXT: vsext.vf8 v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i64> @llvm.riscv.vsext.mask.nxv1i64.nxv1i8(
+ <vscale x 1 x i64> %1,
+ <vscale x 1 x i8> %2,
+ <vscale x 1 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 1 x i64> %a
+}
+
+declare <vscale x 2 x i64> @llvm.riscv.vsext.nxv2i64.nxv2i8(
+ <vscale x 2 x i8>,
+ i64);
+
+define <vscale x 2 x i64> @intrinsic_vsext_vf8_nxv2i64(<vscale x 2 x i8> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsext_vf8_nxv2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu
+; CHECK-NEXT: vsext.vf8 v26, v16
+; CHECK-NEXT: vmv2r.v v16, v26
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i64> @llvm.riscv.vsext.nxv2i64.nxv2i8(
+ <vscale x 2 x i8> %0,
+ i64 %1)
+
+ ret <vscale x 2 x i64> %a
+}
+
+declare <vscale x 2 x i64> @llvm.riscv.vsext.mask.nxv2i64.nxv2i8(
+ <vscale x 2 x i64>,
+ <vscale x 2 x i8>,
+ <vscale x 2 x i1>,
+ i64);
+
+define <vscale x 2 x i64> @intrinsic_vsext_mask_vf8_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i8> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsext_mask_vf8_nxv2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m2,tu,mu
+; CHECK-NEXT: vsext.vf8 v16, v18, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i64> @llvm.riscv.vsext.mask.nxv2i64.nxv2i8(
+ <vscale x 2 x i64> %1,
+ <vscale x 2 x i8> %2,
+ <vscale x 2 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 2 x i64> %a
+}
+
+declare <vscale x 4 x i64> @llvm.riscv.vsext.nxv4i64.nxv4i8(
+ <vscale x 4 x i8>,
+ i64);
+
+define <vscale x 4 x i64> @intrinsic_vsext_vf8_nxv4i64(<vscale x 4 x i8> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsext_vf8_nxv4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu
+; CHECK-NEXT: vsext.vf8 v28, v16
+; CHECK-NEXT: vmv4r.v v16, v28
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i64> @llvm.riscv.vsext.nxv4i64.nxv4i8(
+ <vscale x 4 x i8> %0,
+ i64 %1)
+
+ ret <vscale x 4 x i64> %a
+}
+
+declare <vscale x 4 x i64> @llvm.riscv.vsext.mask.nxv4i64.nxv4i8(
+ <vscale x 4 x i64>,
+ <vscale x 4 x i8>,
+ <vscale x 4 x i1>,
+ i64);
+
+define <vscale x 4 x i64> @intrinsic_vsext_mask_vf8_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i8> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsext_mask_vf8_nxv4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m4,tu,mu
+; CHECK-NEXT: vsext.vf8 v16, v20, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i64> @llvm.riscv.vsext.mask.nxv4i64.nxv4i8(
+ <vscale x 4 x i64> %1,
+ <vscale x 4 x i8> %2,
+ <vscale x 4 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 4 x i64> %a
+}
+
+declare <vscale x 8 x i64> @llvm.riscv.vsext.nxv8i64.nxv8i8(
+ <vscale x 8 x i8>,
+ i64);
+
+define <vscale x 8 x i64> @intrinsic_vsext_vf8_nxv8i64(<vscale x 8 x i8> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsext_vf8_nxv8i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu
+; CHECK-NEXT: vsext.vf8 v8, v16
+; CHECK-NEXT: vmv8r.v v16, v8
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i64> @llvm.riscv.vsext.nxv8i64.nxv8i8(
+ <vscale x 8 x i8> %0,
+ i64 %1)
+
+ ret <vscale x 8 x i64> %a
+}
+
+declare <vscale x 8 x i64> @llvm.riscv.vsext.mask.nxv8i64.nxv8i8(
+ <vscale x 8 x i64>,
+ <vscale x 8 x i8>,
+ <vscale x 8 x i1>,
+ i64);
+
+define <vscale x 8 x i64> @intrinsic_vsext_mask_vf8_nxv8i64(<vscale x 8 x i1> %0, <vscale x 8 x i64> %1, <vscale x 8 x i8> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsext_mask_vf8_nxv8i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a2, zero, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, a1, e64,m8,tu,mu
+; CHECK-NEXT: vsext.vf8 v16, v25, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i64> @llvm.riscv.vsext.mask.nxv8i64.nxv8i8(
+ <vscale x 8 x i64> %1,
+ <vscale x 8 x i8> %2,
+ <vscale x 8 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 8 x i64> %a
+}
+
+declare <vscale x 1 x i64> @llvm.riscv.vsext.nxv1i64.nxv1i16(
+ <vscale x 1 x i16>,
+ i64);
+
+define <vscale x 1 x i64> @intrinsic_vsext_vf4_nxv1i64(<vscale x 1 x i16> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsext_vf4_nxv1i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT: vsext.vf4 v25, v16
+; CHECK-NEXT: vmv1r.v v16, v25
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i64> @llvm.riscv.vsext.nxv1i64.nxv1i16(
+ <vscale x 1 x i16> %0,
+ i64 %1)
+
+ ret <vscale x 1 x i64> %a
+}
+
+declare <vscale x 1 x i64> @llvm.riscv.vsext.mask.nxv1i64.nxv1i16(
+ <vscale x 1 x i64>,
+ <vscale x 1 x i16>,
+ <vscale x 1 x i1>,
+ i64);
+
+define <vscale x 1 x i64> @intrinsic_vsext_mask_vf4_nxv1i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, <vscale x 1 x i16> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsext_mask_vf4_nxv1i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m1,tu,mu
+; CHECK-NEXT: vsext.vf4 v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i64> @llvm.riscv.vsext.mask.nxv1i64.nxv1i16(
+ <vscale x 1 x i64> %1,
+ <vscale x 1 x i16> %2,
+ <vscale x 1 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 1 x i64> %a
+}
+
+declare <vscale x 2 x i64> @llvm.riscv.vsext.nxv2i64.nxv2i16(
+ <vscale x 2 x i16>,
+ i64);
+
+define <vscale x 2 x i64> @intrinsic_vsext_vf4_nxv2i64(<vscale x 2 x i16> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsext_vf4_nxv2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu
+; CHECK-NEXT: vsext.vf4 v26, v16
+; CHECK-NEXT: vmv2r.v v16, v26
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i64> @llvm.riscv.vsext.nxv2i64.nxv2i16(
+ <vscale x 2 x i16> %0,
+ i64 %1)
+
+ ret <vscale x 2 x i64> %a
+}
+
+declare <vscale x 2 x i64> @llvm.riscv.vsext.mask.nxv2i64.nxv2i16(
+ <vscale x 2 x i64>,
+ <vscale x 2 x i16>,
+ <vscale x 2 x i1>,
+ i64);
+
+define <vscale x 2 x i64> @intrinsic_vsext_mask_vf4_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i16> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsext_mask_vf4_nxv2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m2,tu,mu
+; CHECK-NEXT: vsext.vf4 v16, v18, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i64> @llvm.riscv.vsext.mask.nxv2i64.nxv2i16(
+ <vscale x 2 x i64> %1,
+ <vscale x 2 x i16> %2,
+ <vscale x 2 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 2 x i64> %a
+}
+
+declare <vscale x 4 x i64> @llvm.riscv.vsext.nxv4i64.nxv4i16(
+ <vscale x 4 x i16>,
+ i64);
+
+define <vscale x 4 x i64> @intrinsic_vsext_vf4_nxv4i64(<vscale x 4 x i16> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsext_vf4_nxv4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu
+; CHECK-NEXT: vsext.vf4 v28, v16
+; CHECK-NEXT: vmv4r.v v16, v28
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i64> @llvm.riscv.vsext.nxv4i64.nxv4i16(
+ <vscale x 4 x i16> %0,
+ i64 %1)
+
+ ret <vscale x 4 x i64> %a
+}
+
+declare <vscale x 4 x i64> @llvm.riscv.vsext.mask.nxv4i64.nxv4i16(
+ <vscale x 4 x i64>,
+ <vscale x 4 x i16>,
+ <vscale x 4 x i1>,
+ i64);
+
+define <vscale x 4 x i64> @intrinsic_vsext_mask_vf4_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i16> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsext_mask_vf4_nxv4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m4,tu,mu
+; CHECK-NEXT: vsext.vf4 v16, v20, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i64> @llvm.riscv.vsext.mask.nxv4i64.nxv4i16(
+ <vscale x 4 x i64> %1,
+ <vscale x 4 x i16> %2,
+ <vscale x 4 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 4 x i64> %a
+}
+
+declare <vscale x 8 x i64> @llvm.riscv.vsext.nxv8i64.nxv8i16(
+ <vscale x 8 x i16>,
+ i64);
+
+define <vscale x 8 x i64> @intrinsic_vsext_vf4_nxv8i64(<vscale x 8 x i16> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsext_vf4_nxv8i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu
+; CHECK-NEXT: vsext.vf4 v8, v16
+; CHECK-NEXT: vmv8r.v v16, v8
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i64> @llvm.riscv.vsext.nxv8i64.nxv8i16(
+ <vscale x 8 x i16> %0,
+ i64 %1)
+
+ ret <vscale x 8 x i64> %a
+}
+
+declare <vscale x 8 x i64> @llvm.riscv.vsext.mask.nxv8i64.nxv8i16(
+ <vscale x 8 x i64>,
+ <vscale x 8 x i16>,
+ <vscale x 8 x i1>,
+ i64);
+
+define <vscale x 8 x i64> @intrinsic_vsext_mask_vf4_nxv8i64(<vscale x 8 x i1> %0, <vscale x 8 x i64> %1, <vscale x 8 x i16> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsext_mask_vf4_nxv8i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a2, zero, e16,m2,ta,mu
+; CHECK-NEXT: vle16.v v26, (a0)
+; CHECK-NEXT: vsetvli a0, a1, e64,m8,tu,mu
+; CHECK-NEXT: vsext.vf4 v16, v26, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i64> @llvm.riscv.vsext.mask.nxv8i64.nxv8i16(
+ <vscale x 8 x i64> %1,
+ <vscale x 8 x i16> %2,
+ <vscale x 8 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 8 x i64> %a
+}
+
+declare <vscale x 1 x i32> @llvm.riscv.vsext.nxv1i32.nxv1i8(
+ <vscale x 1 x i8>,
+ i64);
+
+define <vscale x 1 x i32> @intrinsic_vsext_vf4_nxv1i32(<vscale x 1 x i8> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsext_vf4_nxv1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vsext.vf4 v25, v16
+; CHECK-NEXT: vmv1r.v v16, v25
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.vsext.nxv1i32.nxv1i8(
+ <vscale x 1 x i8> %0,
+ i64 %1)
+
+ ret <vscale x 1 x i32> %a
+}
+
+declare <vscale x 1 x i32> @llvm.riscv.vsext.mask.nxv1i32.nxv1i8(
+ <vscale x 1 x i32>,
+ <vscale x 1 x i8>,
+ <vscale x 1 x i1>,
+ i64);
+
+define <vscale x 1 x i32> @intrinsic_vsext_mask_vf4_nxv1i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, <vscale x 1 x i8> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsext_mask_vf4_nxv1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,mf2,tu,mu
+; CHECK-NEXT: vsext.vf4 v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.vsext.mask.nxv1i32.nxv1i8(
+ <vscale x 1 x i32> %1,
+ <vscale x 1 x i8> %2,
+ <vscale x 1 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 1 x i32> %a
+}
+
+declare <vscale x 2 x i32> @llvm.riscv.vsext.nxv2i32.nxv2i8(
+ <vscale x 2 x i8>,
+ i64);
+
+define <vscale x 2 x i32> @intrinsic_vsext_vf4_nxv2i32(<vscale x 2 x i8> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsext_vf4_nxv2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vsext.vf4 v25, v16
+; CHECK-NEXT: vmv1r.v v16, v25
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.vsext.nxv2i32.nxv2i8(
+ <vscale x 2 x i8> %0,
+ i64 %1)
+
+ ret <vscale x 2 x i32> %a
+}
+
+declare <vscale x 2 x i32> @llvm.riscv.vsext.mask.nxv2i32.nxv2i8(
+ <vscale x 2 x i32>,
+ <vscale x 2 x i8>,
+ <vscale x 2 x i1>,
+ i64);
+
+define <vscale x 2 x i32> @intrinsic_vsext_mask_vf4_nxv2i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, <vscale x 2 x i8> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsext_mask_vf4_nxv2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,tu,mu
+; CHECK-NEXT: vsext.vf4 v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.vsext.mask.nxv2i32.nxv2i8(
+ <vscale x 2 x i32> %1,
+ <vscale x 2 x i8> %2,
+ <vscale x 2 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 2 x i32> %a
+}
+
+declare <vscale x 4 x i32> @llvm.riscv.vsext.nxv4i32.nxv4i8(
+ <vscale x 4 x i8>,
+ i64);
+
+define <vscale x 4 x i32> @intrinsic_vsext_vf4_nxv4i32(<vscale x 4 x i8> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsext_vf4_nxv4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
+; CHECK-NEXT: vsext.vf4 v26, v16
+; CHECK-NEXT: vmv2r.v v16, v26
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.vsext.nxv4i32.nxv4i8(
+ <vscale x 4 x i8> %0,
+ i64 %1)
+
+ ret <vscale x 4 x i32> %a
+}
+
+declare <vscale x 4 x i32> @llvm.riscv.vsext.mask.nxv4i32.nxv4i8(
+ <vscale x 4 x i32>,
+ <vscale x 4 x i8>,
+ <vscale x 4 x i1>,
+ i64);
+
+define <vscale x 4 x i32> @intrinsic_vsext_mask_vf4_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i8> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsext_mask_vf4_nxv4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,tu,mu
+; CHECK-NEXT: vsext.vf4 v16, v18, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.vsext.mask.nxv4i32.nxv4i8(
+ <vscale x 4 x i32> %1,
+ <vscale x 4 x i8> %2,
+ <vscale x 4 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 4 x i32> %a
+}
+
+declare <vscale x 8 x i32> @llvm.riscv.vsext.nxv8i32.nxv8i8(
+ <vscale x 8 x i8>,
+ i64);
+
+define <vscale x 8 x i32> @intrinsic_vsext_vf4_nxv8i32(<vscale x 8 x i8> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsext_vf4_nxv8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu
+; CHECK-NEXT: vsext.vf4 v28, v16
+; CHECK-NEXT: vmv4r.v v16, v28
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.vsext.nxv8i32.nxv8i8(
+ <vscale x 8 x i8> %0,
+ i64 %1)
+
+ ret <vscale x 8 x i32> %a
+}
+
+declare <vscale x 8 x i32> @llvm.riscv.vsext.mask.nxv8i32.nxv8i8(
+ <vscale x 8 x i32>,
+ <vscale x 8 x i8>,
+ <vscale x 8 x i1>,
+ i64);
+
+define <vscale x 8 x i32> @intrinsic_vsext_mask_vf4_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i8> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsext_mask_vf4_nxv8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,tu,mu
+; CHECK-NEXT: vsext.vf4 v16, v20, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.vsext.mask.nxv8i32.nxv8i8(
+ <vscale x 8 x i32> %1,
+ <vscale x 8 x i8> %2,
+ <vscale x 8 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 8 x i32> %a
+}
+
+declare <vscale x 16 x i32> @llvm.riscv.vsext.nxv16i32.nxv16i8(
+ <vscale x 16 x i8>,
+ i64);
+
+define <vscale x 16 x i32> @intrinsic_vsext_vf4_nxv16i32(<vscale x 16 x i8> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsext_vf4_nxv16i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m8,ta,mu
+; CHECK-NEXT: vsext.vf4 v8, v16
+; CHECK-NEXT: vmv8r.v v16, v8
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 16 x i32> @llvm.riscv.vsext.nxv16i32.nxv16i8(
+ <vscale x 16 x i8> %0,
+ i64 %1)
+
+ ret <vscale x 16 x i32> %a
+}
+
+declare <vscale x 16 x i32> @llvm.riscv.vsext.mask.nxv16i32.nxv16i8(
+ <vscale x 16 x i32>,
+ <vscale x 16 x i8>,
+ <vscale x 16 x i1>,
+ i64);
+
+define <vscale x 16 x i32> @intrinsic_vsext_mask_vf4_nxv16i32(<vscale x 16 x i1> %0, <vscale x 16 x i32> %1, <vscale x 16 x i8> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsext_mask_vf4_nxv16i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a2, zero, e8,m2,ta,mu
+; CHECK-NEXT: vle8.v v26, (a0)
+; CHECK-NEXT: vsetvli a0, a1, e32,m8,tu,mu
+; CHECK-NEXT: vsext.vf4 v16, v26, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 16 x i32> @llvm.riscv.vsext.mask.nxv16i32.nxv16i8(
+ <vscale x 16 x i32> %1,
+ <vscale x 16 x i8> %2,
+ <vscale x 16 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 16 x i32> %a
+}
+
+declare <vscale x 1 x i64> @llvm.riscv.vsext.nxv1i64.nxv1i32(
+ <vscale x 1 x i32>,
+ i64);
+
+define <vscale x 1 x i64> @intrinsic_vsext_vf2_nxv1i64(<vscale x 1 x i32> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsext_vf2_nxv1i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT: vsext.vf2 v25, v16
+; CHECK-NEXT: vmv1r.v v16, v25
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i64> @llvm.riscv.vsext.nxv1i64.nxv1i32(
+ <vscale x 1 x i32> %0,
+ i64 %1)
+
+ ret <vscale x 1 x i64> %a
+}
+
+declare <vscale x 1 x i64> @llvm.riscv.vsext.mask.nxv1i64.nxv1i32(
+ <vscale x 1 x i64>,
+ <vscale x 1 x i32>,
+ <vscale x 1 x i1>,
+ i64);
+
+define <vscale x 1 x i64> @intrinsic_vsext_mask_vf2_nxv1i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, <vscale x 1 x i32> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsext_mask_vf2_nxv1i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m1,tu,mu
+; CHECK-NEXT: vsext.vf2 v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i64> @llvm.riscv.vsext.mask.nxv1i64.nxv1i32(
+ <vscale x 1 x i64> %1,
+ <vscale x 1 x i32> %2,
+ <vscale x 1 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 1 x i64> %a
+}
+
+declare <vscale x 2 x i64> @llvm.riscv.vsext.nxv2i64.nxv2i32(
+ <vscale x 2 x i32>,
+ i64);
+
+define <vscale x 2 x i64> @intrinsic_vsext_vf2_nxv2i64(<vscale x 2 x i32> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsext_vf2_nxv2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu
+; CHECK-NEXT: vsext.vf2 v26, v16
+; CHECK-NEXT: vmv2r.v v16, v26
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i64> @llvm.riscv.vsext.nxv2i64.nxv2i32(
+ <vscale x 2 x i32> %0,
+ i64 %1)
+
+ ret <vscale x 2 x i64> %a
+}
+
+declare <vscale x 2 x i64> @llvm.riscv.vsext.mask.nxv2i64.nxv2i32(
+ <vscale x 2 x i64>,
+ <vscale x 2 x i32>,
+ <vscale x 2 x i1>,
+ i64);
+
+define <vscale x 2 x i64> @intrinsic_vsext_mask_vf2_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i32> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsext_mask_vf2_nxv2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m2,tu,mu
+; CHECK-NEXT: vsext.vf2 v16, v18, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i64> @llvm.riscv.vsext.mask.nxv2i64.nxv2i32(
+ <vscale x 2 x i64> %1,
+ <vscale x 2 x i32> %2,
+ <vscale x 2 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 2 x i64> %a
+}
+
+declare <vscale x 4 x i64> @llvm.riscv.vsext.nxv4i64.nxv4i32(
+ <vscale x 4 x i32>,
+ i64);
+
+define <vscale x 4 x i64> @intrinsic_vsext_vf2_nxv4i64(<vscale x 4 x i32> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsext_vf2_nxv4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu
+; CHECK-NEXT: vsext.vf2 v28, v16
+; CHECK-NEXT: vmv4r.v v16, v28
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i64> @llvm.riscv.vsext.nxv4i64.nxv4i32(
+ <vscale x 4 x i32> %0,
+ i64 %1)
+
+ ret <vscale x 4 x i64> %a
+}
+
+declare <vscale x 4 x i64> @llvm.riscv.vsext.mask.nxv4i64.nxv4i32(
+ <vscale x 4 x i64>,
+ <vscale x 4 x i32>,
+ <vscale x 4 x i1>,
+ i64);
+
+define <vscale x 4 x i64> @intrinsic_vsext_mask_vf2_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i32> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsext_mask_vf2_nxv4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m4,tu,mu
+; CHECK-NEXT: vsext.vf2 v16, v20, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i64> @llvm.riscv.vsext.mask.nxv4i64.nxv4i32(
+ <vscale x 4 x i64> %1,
+ <vscale x 4 x i32> %2,
+ <vscale x 4 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 4 x i64> %a
+}
+
+declare <vscale x 8 x i64> @llvm.riscv.vsext.nxv8i64.nxv8i32(
+ <vscale x 8 x i32>,
+ i64);
+
+define <vscale x 8 x i64> @intrinsic_vsext_vf2_nxv8i64(<vscale x 8 x i32> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsext_vf2_nxv8i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu
+; CHECK-NEXT: vsext.vf2 v8, v16
+; CHECK-NEXT: vmv8r.v v16, v8
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i64> @llvm.riscv.vsext.nxv8i64.nxv8i32(
+ <vscale x 8 x i32> %0,
+ i64 %1)
+
+ ret <vscale x 8 x i64> %a
+}
+
+declare <vscale x 8 x i64> @llvm.riscv.vsext.mask.nxv8i64.nxv8i32(
+ <vscale x 8 x i64>,
+ <vscale x 8 x i32>,
+ <vscale x 8 x i1>,
+ i64);
+
+define <vscale x 8 x i64> @intrinsic_vsext_mask_vf2_nxv8i64(<vscale x 8 x i1> %0, <vscale x 8 x i64> %1, <vscale x 8 x i32> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsext_mask_vf2_nxv8i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a2, zero, e32,m4,ta,mu
+; CHECK-NEXT: vle32.v v28, (a0)
+; CHECK-NEXT: vsetvli a0, a1, e64,m8,tu,mu
+; CHECK-NEXT: vsext.vf2 v16, v28, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i64> @llvm.riscv.vsext.mask.nxv8i64.nxv8i32(
+ <vscale x 8 x i64> %1,
+ <vscale x 8 x i32> %2,
+ <vscale x 8 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 8 x i64> %a
+}
+
+declare <vscale x 1 x i32> @llvm.riscv.vsext.nxv1i32.nxv1i16(
+ <vscale x 1 x i16>,
+ i64);
+
+define <vscale x 1 x i32> @intrinsic_vsext_vf2_nxv1i32(<vscale x 1 x i16> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsext_vf2_nxv1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vsext.vf2 v25, v16
+; CHECK-NEXT: vmv1r.v v16, v25
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.vsext.nxv1i32.nxv1i16(
+ <vscale x 1 x i16> %0,
+ i64 %1)
+
+ ret <vscale x 1 x i32> %a
+}
+
+declare <vscale x 1 x i32> @llvm.riscv.vsext.mask.nxv1i32.nxv1i16(
+ <vscale x 1 x i32>,
+ <vscale x 1 x i16>,
+ <vscale x 1 x i1>,
+ i64);
+
+define <vscale x 1 x i32> @intrinsic_vsext_mask_vf2_nxv1i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, <vscale x 1 x i16> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsext_mask_vf2_nxv1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,mf2,tu,mu
+; CHECK-NEXT: vsext.vf2 v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.vsext.mask.nxv1i32.nxv1i16(
+ <vscale x 1 x i32> %1,
+ <vscale x 1 x i16> %2,
+ <vscale x 1 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 1 x i32> %a
+}
+
+declare <vscale x 2 x i32> @llvm.riscv.vsext.nxv2i32.nxv2i16(
+ <vscale x 2 x i16>,
+ i64);
+
+define <vscale x 2 x i32> @intrinsic_vsext_vf2_nxv2i32(<vscale x 2 x i16> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsext_vf2_nxv2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vsext.vf2 v25, v16
+; CHECK-NEXT: vmv1r.v v16, v25
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.vsext.nxv2i32.nxv2i16(
+ <vscale x 2 x i16> %0,
+ i64 %1)
+
+ ret <vscale x 2 x i32> %a
+}
+
+declare <vscale x 2 x i32> @llvm.riscv.vsext.mask.nxv2i32.nxv2i16(
+ <vscale x 2 x i32>,
+ <vscale x 2 x i16>,
+ <vscale x 2 x i1>,
+ i64);
+
+define <vscale x 2 x i32> @intrinsic_vsext_mask_vf2_nxv2i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, <vscale x 2 x i16> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsext_mask_vf2_nxv2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,tu,mu
+; CHECK-NEXT: vsext.vf2 v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.vsext.mask.nxv2i32.nxv2i16(
+ <vscale x 2 x i32> %1,
+ <vscale x 2 x i16> %2,
+ <vscale x 2 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 2 x i32> %a
+}
+
+declare <vscale x 4 x i32> @llvm.riscv.vsext.nxv4i32.nxv4i16(
+ <vscale x 4 x i16>,
+ i64);
+
+define <vscale x 4 x i32> @intrinsic_vsext_vf2_nxv4i32(<vscale x 4 x i16> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsext_vf2_nxv4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
+; CHECK-NEXT: vsext.vf2 v26, v16
+; CHECK-NEXT: vmv2r.v v16, v26
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.vsext.nxv4i32.nxv4i16(
+ <vscale x 4 x i16> %0,
+ i64 %1)
+
+ ret <vscale x 4 x i32> %a
+}
+
+declare <vscale x 4 x i32> @llvm.riscv.vsext.mask.nxv4i32.nxv4i16(
+ <vscale x 4 x i32>,
+ <vscale x 4 x i16>,
+ <vscale x 4 x i1>,
+ i64);
+
+define <vscale x 4 x i32> @intrinsic_vsext_mask_vf2_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i16> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsext_mask_vf2_nxv4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,tu,mu
+; CHECK-NEXT: vsext.vf2 v16, v18, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.vsext.mask.nxv4i32.nxv4i16(
+ <vscale x 4 x i32> %1,
+ <vscale x 4 x i16> %2,
+ <vscale x 4 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 4 x i32> %a
+}
+
+declare <vscale x 8 x i32> @llvm.riscv.vsext.nxv8i32.nxv8i16(
+ <vscale x 8 x i16>,
+ i64);
+
+define <vscale x 8 x i32> @intrinsic_vsext_vf2_nxv8i32(<vscale x 8 x i16> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsext_vf2_nxv8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu
+; CHECK-NEXT: vsext.vf2 v28, v16
+; CHECK-NEXT: vmv4r.v v16, v28
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.vsext.nxv8i32.nxv8i16(
+ <vscale x 8 x i16> %0,
+ i64 %1)
+
+ ret <vscale x 8 x i32> %a
+}
+
+declare <vscale x 8 x i32> @llvm.riscv.vsext.mask.nxv8i32.nxv8i16(
+ <vscale x 8 x i32>,
+ <vscale x 8 x i16>,
+ <vscale x 8 x i1>,
+ i64);
+
+define <vscale x 8 x i32> @intrinsic_vsext_mask_vf2_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i16> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsext_mask_vf2_nxv8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,tu,mu
+; CHECK-NEXT: vsext.vf2 v16, v20, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.vsext.mask.nxv8i32.nxv8i16(
+ <vscale x 8 x i32> %1,
+ <vscale x 8 x i16> %2,
+ <vscale x 8 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 8 x i32> %a
+}
+
+declare <vscale x 16 x i32> @llvm.riscv.vsext.nxv16i32.nxv16i16(
+ <vscale x 16 x i16>,
+ i64);
+
+define <vscale x 16 x i32> @intrinsic_vsext_vf2_nxv16i32(<vscale x 16 x i16> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsext_vf2_nxv16i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m8,ta,mu
+; CHECK-NEXT: vsext.vf2 v8, v16
+; CHECK-NEXT: vmv8r.v v16, v8
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 16 x i32> @llvm.riscv.vsext.nxv16i32.nxv16i16(
+ <vscale x 16 x i16> %0,
+ i64 %1)
+
+ ret <vscale x 16 x i32> %a
+}
+
+declare <vscale x 16 x i32> @llvm.riscv.vsext.mask.nxv16i32.nxv16i16(
+ <vscale x 16 x i32>,
+ <vscale x 16 x i16>,
+ <vscale x 16 x i1>,
+ i64);
+
+define <vscale x 16 x i32> @intrinsic_vsext_mask_vf2_nxv16i32(<vscale x 16 x i1> %0, <vscale x 16 x i32> %1, <vscale x 16 x i16> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsext_mask_vf2_nxv16i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a2, zero, e16,m4,ta,mu
+; CHECK-NEXT: vle16.v v28, (a0)
+; CHECK-NEXT: vsetvli a0, a1, e32,m8,tu,mu
+; CHECK-NEXT: vsext.vf2 v16, v28, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 16 x i32> @llvm.riscv.vsext.mask.nxv16i32.nxv16i16(
+ <vscale x 16 x i32> %1,
+ <vscale x 16 x i16> %2,
+ <vscale x 16 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 16 x i32> %a
+}
+
+declare <vscale x 1 x i16> @llvm.riscv.vsext.nxv1i16.nxv1i8(
+ <vscale x 1 x i8>,
+ i64);
+
+define <vscale x 1 x i16> @intrinsic_vsext_vf2_nxv1i16(<vscale x 1 x i8> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsext_vf2_nxv1i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu
+; CHECK-NEXT: vsext.vf2 v25, v16
+; CHECK-NEXT: vmv1r.v v16, v25
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i16> @llvm.riscv.vsext.nxv1i16.nxv1i8(
+ <vscale x 1 x i8> %0,
+ i64 %1)
+
+ ret <vscale x 1 x i16> %a
+}
+
+declare <vscale x 1 x i16> @llvm.riscv.vsext.mask.nxv1i16.nxv1i8(
+ <vscale x 1 x i16>,
+ <vscale x 1 x i8>,
+ <vscale x 1 x i1>,
+ i64);
+
+define <vscale x 1 x i16> @intrinsic_vsext_mask_vf2_nxv1i16(<vscale x 1 x i1> %0, <vscale x 1 x i16> %1, <vscale x 1 x i8> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsext_mask_vf2_nxv1i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,mf4,tu,mu
+; CHECK-NEXT: vsext.vf2 v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i16> @llvm.riscv.vsext.mask.nxv1i16.nxv1i8(
+ <vscale x 1 x i16> %1,
+ <vscale x 1 x i8> %2,
+ <vscale x 1 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 1 x i16> %a
+}
+
+declare <vscale x 2 x i16> @llvm.riscv.vsext.nxv2i16.nxv2i8(
+ <vscale x 2 x i8>,
+ i64);
+
+define <vscale x 2 x i16> @intrinsic_vsext_vf2_nxv2i16(<vscale x 2 x i8> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsext_vf2_nxv2i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu
+; CHECK-NEXT: vsext.vf2 v25, v16
+; CHECK-NEXT: vmv1r.v v16, v25
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i16> @llvm.riscv.vsext.nxv2i16.nxv2i8(
+ <vscale x 2 x i8> %0,
+ i64 %1)
+
+ ret <vscale x 2 x i16> %a
+}
+
+declare <vscale x 2 x i16> @llvm.riscv.vsext.mask.nxv2i16.nxv2i8(
+ <vscale x 2 x i16>,
+ <vscale x 2 x i8>,
+ <vscale x 2 x i1>,
+ i64);
+
+define <vscale x 2 x i16> @intrinsic_vsext_mask_vf2_nxv2i16(<vscale x 2 x i1> %0, <vscale x 2 x i16> %1, <vscale x 2 x i8> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsext_mask_vf2_nxv2i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,mf2,tu,mu
+; CHECK-NEXT: vsext.vf2 v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i16> @llvm.riscv.vsext.mask.nxv2i16.nxv2i8(
+ <vscale x 2 x i16> %1,
+ <vscale x 2 x i8> %2,
+ <vscale x 2 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 2 x i16> %a
+}
+
+declare <vscale x 4 x i16> @llvm.riscv.vsext.nxv4i16.nxv4i8(
+ <vscale x 4 x i8>,
+ i64);
+
+define <vscale x 4 x i16> @intrinsic_vsext_vf2_nxv4i16(<vscale x 4 x i8> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsext_vf2_nxv4i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vsext.vf2 v25, v16
+; CHECK-NEXT: vmv1r.v v16, v25
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i16> @llvm.riscv.vsext.nxv4i16.nxv4i8(
+ <vscale x 4 x i8> %0,
+ i64 %1)
+
+ ret <vscale x 4 x i16> %a
+}
+
+declare <vscale x 4 x i16> @llvm.riscv.vsext.mask.nxv4i16.nxv4i8(
+ <vscale x 4 x i16>,
+ <vscale x 4 x i8>,
+ <vscale x 4 x i1>,
+ i64);
+
+define <vscale x 4 x i16> @intrinsic_vsext_mask_vf2_nxv4i16(<vscale x 4 x i1> %0, <vscale x 4 x i16> %1, <vscale x 4 x i8> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsext_mask_vf2_nxv4i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,tu,mu
+; CHECK-NEXT: vsext.vf2 v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i16> @llvm.riscv.vsext.mask.nxv4i16.nxv4i8(
+ <vscale x 4 x i16> %1,
+ <vscale x 4 x i8> %2,
+ <vscale x 4 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 4 x i16> %a
+}
+
+declare <vscale x 8 x i16> @llvm.riscv.vsext.nxv8i16.nxv8i8(
+ <vscale x 8 x i8>,
+ i64);
+
+define <vscale x 8 x i16> @intrinsic_vsext_vf2_nxv8i16(<vscale x 8 x i8> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsext_vf2_nxv8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu
+; CHECK-NEXT: vsext.vf2 v26, v16
+; CHECK-NEXT: vmv2r.v v16, v26
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i16> @llvm.riscv.vsext.nxv8i16.nxv8i8(
+ <vscale x 8 x i8> %0,
+ i64 %1)
+
+ ret <vscale x 8 x i16> %a
+}
+
+declare <vscale x 8 x i16> @llvm.riscv.vsext.mask.nxv8i16.nxv8i8(
+ <vscale x 8 x i16>,
+ <vscale x 8 x i8>,
+ <vscale x 8 x i1>,
+ i64);
+
+define <vscale x 8 x i16> @intrinsic_vsext_mask_vf2_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i8> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsext_mask_vf2_nxv8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m2,tu,mu
+; CHECK-NEXT: vsext.vf2 v16, v18, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i16> @llvm.riscv.vsext.mask.nxv8i16.nxv8i8(
+ <vscale x 8 x i16> %1,
+ <vscale x 8 x i8> %2,
+ <vscale x 8 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 8 x i16> %a
+}
+
+declare <vscale x 16 x i16> @llvm.riscv.vsext.nxv16i16.nxv16i8(
+ <vscale x 16 x i8>,
+ i64);
+
+define <vscale x 16 x i16> @intrinsic_vsext_vf2_nxv16i16(<vscale x 16 x i8> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsext_vf2_nxv16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m4,ta,mu
+; CHECK-NEXT: vsext.vf2 v28, v16
+; CHECK-NEXT: vmv4r.v v16, v28
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 16 x i16> @llvm.riscv.vsext.nxv16i16.nxv16i8(
+ <vscale x 16 x i8> %0,
+ i64 %1)
+
+ ret <vscale x 16 x i16> %a
+}
+
+declare <vscale x 16 x i16> @llvm.riscv.vsext.mask.nxv16i16.nxv16i8(
+ <vscale x 16 x i16>,
+ <vscale x 16 x i8>,
+ <vscale x 16 x i1>,
+ i64);
+
+define <vscale x 16 x i16> @intrinsic_vsext_mask_vf2_nxv16i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i8> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsext_mask_vf2_nxv16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m4,tu,mu
+; CHECK-NEXT: vsext.vf2 v16, v20, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 16 x i16> @llvm.riscv.vsext.mask.nxv16i16.nxv16i8(
+ <vscale x 16 x i16> %1,
+ <vscale x 16 x i8> %2,
+ <vscale x 16 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 16 x i16> %a
+}
+
+declare <vscale x 32 x i16> @llvm.riscv.vsext.nxv32i16.nxv32i8(
+ <vscale x 32 x i8>,
+ i64);
+
+define <vscale x 32 x i16> @intrinsic_vsext_vf2_nxv32i16(<vscale x 32 x i8> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsext_vf2_nxv32i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m8,ta,mu
+; CHECK-NEXT: vsext.vf2 v8, v16
+; CHECK-NEXT: vmv8r.v v16, v8
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 32 x i16> @llvm.riscv.vsext.nxv32i16.nxv32i8(
+ <vscale x 32 x i8> %0,
+ i64 %1)
+
+ ret <vscale x 32 x i16> %a
+}
+
+declare <vscale x 32 x i16> @llvm.riscv.vsext.mask.nxv32i16.nxv32i8(
+ <vscale x 32 x i16>,
+ <vscale x 32 x i8>,
+ <vscale x 32 x i1>,
+ i64);
+
+define <vscale x 32 x i16> @intrinsic_vsext_mask_vf2_nxv32i16(<vscale x 32 x i1> %0, <vscale x 32 x i16> %1, <vscale x 32 x i8> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsext_mask_vf2_nxv32i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a2, zero, e8,m4,ta,mu
+; CHECK-NEXT: vle8.v v28, (a0)
+; CHECK-NEXT: vsetvli a0, a1, e16,m8,tu,mu
+; CHECK-NEXT: vsext.vf2 v16, v28, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 32 x i16> @llvm.riscv.vsext.mask.nxv32i16.nxv32i8(
+ <vscale x 32 x i16> %1,
+ <vscale x 32 x i8> %2,
+ <vscale x 32 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 32 x i16> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vzext-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vzext-rv32.ll
new file mode 100644
index 000000000000..45a1ad0ccb70
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vzext-rv32.ll
@@ -0,0 +1,664 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: --riscv-no-aliases < %s | FileCheck %s
+declare <vscale x 1 x i32> @llvm.riscv.vzext.nxv1i32.nxv1i8(
+ <vscale x 1 x i8>,
+ i32);
+
+define <vscale x 1 x i32> @intrinsic_vzext_vf4_nxv1i32(<vscale x 1 x i8> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vzext_vf4_nxv1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vzext.vf4 v25, v16
+; CHECK-NEXT: vmv1r.v v16, v25
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.vzext.nxv1i32.nxv1i8(
+ <vscale x 1 x i8> %0,
+ i32 %1)
+
+ ret <vscale x 1 x i32> %a
+}
+
+declare <vscale x 1 x i32> @llvm.riscv.vzext.mask.nxv1i32.nxv1i8(
+ <vscale x 1 x i32>,
+ <vscale x 1 x i8>,
+ <vscale x 1 x i1>,
+ i32);
+
+define <vscale x 1 x i32> @intrinsic_vzext_mask_vf4_nxv1i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, <vscale x 1 x i8> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vzext_mask_vf4_nxv1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,mf2,tu,mu
+; CHECK-NEXT: vzext.vf4 v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.vzext.mask.nxv1i32.nxv1i8(
+ <vscale x 1 x i32> %1,
+ <vscale x 1 x i8> %2,
+ <vscale x 1 x i1> %0,
+ i32 %3)
+
+ ret <vscale x 1 x i32> %a
+}
+
+declare <vscale x 2 x i32> @llvm.riscv.vzext.nxv2i32.nxv2i8(
+ <vscale x 2 x i8>,
+ i32);
+
+define <vscale x 2 x i32> @intrinsic_vzext_vf4_nxv2i32(<vscale x 2 x i8> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vzext_vf4_nxv2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vzext.vf4 v25, v16
+; CHECK-NEXT: vmv1r.v v16, v25
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.vzext.nxv2i32.nxv2i8(
+ <vscale x 2 x i8> %0,
+ i32 %1)
+
+ ret <vscale x 2 x i32> %a
+}
+
+declare <vscale x 2 x i32> @llvm.riscv.vzext.mask.nxv2i32.nxv2i8(
+ <vscale x 2 x i32>,
+ <vscale x 2 x i8>,
+ <vscale x 2 x i1>,
+ i32);
+
+define <vscale x 2 x i32> @intrinsic_vzext_mask_vf4_nxv2i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, <vscale x 2 x i8> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vzext_mask_vf4_nxv2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,tu,mu
+; CHECK-NEXT: vzext.vf4 v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.vzext.mask.nxv2i32.nxv2i8(
+ <vscale x 2 x i32> %1,
+ <vscale x 2 x i8> %2,
+ <vscale x 2 x i1> %0,
+ i32 %3)
+
+ ret <vscale x 2 x i32> %a
+}
+
+declare <vscale x 4 x i32> @llvm.riscv.vzext.nxv4i32.nxv4i8(
+ <vscale x 4 x i8>,
+ i32);
+
+define <vscale x 4 x i32> @intrinsic_vzext_vf4_nxv4i32(<vscale x 4 x i8> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vzext_vf4_nxv4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
+; CHECK-NEXT: vzext.vf4 v26, v16
+; CHECK-NEXT: vmv2r.v v16, v26
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.vzext.nxv4i32.nxv4i8(
+ <vscale x 4 x i8> %0,
+ i32 %1)
+
+ ret <vscale x 4 x i32> %a
+}
+
+declare <vscale x 4 x i32> @llvm.riscv.vzext.mask.nxv4i32.nxv4i8(
+ <vscale x 4 x i32>,
+ <vscale x 4 x i8>,
+ <vscale x 4 x i1>,
+ i32);
+
+define <vscale x 4 x i32> @intrinsic_vzext_mask_vf4_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i8> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vzext_mask_vf4_nxv4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,tu,mu
+; CHECK-NEXT: vzext.vf4 v16, v18, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.vzext.mask.nxv4i32.nxv4i8(
+ <vscale x 4 x i32> %1,
+ <vscale x 4 x i8> %2,
+ <vscale x 4 x i1> %0,
+ i32 %3)
+
+ ret <vscale x 4 x i32> %a
+}
+
+declare <vscale x 8 x i32> @llvm.riscv.vzext.nxv8i32.nxv8i8(
+ <vscale x 8 x i8>,
+ i32);
+
+define <vscale x 8 x i32> @intrinsic_vzext_vf4_nxv8i32(<vscale x 8 x i8> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vzext_vf4_nxv8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu
+; CHECK-NEXT: vzext.vf4 v28, v16
+; CHECK-NEXT: vmv4r.v v16, v28
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.vzext.nxv8i32.nxv8i8(
+ <vscale x 8 x i8> %0,
+ i32 %1)
+
+ ret <vscale x 8 x i32> %a
+}
+
+declare <vscale x 8 x i32> @llvm.riscv.vzext.mask.nxv8i32.nxv8i8(
+ <vscale x 8 x i32>,
+ <vscale x 8 x i8>,
+ <vscale x 8 x i1>,
+ i32);
+
+define <vscale x 8 x i32> @intrinsic_vzext_mask_vf4_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i8> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vzext_mask_vf4_nxv8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,tu,mu
+; CHECK-NEXT: vzext.vf4 v16, v20, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.vzext.mask.nxv8i32.nxv8i8(
+ <vscale x 8 x i32> %1,
+ <vscale x 8 x i8> %2,
+ <vscale x 8 x i1> %0,
+ i32 %3)
+
+ ret <vscale x 8 x i32> %a
+}
+
+declare <vscale x 16 x i32> @llvm.riscv.vzext.nxv16i32.nxv16i8(
+ <vscale x 16 x i8>,
+ i32);
+
+define <vscale x 16 x i32> @intrinsic_vzext_vf4_nxv16i32(<vscale x 16 x i8> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vzext_vf4_nxv16i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m8,ta,mu
+; CHECK-NEXT: vzext.vf4 v8, v16
+; CHECK-NEXT: vmv8r.v v16, v8
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 16 x i32> @llvm.riscv.vzext.nxv16i32.nxv16i8(
+ <vscale x 16 x i8> %0,
+ i32 %1)
+
+ ret <vscale x 16 x i32> %a
+}
+
+declare <vscale x 16 x i32> @llvm.riscv.vzext.mask.nxv16i32.nxv16i8(
+ <vscale x 16 x i32>,
+ <vscale x 16 x i8>,
+ <vscale x 16 x i1>,
+ i32);
+
+define <vscale x 16 x i32> @intrinsic_vzext_mask_vf4_nxv16i32(<vscale x 16 x i1> %0, <vscale x 16 x i32> %1, <vscale x 16 x i8> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vzext_mask_vf4_nxv16i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a2, zero, e8,m2,ta,mu
+; CHECK-NEXT: vle8.v v26, (a0)
+; CHECK-NEXT: vsetvli a0, a1, e32,m8,tu,mu
+; CHECK-NEXT: vzext.vf4 v16, v26, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 16 x i32> @llvm.riscv.vzext.mask.nxv16i32.nxv16i8(
+ <vscale x 16 x i32> %1,
+ <vscale x 16 x i8> %2,
+ <vscale x 16 x i1> %0,
+ i32 %3)
+
+ ret <vscale x 16 x i32> %a
+}
+
+declare <vscale x 1 x i32> @llvm.riscv.vzext.nxv1i32.nxv1i16(
+ <vscale x 1 x i16>,
+ i32);
+
+define <vscale x 1 x i32> @intrinsic_vzext_vf2_nxv1i32(<vscale x 1 x i16> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vzext_vf2_nxv1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vzext.vf2 v25, v16
+; CHECK-NEXT: vmv1r.v v16, v25
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.vzext.nxv1i32.nxv1i16(
+ <vscale x 1 x i16> %0,
+ i32 %1)
+
+ ret <vscale x 1 x i32> %a
+}
+
+declare <vscale x 1 x i32> @llvm.riscv.vzext.mask.nxv1i32.nxv1i16(
+ <vscale x 1 x i32>,
+ <vscale x 1 x i16>,
+ <vscale x 1 x i1>,
+ i32);
+
+define <vscale x 1 x i32> @intrinsic_vzext_mask_vf2_nxv1i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, <vscale x 1 x i16> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vzext_mask_vf2_nxv1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,mf2,tu,mu
+; CHECK-NEXT: vzext.vf2 v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.vzext.mask.nxv1i32.nxv1i16(
+ <vscale x 1 x i32> %1,
+ <vscale x 1 x i16> %2,
+ <vscale x 1 x i1> %0,
+ i32 %3)
+
+ ret <vscale x 1 x i32> %a
+}
+
+declare <vscale x 2 x i32> @llvm.riscv.vzext.nxv2i32.nxv2i16(
+ <vscale x 2 x i16>,
+ i32);
+
+define <vscale x 2 x i32> @intrinsic_vzext_vf2_nxv2i32(<vscale x 2 x i16> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vzext_vf2_nxv2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vzext.vf2 v25, v16
+; CHECK-NEXT: vmv1r.v v16, v25
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.vzext.nxv2i32.nxv2i16(
+ <vscale x 2 x i16> %0,
+ i32 %1)
+
+ ret <vscale x 2 x i32> %a
+}
+
+declare <vscale x 2 x i32> @llvm.riscv.vzext.mask.nxv2i32.nxv2i16(
+ <vscale x 2 x i32>,
+ <vscale x 2 x i16>,
+ <vscale x 2 x i1>,
+ i32);
+
+define <vscale x 2 x i32> @intrinsic_vzext_mask_vf2_nxv2i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, <vscale x 2 x i16> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vzext_mask_vf2_nxv2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,tu,mu
+; CHECK-NEXT: vzext.vf2 v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.vzext.mask.nxv2i32.nxv2i16(
+ <vscale x 2 x i32> %1,
+ <vscale x 2 x i16> %2,
+ <vscale x 2 x i1> %0,
+ i32 %3)
+
+ ret <vscale x 2 x i32> %a
+}
+
+declare <vscale x 4 x i32> @llvm.riscv.vzext.nxv4i32.nxv4i16(
+ <vscale x 4 x i16>,
+ i32);
+
+define <vscale x 4 x i32> @intrinsic_vzext_vf2_nxv4i32(<vscale x 4 x i16> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vzext_vf2_nxv4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
+; CHECK-NEXT: vzext.vf2 v26, v16
+; CHECK-NEXT: vmv2r.v v16, v26
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.vzext.nxv4i32.nxv4i16(
+ <vscale x 4 x i16> %0,
+ i32 %1)
+
+ ret <vscale x 4 x i32> %a
+}
+
+declare <vscale x 4 x i32> @llvm.riscv.vzext.mask.nxv4i32.nxv4i16(
+ <vscale x 4 x i32>,
+ <vscale x 4 x i16>,
+ <vscale x 4 x i1>,
+ i32);
+
+define <vscale x 4 x i32> @intrinsic_vzext_mask_vf2_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i16> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vzext_mask_vf2_nxv4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,tu,mu
+; CHECK-NEXT: vzext.vf2 v16, v18, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.vzext.mask.nxv4i32.nxv4i16(
+ <vscale x 4 x i32> %1,
+ <vscale x 4 x i16> %2,
+ <vscale x 4 x i1> %0,
+ i32 %3)
+
+ ret <vscale x 4 x i32> %a
+}
+
+declare <vscale x 8 x i32> @llvm.riscv.vzext.nxv8i32.nxv8i16(
+ <vscale x 8 x i16>,
+ i32);
+
+define <vscale x 8 x i32> @intrinsic_vzext_vf2_nxv8i32(<vscale x 8 x i16> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vzext_vf2_nxv8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu
+; CHECK-NEXT: vzext.vf2 v28, v16
+; CHECK-NEXT: vmv4r.v v16, v28
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.vzext.nxv8i32.nxv8i16(
+ <vscale x 8 x i16> %0,
+ i32 %1)
+
+ ret <vscale x 8 x i32> %a
+}
+
+declare <vscale x 8 x i32> @llvm.riscv.vzext.mask.nxv8i32.nxv8i16(
+ <vscale x 8 x i32>,
+ <vscale x 8 x i16>,
+ <vscale x 8 x i1>,
+ i32);
+
+define <vscale x 8 x i32> @intrinsic_vzext_mask_vf2_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i16> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vzext_mask_vf2_nxv8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,tu,mu
+; CHECK-NEXT: vzext.vf2 v16, v20, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.vzext.mask.nxv8i32.nxv8i16(
+ <vscale x 8 x i32> %1,
+ <vscale x 8 x i16> %2,
+ <vscale x 8 x i1> %0,
+ i32 %3)
+
+ ret <vscale x 8 x i32> %a
+}
+
+declare <vscale x 16 x i32> @llvm.riscv.vzext.nxv16i32.nxv16i16(
+ <vscale x 16 x i16>,
+ i32);
+
+define <vscale x 16 x i32> @intrinsic_vzext_vf2_nxv16i32(<vscale x 16 x i16> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vzext_vf2_nxv16i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m8,ta,mu
+; CHECK-NEXT: vzext.vf2 v8, v16
+; CHECK-NEXT: vmv8r.v v16, v8
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 16 x i32> @llvm.riscv.vzext.nxv16i32.nxv16i16(
+ <vscale x 16 x i16> %0,
+ i32 %1)
+
+ ret <vscale x 16 x i32> %a
+}
+
+declare <vscale x 16 x i32> @llvm.riscv.vzext.mask.nxv16i32.nxv16i16(
+ <vscale x 16 x i32>,
+ <vscale x 16 x i16>,
+ <vscale x 16 x i1>,
+ i32);
+
+define <vscale x 16 x i32> @intrinsic_vzext_mask_vf2_nxv16i32(<vscale x 16 x i1> %0, <vscale x 16 x i32> %1, <vscale x 16 x i16> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vzext_mask_vf2_nxv16i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a2, zero, e16,m4,ta,mu
+; CHECK-NEXT: vle16.v v28, (a0)
+; CHECK-NEXT: vsetvli a0, a1, e32,m8,tu,mu
+; CHECK-NEXT: vzext.vf2 v16, v28, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 16 x i32> @llvm.riscv.vzext.mask.nxv16i32.nxv16i16(
+ <vscale x 16 x i32> %1,
+ <vscale x 16 x i16> %2,
+ <vscale x 16 x i1> %0,
+ i32 %3)
+
+ ret <vscale x 16 x i32> %a
+}
+
+declare <vscale x 1 x i16> @llvm.riscv.vzext.nxv1i16.nxv1i8(
+ <vscale x 1 x i8>,
+ i32);
+
+define <vscale x 1 x i16> @intrinsic_vzext_vf2_nxv1i16(<vscale x 1 x i8> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vzext_vf2_nxv1i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu
+; CHECK-NEXT: vzext.vf2 v25, v16
+; CHECK-NEXT: vmv1r.v v16, v25
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i16> @llvm.riscv.vzext.nxv1i16.nxv1i8(
+ <vscale x 1 x i8> %0,
+ i32 %1)
+
+ ret <vscale x 1 x i16> %a
+}
+
+declare <vscale x 1 x i16> @llvm.riscv.vzext.mask.nxv1i16.nxv1i8(
+ <vscale x 1 x i16>,
+ <vscale x 1 x i8>,
+ <vscale x 1 x i1>,
+ i32);
+
+define <vscale x 1 x i16> @intrinsic_vzext_mask_vf2_nxv1i16(<vscale x 1 x i1> %0, <vscale x 1 x i16> %1, <vscale x 1 x i8> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vzext_mask_vf2_nxv1i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,mf4,tu,mu
+; CHECK-NEXT: vzext.vf2 v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i16> @llvm.riscv.vzext.mask.nxv1i16.nxv1i8(
+ <vscale x 1 x i16> %1,
+ <vscale x 1 x i8> %2,
+ <vscale x 1 x i1> %0,
+ i32 %3)
+
+ ret <vscale x 1 x i16> %a
+}
+
+declare <vscale x 2 x i16> @llvm.riscv.vzext.nxv2i16.nxv2i8(
+ <vscale x 2 x i8>,
+ i32);
+
+define <vscale x 2 x i16> @intrinsic_vzext_vf2_nxv2i16(<vscale x 2 x i8> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vzext_vf2_nxv2i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu
+; CHECK-NEXT: vzext.vf2 v25, v16
+; CHECK-NEXT: vmv1r.v v16, v25
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i16> @llvm.riscv.vzext.nxv2i16.nxv2i8(
+ <vscale x 2 x i8> %0,
+ i32 %1)
+
+ ret <vscale x 2 x i16> %a
+}
+
+declare <vscale x 2 x i16> @llvm.riscv.vzext.mask.nxv2i16.nxv2i8(
+ <vscale x 2 x i16>,
+ <vscale x 2 x i8>,
+ <vscale x 2 x i1>,
+ i32);
+
+define <vscale x 2 x i16> @intrinsic_vzext_mask_vf2_nxv2i16(<vscale x 2 x i1> %0, <vscale x 2 x i16> %1, <vscale x 2 x i8> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vzext_mask_vf2_nxv2i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,mf2,tu,mu
+; CHECK-NEXT: vzext.vf2 v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i16> @llvm.riscv.vzext.mask.nxv2i16.nxv2i8(
+ <vscale x 2 x i16> %1,
+ <vscale x 2 x i8> %2,
+ <vscale x 2 x i1> %0,
+ i32 %3)
+
+ ret <vscale x 2 x i16> %a
+}
+
+declare <vscale x 4 x i16> @llvm.riscv.vzext.nxv4i16.nxv4i8(
+ <vscale x 4 x i8>,
+ i32);
+
+define <vscale x 4 x i16> @intrinsic_vzext_vf2_nxv4i16(<vscale x 4 x i8> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vzext_vf2_nxv4i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vzext.vf2 v25, v16
+; CHECK-NEXT: vmv1r.v v16, v25
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i16> @llvm.riscv.vzext.nxv4i16.nxv4i8(
+ <vscale x 4 x i8> %0,
+ i32 %1)
+
+ ret <vscale x 4 x i16> %a
+}
+
+declare <vscale x 4 x i16> @llvm.riscv.vzext.mask.nxv4i16.nxv4i8(
+ <vscale x 4 x i16>,
+ <vscale x 4 x i8>,
+ <vscale x 4 x i1>,
+ i32);
+
+define <vscale x 4 x i16> @intrinsic_vzext_mask_vf2_nxv4i16(<vscale x 4 x i1> %0, <vscale x 4 x i16> %1, <vscale x 4 x i8> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vzext_mask_vf2_nxv4i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,tu,mu
+; CHECK-NEXT: vzext.vf2 v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i16> @llvm.riscv.vzext.mask.nxv4i16.nxv4i8(
+ <vscale x 4 x i16> %1,
+ <vscale x 4 x i8> %2,
+ <vscale x 4 x i1> %0,
+ i32 %3)
+
+ ret <vscale x 4 x i16> %a
+}
+
+declare <vscale x 8 x i16> @llvm.riscv.vzext.nxv8i16.nxv8i8(
+ <vscale x 8 x i8>,
+ i32);
+
+define <vscale x 8 x i16> @intrinsic_vzext_vf2_nxv8i16(<vscale x 8 x i8> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vzext_vf2_nxv8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu
+; CHECK-NEXT: vzext.vf2 v26, v16
+; CHECK-NEXT: vmv2r.v v16, v26
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i16> @llvm.riscv.vzext.nxv8i16.nxv8i8(
+ <vscale x 8 x i8> %0,
+ i32 %1)
+
+ ret <vscale x 8 x i16> %a
+}
+
+declare <vscale x 8 x i16> @llvm.riscv.vzext.mask.nxv8i16.nxv8i8(
+ <vscale x 8 x i16>,
+ <vscale x 8 x i8>,
+ <vscale x 8 x i1>,
+ i32);
+
+define <vscale x 8 x i16> @intrinsic_vzext_mask_vf2_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i8> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vzext_mask_vf2_nxv8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m2,tu,mu
+; CHECK-NEXT: vzext.vf2 v16, v18, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i16> @llvm.riscv.vzext.mask.nxv8i16.nxv8i8(
+ <vscale x 8 x i16> %1,
+ <vscale x 8 x i8> %2,
+ <vscale x 8 x i1> %0,
+ i32 %3)
+
+ ret <vscale x 8 x i16> %a
+}
+
+declare <vscale x 16 x i16> @llvm.riscv.vzext.nxv16i16.nxv16i8(
+ <vscale x 16 x i8>,
+ i32);
+
+define <vscale x 16 x i16> @intrinsic_vzext_vf2_nxv16i16(<vscale x 16 x i8> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vzext_vf2_nxv16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m4,ta,mu
+; CHECK-NEXT: vzext.vf2 v28, v16
+; CHECK-NEXT: vmv4r.v v16, v28
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 16 x i16> @llvm.riscv.vzext.nxv16i16.nxv16i8(
+ <vscale x 16 x i8> %0,
+ i32 %1)
+
+ ret <vscale x 16 x i16> %a
+}
+
+declare <vscale x 16 x i16> @llvm.riscv.vzext.mask.nxv16i16.nxv16i8(
+ <vscale x 16 x i16>,
+ <vscale x 16 x i8>,
+ <vscale x 16 x i1>,
+ i32);
+
+define <vscale x 16 x i16> @intrinsic_vzext_mask_vf2_nxv16i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i8> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vzext_mask_vf2_nxv16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m4,tu,mu
+; CHECK-NEXT: vzext.vf2 v16, v20, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 16 x i16> @llvm.riscv.vzext.mask.nxv16i16.nxv16i8(
+ <vscale x 16 x i16> %1,
+ <vscale x 16 x i8> %2,
+ <vscale x 16 x i1> %0,
+ i32 %3)
+
+ ret <vscale x 16 x i16> %a
+}
+
+declare <vscale x 32 x i16> @llvm.riscv.vzext.nxv32i16.nxv32i8(
+ <vscale x 32 x i8>,
+ i32);
+
+define <vscale x 32 x i16> @intrinsic_vzext_vf2_nxv32i16(<vscale x 32 x i8> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vzext_vf2_nxv32i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m8,ta,mu
+; CHECK-NEXT: vzext.vf2 v8, v16
+; CHECK-NEXT: vmv8r.v v16, v8
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 32 x i16> @llvm.riscv.vzext.nxv32i16.nxv32i8(
+ <vscale x 32 x i8> %0,
+ i32 %1)
+
+ ret <vscale x 32 x i16> %a
+}
+
+declare <vscale x 32 x i16> @llvm.riscv.vzext.mask.nxv32i16.nxv32i8(
+ <vscale x 32 x i16>,
+ <vscale x 32 x i8>,
+ <vscale x 32 x i1>,
+ i32);
+
+define <vscale x 32 x i16> @intrinsic_vzext_mask_vf2_nxv32i16(<vscale x 32 x i1> %0, <vscale x 32 x i16> %1, <vscale x 32 x i8> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vzext_mask_vf2_nxv32i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a2, zero, e8,m4,ta,mu
+; CHECK-NEXT: vle8.v v28, (a0)
+; CHECK-NEXT: vsetvli a0, a1, e16,m8,tu,mu
+; CHECK-NEXT: vzext.vf2 v16, v28, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 32 x i16> @llvm.riscv.vzext.mask.nxv32i16.nxv32i8(
+ <vscale x 32 x i16> %1,
+ <vscale x 32 x i8> %2,
+ <vscale x 32 x i1> %0,
+ i32 %3)
+
+ ret <vscale x 32 x i16> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vzext-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vzext-rv64.ll
new file mode 100644
index 000000000000..29f02658b8ca
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vzext-rv64.ll
@@ -0,0 +1,1162 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: --riscv-no-aliases < %s | FileCheck %s
+declare <vscale x 1 x i64> @llvm.riscv.vzext.nxv1i64.nxv1i8(
+ <vscale x 1 x i8>,
+ i64);
+
+define <vscale x 1 x i64> @intrinsic_vzext_vf8_nxv1i64(<vscale x 1 x i8> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vzext_vf8_nxv1i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT: vzext.vf8 v25, v16
+; CHECK-NEXT: vmv1r.v v16, v25
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i64> @llvm.riscv.vzext.nxv1i64.nxv1i8(
+ <vscale x 1 x i8> %0,
+ i64 %1)
+
+ ret <vscale x 1 x i64> %a
+}
+
+declare <vscale x 1 x i64> @llvm.riscv.vzext.mask.nxv1i64.nxv1i8(
+ <vscale x 1 x i64>,
+ <vscale x 1 x i8>,
+ <vscale x 1 x i1>,
+ i64);
+
+define <vscale x 1 x i64> @intrinsic_vzext_mask_vf8_nxv1i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, <vscale x 1 x i8> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vzext_mask_vf8_nxv1i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m1,tu,mu
+; CHECK-NEXT: vzext.vf8 v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i64> @llvm.riscv.vzext.mask.nxv1i64.nxv1i8(
+ <vscale x 1 x i64> %1,
+ <vscale x 1 x i8> %2,
+ <vscale x 1 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 1 x i64> %a
+}
+
+declare <vscale x 2 x i64> @llvm.riscv.vzext.nxv2i64.nxv2i8(
+ <vscale x 2 x i8>,
+ i64);
+
+define <vscale x 2 x i64> @intrinsic_vzext_vf8_nxv2i64(<vscale x 2 x i8> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vzext_vf8_nxv2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu
+; CHECK-NEXT: vzext.vf8 v26, v16
+; CHECK-NEXT: vmv2r.v v16, v26
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i64> @llvm.riscv.vzext.nxv2i64.nxv2i8(
+ <vscale x 2 x i8> %0,
+ i64 %1)
+
+ ret <vscale x 2 x i64> %a
+}
+
+declare <vscale x 2 x i64> @llvm.riscv.vzext.mask.nxv2i64.nxv2i8(
+ <vscale x 2 x i64>,
+ <vscale x 2 x i8>,
+ <vscale x 2 x i1>,
+ i64);
+
+define <vscale x 2 x i64> @intrinsic_vzext_mask_vf8_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i8> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vzext_mask_vf8_nxv2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m2,tu,mu
+; CHECK-NEXT: vzext.vf8 v16, v18, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i64> @llvm.riscv.vzext.mask.nxv2i64.nxv2i8(
+ <vscale x 2 x i64> %1,
+ <vscale x 2 x i8> %2,
+ <vscale x 2 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 2 x i64> %a
+}
+
+declare <vscale x 4 x i64> @llvm.riscv.vzext.nxv4i64.nxv4i8(
+ <vscale x 4 x i8>,
+ i64);
+
+define <vscale x 4 x i64> @intrinsic_vzext_vf8_nxv4i64(<vscale x 4 x i8> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vzext_vf8_nxv4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu
+; CHECK-NEXT: vzext.vf8 v28, v16
+; CHECK-NEXT: vmv4r.v v16, v28
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i64> @llvm.riscv.vzext.nxv4i64.nxv4i8(
+ <vscale x 4 x i8> %0,
+ i64 %1)
+
+ ret <vscale x 4 x i64> %a
+}
+
+declare <vscale x 4 x i64> @llvm.riscv.vzext.mask.nxv4i64.nxv4i8(
+ <vscale x 4 x i64>,
+ <vscale x 4 x i8>,
+ <vscale x 4 x i1>,
+ i64);
+
+define <vscale x 4 x i64> @intrinsic_vzext_mask_vf8_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i8> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vzext_mask_vf8_nxv4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m4,tu,mu
+; CHECK-NEXT: vzext.vf8 v16, v20, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i64> @llvm.riscv.vzext.mask.nxv4i64.nxv4i8(
+ <vscale x 4 x i64> %1,
+ <vscale x 4 x i8> %2,
+ <vscale x 4 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 4 x i64> %a
+}
+
+declare <vscale x 8 x i64> @llvm.riscv.vzext.nxv8i64.nxv8i8(
+ <vscale x 8 x i8>,
+ i64);
+
+define <vscale x 8 x i64> @intrinsic_vzext_vf8_nxv8i64(<vscale x 8 x i8> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vzext_vf8_nxv8i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu
+; CHECK-NEXT: vzext.vf8 v8, v16
+; CHECK-NEXT: vmv8r.v v16, v8
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i64> @llvm.riscv.vzext.nxv8i64.nxv8i8(
+ <vscale x 8 x i8> %0,
+ i64 %1)
+
+ ret <vscale x 8 x i64> %a
+}
+
+declare <vscale x 8 x i64> @llvm.riscv.vzext.mask.nxv8i64.nxv8i8(
+ <vscale x 8 x i64>,
+ <vscale x 8 x i8>,
+ <vscale x 8 x i1>,
+ i64);
+
+define <vscale x 8 x i64> @intrinsic_vzext_mask_vf8_nxv8i64(<vscale x 8 x i1> %0, <vscale x 8 x i64> %1, <vscale x 8 x i8> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vzext_mask_vf8_nxv8i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a2, zero, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, a1, e64,m8,tu,mu
+; CHECK-NEXT: vzext.vf8 v16, v25, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i64> @llvm.riscv.vzext.mask.nxv8i64.nxv8i8(
+ <vscale x 8 x i64> %1,
+ <vscale x 8 x i8> %2,
+ <vscale x 8 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 8 x i64> %a
+}
+
+declare <vscale x 1 x i64> @llvm.riscv.vzext.nxv1i64.nxv1i16(
+ <vscale x 1 x i16>,
+ i64);
+
+define <vscale x 1 x i64> @intrinsic_vzext_vf4_nxv1i64(<vscale x 1 x i16> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vzext_vf4_nxv1i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT: vzext.vf4 v25, v16
+; CHECK-NEXT: vmv1r.v v16, v25
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i64> @llvm.riscv.vzext.nxv1i64.nxv1i16(
+ <vscale x 1 x i16> %0,
+ i64 %1)
+
+ ret <vscale x 1 x i64> %a
+}
+
+declare <vscale x 1 x i64> @llvm.riscv.vzext.mask.nxv1i64.nxv1i16(
+ <vscale x 1 x i64>,
+ <vscale x 1 x i16>,
+ <vscale x 1 x i1>,
+ i64);
+
+define <vscale x 1 x i64> @intrinsic_vzext_mask_vf4_nxv1i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, <vscale x 1 x i16> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vzext_mask_vf4_nxv1i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m1,tu,mu
+; CHECK-NEXT: vzext.vf4 v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i64> @llvm.riscv.vzext.mask.nxv1i64.nxv1i16(
+ <vscale x 1 x i64> %1,
+ <vscale x 1 x i16> %2,
+ <vscale x 1 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 1 x i64> %a
+}
+
+declare <vscale x 2 x i64> @llvm.riscv.vzext.nxv2i64.nxv2i16(
+ <vscale x 2 x i16>,
+ i64);
+
+define <vscale x 2 x i64> @intrinsic_vzext_vf4_nxv2i64(<vscale x 2 x i16> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vzext_vf4_nxv2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu
+; CHECK-NEXT: vzext.vf4 v26, v16
+; CHECK-NEXT: vmv2r.v v16, v26
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i64> @llvm.riscv.vzext.nxv2i64.nxv2i16(
+ <vscale x 2 x i16> %0,
+ i64 %1)
+
+ ret <vscale x 2 x i64> %a
+}
+
+declare <vscale x 2 x i64> @llvm.riscv.vzext.mask.nxv2i64.nxv2i16(
+ <vscale x 2 x i64>,
+ <vscale x 2 x i16>,
+ <vscale x 2 x i1>,
+ i64);
+
+define <vscale x 2 x i64> @intrinsic_vzext_mask_vf4_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i16> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vzext_mask_vf4_nxv2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m2,tu,mu
+; CHECK-NEXT: vzext.vf4 v16, v18, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i64> @llvm.riscv.vzext.mask.nxv2i64.nxv2i16(
+ <vscale x 2 x i64> %1,
+ <vscale x 2 x i16> %2,
+ <vscale x 2 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 2 x i64> %a
+}
+
+declare <vscale x 4 x i64> @llvm.riscv.vzext.nxv4i64.nxv4i16(
+ <vscale x 4 x i16>,
+ i64);
+
+define <vscale x 4 x i64> @intrinsic_vzext_vf4_nxv4i64(<vscale x 4 x i16> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vzext_vf4_nxv4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu
+; CHECK-NEXT: vzext.vf4 v28, v16
+; CHECK-NEXT: vmv4r.v v16, v28
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i64> @llvm.riscv.vzext.nxv4i64.nxv4i16(
+ <vscale x 4 x i16> %0,
+ i64 %1)
+
+ ret <vscale x 4 x i64> %a
+}
+
+declare <vscale x 4 x i64> @llvm.riscv.vzext.mask.nxv4i64.nxv4i16(
+ <vscale x 4 x i64>,
+ <vscale x 4 x i16>,
+ <vscale x 4 x i1>,
+ i64);
+
+define <vscale x 4 x i64> @intrinsic_vzext_mask_vf4_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i16> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vzext_mask_vf4_nxv4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m4,tu,mu
+; CHECK-NEXT: vzext.vf4 v16, v20, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i64> @llvm.riscv.vzext.mask.nxv4i64.nxv4i16(
+ <vscale x 4 x i64> %1,
+ <vscale x 4 x i16> %2,
+ <vscale x 4 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 4 x i64> %a
+}
+
+declare <vscale x 8 x i64> @llvm.riscv.vzext.nxv8i64.nxv8i16(
+ <vscale x 8 x i16>,
+ i64);
+
+define <vscale x 8 x i64> @intrinsic_vzext_vf4_nxv8i64(<vscale x 8 x i16> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vzext_vf4_nxv8i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu
+; CHECK-NEXT: vzext.vf4 v8, v16
+; CHECK-NEXT: vmv8r.v v16, v8
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i64> @llvm.riscv.vzext.nxv8i64.nxv8i16(
+ <vscale x 8 x i16> %0,
+ i64 %1)
+
+ ret <vscale x 8 x i64> %a
+}
+
+declare <vscale x 8 x i64> @llvm.riscv.vzext.mask.nxv8i64.nxv8i16(
+ <vscale x 8 x i64>,
+ <vscale x 8 x i16>,
+ <vscale x 8 x i1>,
+ i64);
+
+define <vscale x 8 x i64> @intrinsic_vzext_mask_vf4_nxv8i64(<vscale x 8 x i1> %0, <vscale x 8 x i64> %1, <vscale x 8 x i16> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vzext_mask_vf4_nxv8i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a2, zero, e16,m2,ta,mu
+; CHECK-NEXT: vle16.v v26, (a0)
+; CHECK-NEXT: vsetvli a0, a1, e64,m8,tu,mu
+; CHECK-NEXT: vzext.vf4 v16, v26, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i64> @llvm.riscv.vzext.mask.nxv8i64.nxv8i16(
+ <vscale x 8 x i64> %1,
+ <vscale x 8 x i16> %2,
+ <vscale x 8 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 8 x i64> %a
+}
+
+declare <vscale x 1 x i32> @llvm.riscv.vzext.nxv1i32.nxv1i8(
+ <vscale x 1 x i8>,
+ i64);
+
+define <vscale x 1 x i32> @intrinsic_vzext_vf4_nxv1i32(<vscale x 1 x i8> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vzext_vf4_nxv1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vzext.vf4 v25, v16
+; CHECK-NEXT: vmv1r.v v16, v25
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.vzext.nxv1i32.nxv1i8(
+ <vscale x 1 x i8> %0,
+ i64 %1)
+
+ ret <vscale x 1 x i32> %a
+}
+
+declare <vscale x 1 x i32> @llvm.riscv.vzext.mask.nxv1i32.nxv1i8(
+ <vscale x 1 x i32>,
+ <vscale x 1 x i8>,
+ <vscale x 1 x i1>,
+ i64);
+
+define <vscale x 1 x i32> @intrinsic_vzext_mask_vf4_nxv1i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, <vscale x 1 x i8> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vzext_mask_vf4_nxv1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,mf2,tu,mu
+; CHECK-NEXT: vzext.vf4 v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.vzext.mask.nxv1i32.nxv1i8(
+ <vscale x 1 x i32> %1,
+ <vscale x 1 x i8> %2,
+ <vscale x 1 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 1 x i32> %a
+}
+
+declare <vscale x 2 x i32> @llvm.riscv.vzext.nxv2i32.nxv2i8(
+ <vscale x 2 x i8>,
+ i64);
+
+define <vscale x 2 x i32> @intrinsic_vzext_vf4_nxv2i32(<vscale x 2 x i8> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vzext_vf4_nxv2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vzext.vf4 v25, v16
+; CHECK-NEXT: vmv1r.v v16, v25
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.vzext.nxv2i32.nxv2i8(
+ <vscale x 2 x i8> %0,
+ i64 %1)
+
+ ret <vscale x 2 x i32> %a
+}
+
+declare <vscale x 2 x i32> @llvm.riscv.vzext.mask.nxv2i32.nxv2i8(
+ <vscale x 2 x i32>,
+ <vscale x 2 x i8>,
+ <vscale x 2 x i1>,
+ i64);
+
+define <vscale x 2 x i32> @intrinsic_vzext_mask_vf4_nxv2i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, <vscale x 2 x i8> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vzext_mask_vf4_nxv2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,tu,mu
+; CHECK-NEXT: vzext.vf4 v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.vzext.mask.nxv2i32.nxv2i8(
+ <vscale x 2 x i32> %1,
+ <vscale x 2 x i8> %2,
+ <vscale x 2 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 2 x i32> %a
+}
+
+declare <vscale x 4 x i32> @llvm.riscv.vzext.nxv4i32.nxv4i8(
+ <vscale x 4 x i8>,
+ i64);
+
+define <vscale x 4 x i32> @intrinsic_vzext_vf4_nxv4i32(<vscale x 4 x i8> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vzext_vf4_nxv4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
+; CHECK-NEXT: vzext.vf4 v26, v16
+; CHECK-NEXT: vmv2r.v v16, v26
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.vzext.nxv4i32.nxv4i8(
+ <vscale x 4 x i8> %0,
+ i64 %1)
+
+ ret <vscale x 4 x i32> %a
+}
+
+declare <vscale x 4 x i32> @llvm.riscv.vzext.mask.nxv4i32.nxv4i8(
+ <vscale x 4 x i32>,
+ <vscale x 4 x i8>,
+ <vscale x 4 x i1>,
+ i64);
+
+define <vscale x 4 x i32> @intrinsic_vzext_mask_vf4_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i8> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vzext_mask_vf4_nxv4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,tu,mu
+; CHECK-NEXT: vzext.vf4 v16, v18, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.vzext.mask.nxv4i32.nxv4i8(
+ <vscale x 4 x i32> %1,
+ <vscale x 4 x i8> %2,
+ <vscale x 4 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 4 x i32> %a
+}
+
+declare <vscale x 8 x i32> @llvm.riscv.vzext.nxv8i32.nxv8i8(
+ <vscale x 8 x i8>,
+ i64);
+
+define <vscale x 8 x i32> @intrinsic_vzext_vf4_nxv8i32(<vscale x 8 x i8> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vzext_vf4_nxv8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu
+; CHECK-NEXT: vzext.vf4 v28, v16
+; CHECK-NEXT: vmv4r.v v16, v28
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.vzext.nxv8i32.nxv8i8(
+ <vscale x 8 x i8> %0,
+ i64 %1)
+
+ ret <vscale x 8 x i32> %a
+}
+
+declare <vscale x 8 x i32> @llvm.riscv.vzext.mask.nxv8i32.nxv8i8(
+ <vscale x 8 x i32>,
+ <vscale x 8 x i8>,
+ <vscale x 8 x i1>,
+ i64);
+
+define <vscale x 8 x i32> @intrinsic_vzext_mask_vf4_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i8> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vzext_mask_vf4_nxv8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,tu,mu
+; CHECK-NEXT: vzext.vf4 v16, v20, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.vzext.mask.nxv8i32.nxv8i8(
+ <vscale x 8 x i32> %1,
+ <vscale x 8 x i8> %2,
+ <vscale x 8 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 8 x i32> %a
+}
+
+declare <vscale x 16 x i32> @llvm.riscv.vzext.nxv16i32.nxv16i8(
+ <vscale x 16 x i8>,
+ i64);
+
+define <vscale x 16 x i32> @intrinsic_vzext_vf4_nxv16i32(<vscale x 16 x i8> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vzext_vf4_nxv16i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m8,ta,mu
+; CHECK-NEXT: vzext.vf4 v8, v16
+; CHECK-NEXT: vmv8r.v v16, v8
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 16 x i32> @llvm.riscv.vzext.nxv16i32.nxv16i8(
+ <vscale x 16 x i8> %0,
+ i64 %1)
+
+ ret <vscale x 16 x i32> %a
+}
+
+declare <vscale x 16 x i32> @llvm.riscv.vzext.mask.nxv16i32.nxv16i8(
+ <vscale x 16 x i32>,
+ <vscale x 16 x i8>,
+ <vscale x 16 x i1>,
+ i64);
+
+define <vscale x 16 x i32> @intrinsic_vzext_mask_vf4_nxv16i32(<vscale x 16 x i1> %0, <vscale x 16 x i32> %1, <vscale x 16 x i8> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vzext_mask_vf4_nxv16i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a2, zero, e8,m2,ta,mu
+; CHECK-NEXT: vle8.v v26, (a0)
+; CHECK-NEXT: vsetvli a0, a1, e32,m8,tu,mu
+; CHECK-NEXT: vzext.vf4 v16, v26, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 16 x i32> @llvm.riscv.vzext.mask.nxv16i32.nxv16i8(
+ <vscale x 16 x i32> %1,
+ <vscale x 16 x i8> %2,
+ <vscale x 16 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 16 x i32> %a
+}
+
+declare <vscale x 1 x i64> @llvm.riscv.vzext.nxv1i64.nxv1i32(
+ <vscale x 1 x i32>,
+ i64);
+
+define <vscale x 1 x i64> @intrinsic_vzext_vf2_nxv1i64(<vscale x 1 x i32> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vzext_vf2_nxv1i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT: vzext.vf2 v25, v16
+; CHECK-NEXT: vmv1r.v v16, v25
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i64> @llvm.riscv.vzext.nxv1i64.nxv1i32(
+ <vscale x 1 x i32> %0,
+ i64 %1)
+
+ ret <vscale x 1 x i64> %a
+}
+
+declare <vscale x 1 x i64> @llvm.riscv.vzext.mask.nxv1i64.nxv1i32(
+ <vscale x 1 x i64>,
+ <vscale x 1 x i32>,
+ <vscale x 1 x i1>,
+ i64);
+
+define <vscale x 1 x i64> @intrinsic_vzext_mask_vf2_nxv1i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, <vscale x 1 x i32> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vzext_mask_vf2_nxv1i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m1,tu,mu
+; CHECK-NEXT: vzext.vf2 v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i64> @llvm.riscv.vzext.mask.nxv1i64.nxv1i32(
+ <vscale x 1 x i64> %1,
+ <vscale x 1 x i32> %2,
+ <vscale x 1 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 1 x i64> %a
+}
+
+declare <vscale x 2 x i64> @llvm.riscv.vzext.nxv2i64.nxv2i32(
+ <vscale x 2 x i32>,
+ i64);
+
+define <vscale x 2 x i64> @intrinsic_vzext_vf2_nxv2i64(<vscale x 2 x i32> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vzext_vf2_nxv2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu
+; CHECK-NEXT: vzext.vf2 v26, v16
+; CHECK-NEXT: vmv2r.v v16, v26
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i64> @llvm.riscv.vzext.nxv2i64.nxv2i32(
+ <vscale x 2 x i32> %0,
+ i64 %1)
+
+ ret <vscale x 2 x i64> %a
+}
+
+declare <vscale x 2 x i64> @llvm.riscv.vzext.mask.nxv2i64.nxv2i32(
+ <vscale x 2 x i64>,
+ <vscale x 2 x i32>,
+ <vscale x 2 x i1>,
+ i64);
+
+define <vscale x 2 x i64> @intrinsic_vzext_mask_vf2_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i32> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vzext_mask_vf2_nxv2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m2,tu,mu
+; CHECK-NEXT: vzext.vf2 v16, v18, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i64> @llvm.riscv.vzext.mask.nxv2i64.nxv2i32(
+ <vscale x 2 x i64> %1,
+ <vscale x 2 x i32> %2,
+ <vscale x 2 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 2 x i64> %a
+}
+
+declare <vscale x 4 x i64> @llvm.riscv.vzext.nxv4i64.nxv4i32(
+ <vscale x 4 x i32>,
+ i64);
+
+define <vscale x 4 x i64> @intrinsic_vzext_vf2_nxv4i64(<vscale x 4 x i32> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vzext_vf2_nxv4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu
+; CHECK-NEXT: vzext.vf2 v28, v16
+; CHECK-NEXT: vmv4r.v v16, v28
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i64> @llvm.riscv.vzext.nxv4i64.nxv4i32(
+ <vscale x 4 x i32> %0,
+ i64 %1)
+
+ ret <vscale x 4 x i64> %a
+}
+
+declare <vscale x 4 x i64> @llvm.riscv.vzext.mask.nxv4i64.nxv4i32(
+ <vscale x 4 x i64>,
+ <vscale x 4 x i32>,
+ <vscale x 4 x i1>,
+ i64);
+
+define <vscale x 4 x i64> @intrinsic_vzext_mask_vf2_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i32> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vzext_mask_vf2_nxv4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m4,tu,mu
+; CHECK-NEXT: vzext.vf2 v16, v20, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i64> @llvm.riscv.vzext.mask.nxv4i64.nxv4i32(
+ <vscale x 4 x i64> %1,
+ <vscale x 4 x i32> %2,
+ <vscale x 4 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 4 x i64> %a
+}
+
+declare <vscale x 8 x i64> @llvm.riscv.vzext.nxv8i64.nxv8i32(
+ <vscale x 8 x i32>,
+ i64);
+
+define <vscale x 8 x i64> @intrinsic_vzext_vf2_nxv8i64(<vscale x 8 x i32> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vzext_vf2_nxv8i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu
+; CHECK-NEXT: vzext.vf2 v8, v16
+; CHECK-NEXT: vmv8r.v v16, v8
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i64> @llvm.riscv.vzext.nxv8i64.nxv8i32(
+ <vscale x 8 x i32> %0,
+ i64 %1)
+
+ ret <vscale x 8 x i64> %a
+}
+
+declare <vscale x 8 x i64> @llvm.riscv.vzext.mask.nxv8i64.nxv8i32(
+ <vscale x 8 x i64>,
+ <vscale x 8 x i32>,
+ <vscale x 8 x i1>,
+ i64);
+
+define <vscale x 8 x i64> @intrinsic_vzext_mask_vf2_nxv8i64(<vscale x 8 x i1> %0, <vscale x 8 x i64> %1, <vscale x 8 x i32> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vzext_mask_vf2_nxv8i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a2, zero, e32,m4,ta,mu
+; CHECK-NEXT: vle32.v v28, (a0)
+; CHECK-NEXT: vsetvli a0, a1, e64,m8,tu,mu
+; CHECK-NEXT: vzext.vf2 v16, v28, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i64> @llvm.riscv.vzext.mask.nxv8i64.nxv8i32(
+ <vscale x 8 x i64> %1,
+ <vscale x 8 x i32> %2,
+ <vscale x 8 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 8 x i64> %a
+}
+
+declare <vscale x 1 x i32> @llvm.riscv.vzext.nxv1i32.nxv1i16(
+ <vscale x 1 x i16>,
+ i64);
+
+define <vscale x 1 x i32> @intrinsic_vzext_vf2_nxv1i32(<vscale x 1 x i16> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vzext_vf2_nxv1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vzext.vf2 v25, v16
+; CHECK-NEXT: vmv1r.v v16, v25
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.vzext.nxv1i32.nxv1i16(
+ <vscale x 1 x i16> %0,
+ i64 %1)
+
+ ret <vscale x 1 x i32> %a
+}
+
+declare <vscale x 1 x i32> @llvm.riscv.vzext.mask.nxv1i32.nxv1i16(
+ <vscale x 1 x i32>,
+ <vscale x 1 x i16>,
+ <vscale x 1 x i1>,
+ i64);
+
+define <vscale x 1 x i32> @intrinsic_vzext_mask_vf2_nxv1i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, <vscale x 1 x i16> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vzext_mask_vf2_nxv1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,mf2,tu,mu
+; CHECK-NEXT: vzext.vf2 v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.vzext.mask.nxv1i32.nxv1i16(
+ <vscale x 1 x i32> %1,
+ <vscale x 1 x i16> %2,
+ <vscale x 1 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 1 x i32> %a
+}
+
+declare <vscale x 2 x i32> @llvm.riscv.vzext.nxv2i32.nxv2i16(
+ <vscale x 2 x i16>,
+ i64);
+
+define <vscale x 2 x i32> @intrinsic_vzext_vf2_nxv2i32(<vscale x 2 x i16> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vzext_vf2_nxv2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vzext.vf2 v25, v16
+; CHECK-NEXT: vmv1r.v v16, v25
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.vzext.nxv2i32.nxv2i16(
+ <vscale x 2 x i16> %0,
+ i64 %1)
+
+ ret <vscale x 2 x i32> %a
+}
+
+declare <vscale x 2 x i32> @llvm.riscv.vzext.mask.nxv2i32.nxv2i16(
+ <vscale x 2 x i32>,
+ <vscale x 2 x i16>,
+ <vscale x 2 x i1>,
+ i64);
+
+define <vscale x 2 x i32> @intrinsic_vzext_mask_vf2_nxv2i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, <vscale x 2 x i16> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vzext_mask_vf2_nxv2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,tu,mu
+; CHECK-NEXT: vzext.vf2 v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.vzext.mask.nxv2i32.nxv2i16(
+ <vscale x 2 x i32> %1,
+ <vscale x 2 x i16> %2,
+ <vscale x 2 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 2 x i32> %a
+}
+
+declare <vscale x 4 x i32> @llvm.riscv.vzext.nxv4i32.nxv4i16(
+ <vscale x 4 x i16>,
+ i64);
+
+define <vscale x 4 x i32> @intrinsic_vzext_vf2_nxv4i32(<vscale x 4 x i16> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vzext_vf2_nxv4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
+; CHECK-NEXT: vzext.vf2 v26, v16
+; CHECK-NEXT: vmv2r.v v16, v26
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.vzext.nxv4i32.nxv4i16(
+ <vscale x 4 x i16> %0,
+ i64 %1)
+
+ ret <vscale x 4 x i32> %a
+}
+
+declare <vscale x 4 x i32> @llvm.riscv.vzext.mask.nxv4i32.nxv4i16(
+ <vscale x 4 x i32>,
+ <vscale x 4 x i16>,
+ <vscale x 4 x i1>,
+ i64);
+
+define <vscale x 4 x i32> @intrinsic_vzext_mask_vf2_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i16> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vzext_mask_vf2_nxv4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,tu,mu
+; CHECK-NEXT: vzext.vf2 v16, v18, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.vzext.mask.nxv4i32.nxv4i16(
+ <vscale x 4 x i32> %1,
+ <vscale x 4 x i16> %2,
+ <vscale x 4 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 4 x i32> %a
+}
+
+declare <vscale x 8 x i32> @llvm.riscv.vzext.nxv8i32.nxv8i16(
+ <vscale x 8 x i16>,
+ i64);
+
+define <vscale x 8 x i32> @intrinsic_vzext_vf2_nxv8i32(<vscale x 8 x i16> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vzext_vf2_nxv8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu
+; CHECK-NEXT: vzext.vf2 v28, v16
+; CHECK-NEXT: vmv4r.v v16, v28
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.vzext.nxv8i32.nxv8i16(
+ <vscale x 8 x i16> %0,
+ i64 %1)
+
+ ret <vscale x 8 x i32> %a
+}
+
+declare <vscale x 8 x i32> @llvm.riscv.vzext.mask.nxv8i32.nxv8i16(
+ <vscale x 8 x i32>,
+ <vscale x 8 x i16>,
+ <vscale x 8 x i1>,
+ i64);
+
+define <vscale x 8 x i32> @intrinsic_vzext_mask_vf2_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i16> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vzext_mask_vf2_nxv8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,tu,mu
+; CHECK-NEXT: vzext.vf2 v16, v20, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.vzext.mask.nxv8i32.nxv8i16(
+ <vscale x 8 x i32> %1,
+ <vscale x 8 x i16> %2,
+ <vscale x 8 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 8 x i32> %a
+}
+
+declare <vscale x 16 x i32> @llvm.riscv.vzext.nxv16i32.nxv16i16(
+ <vscale x 16 x i16>,
+ i64);
+
+define <vscale x 16 x i32> @intrinsic_vzext_vf2_nxv16i32(<vscale x 16 x i16> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vzext_vf2_nxv16i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m8,ta,mu
+; CHECK-NEXT: vzext.vf2 v8, v16
+; CHECK-NEXT: vmv8r.v v16, v8
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 16 x i32> @llvm.riscv.vzext.nxv16i32.nxv16i16(
+ <vscale x 16 x i16> %0,
+ i64 %1)
+
+ ret <vscale x 16 x i32> %a
+}
+
+declare <vscale x 16 x i32> @llvm.riscv.vzext.mask.nxv16i32.nxv16i16(
+ <vscale x 16 x i32>,
+ <vscale x 16 x i16>,
+ <vscale x 16 x i1>,
+ i64);
+
+define <vscale x 16 x i32> @intrinsic_vzext_mask_vf2_nxv16i32(<vscale x 16 x i1> %0, <vscale x 16 x i32> %1, <vscale x 16 x i16> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vzext_mask_vf2_nxv16i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a2, zero, e16,m4,ta,mu
+; CHECK-NEXT: vle16.v v28, (a0)
+; CHECK-NEXT: vsetvli a0, a1, e32,m8,tu,mu
+; CHECK-NEXT: vzext.vf2 v16, v28, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 16 x i32> @llvm.riscv.vzext.mask.nxv16i32.nxv16i16(
+ <vscale x 16 x i32> %1,
+ <vscale x 16 x i16> %2,
+ <vscale x 16 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 16 x i32> %a
+}
+
+declare <vscale x 1 x i16> @llvm.riscv.vzext.nxv1i16.nxv1i8(
+ <vscale x 1 x i8>,
+ i64);
+
+define <vscale x 1 x i16> @intrinsic_vzext_vf2_nxv1i16(<vscale x 1 x i8> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vzext_vf2_nxv1i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu
+; CHECK-NEXT: vzext.vf2 v25, v16
+; CHECK-NEXT: vmv1r.v v16, v25
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i16> @llvm.riscv.vzext.nxv1i16.nxv1i8(
+ <vscale x 1 x i8> %0,
+ i64 %1)
+
+ ret <vscale x 1 x i16> %a
+}
+
+declare <vscale x 1 x i16> @llvm.riscv.vzext.mask.nxv1i16.nxv1i8(
+ <vscale x 1 x i16>,
+ <vscale x 1 x i8>,
+ <vscale x 1 x i1>,
+ i64);
+
+define <vscale x 1 x i16> @intrinsic_vzext_mask_vf2_nxv1i16(<vscale x 1 x i1> %0, <vscale x 1 x i16> %1, <vscale x 1 x i8> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vzext_mask_vf2_nxv1i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,mf4,tu,mu
+; CHECK-NEXT: vzext.vf2 v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i16> @llvm.riscv.vzext.mask.nxv1i16.nxv1i8(
+ <vscale x 1 x i16> %1,
+ <vscale x 1 x i8> %2,
+ <vscale x 1 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 1 x i16> %a
+}
+
+declare <vscale x 2 x i16> @llvm.riscv.vzext.nxv2i16.nxv2i8(
+ <vscale x 2 x i8>,
+ i64);
+
+define <vscale x 2 x i16> @intrinsic_vzext_vf2_nxv2i16(<vscale x 2 x i8> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vzext_vf2_nxv2i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu
+; CHECK-NEXT: vzext.vf2 v25, v16
+; CHECK-NEXT: vmv1r.v v16, v25
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i16> @llvm.riscv.vzext.nxv2i16.nxv2i8(
+ <vscale x 2 x i8> %0,
+ i64 %1)
+
+ ret <vscale x 2 x i16> %a
+}
+
+declare <vscale x 2 x i16> @llvm.riscv.vzext.mask.nxv2i16.nxv2i8(
+ <vscale x 2 x i16>,
+ <vscale x 2 x i8>,
+ <vscale x 2 x i1>,
+ i64);
+
+define <vscale x 2 x i16> @intrinsic_vzext_mask_vf2_nxv2i16(<vscale x 2 x i1> %0, <vscale x 2 x i16> %1, <vscale x 2 x i8> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vzext_mask_vf2_nxv2i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,mf2,tu,mu
+; CHECK-NEXT: vzext.vf2 v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i16> @llvm.riscv.vzext.mask.nxv2i16.nxv2i8(
+ <vscale x 2 x i16> %1,
+ <vscale x 2 x i8> %2,
+ <vscale x 2 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 2 x i16> %a
+}
+
+declare <vscale x 4 x i16> @llvm.riscv.vzext.nxv4i16.nxv4i8(
+ <vscale x 4 x i8>,
+ i64);
+
+define <vscale x 4 x i16> @intrinsic_vzext_vf2_nxv4i16(<vscale x 4 x i8> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vzext_vf2_nxv4i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vzext.vf2 v25, v16
+; CHECK-NEXT: vmv1r.v v16, v25
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i16> @llvm.riscv.vzext.nxv4i16.nxv4i8(
+ <vscale x 4 x i8> %0,
+ i64 %1)
+
+ ret <vscale x 4 x i16> %a
+}
+
+declare <vscale x 4 x i16> @llvm.riscv.vzext.mask.nxv4i16.nxv4i8(
+ <vscale x 4 x i16>,
+ <vscale x 4 x i8>,
+ <vscale x 4 x i1>,
+ i64);
+
+define <vscale x 4 x i16> @intrinsic_vzext_mask_vf2_nxv4i16(<vscale x 4 x i1> %0, <vscale x 4 x i16> %1, <vscale x 4 x i8> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vzext_mask_vf2_nxv4i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,tu,mu
+; CHECK-NEXT: vzext.vf2 v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i16> @llvm.riscv.vzext.mask.nxv4i16.nxv4i8(
+ <vscale x 4 x i16> %1,
+ <vscale x 4 x i8> %2,
+ <vscale x 4 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 4 x i16> %a
+}
+
+declare <vscale x 8 x i16> @llvm.riscv.vzext.nxv8i16.nxv8i8(
+ <vscale x 8 x i8>,
+ i64);
+
+define <vscale x 8 x i16> @intrinsic_vzext_vf2_nxv8i16(<vscale x 8 x i8> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vzext_vf2_nxv8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu
+; CHECK-NEXT: vzext.vf2 v26, v16
+; CHECK-NEXT: vmv2r.v v16, v26
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i16> @llvm.riscv.vzext.nxv8i16.nxv8i8(
+ <vscale x 8 x i8> %0,
+ i64 %1)
+
+ ret <vscale x 8 x i16> %a
+}
+
+declare <vscale x 8 x i16> @llvm.riscv.vzext.mask.nxv8i16.nxv8i8(
+ <vscale x 8 x i16>,
+ <vscale x 8 x i8>,
+ <vscale x 8 x i1>,
+ i64);
+
+define <vscale x 8 x i16> @intrinsic_vzext_mask_vf2_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i8> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vzext_mask_vf2_nxv8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m2,tu,mu
+; CHECK-NEXT: vzext.vf2 v16, v18, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i16> @llvm.riscv.vzext.mask.nxv8i16.nxv8i8(
+ <vscale x 8 x i16> %1,
+ <vscale x 8 x i8> %2,
+ <vscale x 8 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 8 x i16> %a
+}
+
+declare <vscale x 16 x i16> @llvm.riscv.vzext.nxv16i16.nxv16i8(
+ <vscale x 16 x i8>,
+ i64);
+
+define <vscale x 16 x i16> @intrinsic_vzext_vf2_nxv16i16(<vscale x 16 x i8> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vzext_vf2_nxv16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m4,ta,mu
+; CHECK-NEXT: vzext.vf2 v28, v16
+; CHECK-NEXT: vmv4r.v v16, v28
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 16 x i16> @llvm.riscv.vzext.nxv16i16.nxv16i8(
+ <vscale x 16 x i8> %0,
+ i64 %1)
+
+ ret <vscale x 16 x i16> %a
+}
+
+declare <vscale x 16 x i16> @llvm.riscv.vzext.mask.nxv16i16.nxv16i8(
+ <vscale x 16 x i16>,
+ <vscale x 16 x i8>,
+ <vscale x 16 x i1>,
+ i64);
+
+define <vscale x 16 x i16> @intrinsic_vzext_mask_vf2_nxv16i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i8> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vzext_mask_vf2_nxv16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m4,tu,mu
+; CHECK-NEXT: vzext.vf2 v16, v20, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 16 x i16> @llvm.riscv.vzext.mask.nxv16i16.nxv16i8(
+ <vscale x 16 x i16> %1,
+ <vscale x 16 x i8> %2,
+ <vscale x 16 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 16 x i16> %a
+}
+
+declare <vscale x 32 x i16> @llvm.riscv.vzext.nxv32i16.nxv32i8(
+ <vscale x 32 x i8>,
+ i64);
+
+define <vscale x 32 x i16> @intrinsic_vzext_vf2_nxv32i16(<vscale x 32 x i8> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vzext_vf2_nxv32i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m8,ta,mu
+; CHECK-NEXT: vzext.vf2 v8, v16
+; CHECK-NEXT: vmv8r.v v16, v8
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 32 x i16> @llvm.riscv.vzext.nxv32i16.nxv32i8(
+ <vscale x 32 x i8> %0,
+ i64 %1)
+
+ ret <vscale x 32 x i16> %a
+}
+
+declare <vscale x 32 x i16> @llvm.riscv.vzext.mask.nxv32i16.nxv32i8(
+ <vscale x 32 x i16>,
+ <vscale x 32 x i8>,
+ <vscale x 32 x i1>,
+ i64);
+
+define <vscale x 32 x i16> @intrinsic_vzext_mask_vf2_nxv32i16(<vscale x 32 x i1> %0, <vscale x 32 x i16> %1, <vscale x 32 x i8> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vzext_mask_vf2_nxv32i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a2, zero, e8,m4,ta,mu
+; CHECK-NEXT: vle8.v v28, (a0)
+; CHECK-NEXT: vsetvli a0, a1, e16,m8,tu,mu
+; CHECK-NEXT: vzext.vf2 v16, v28, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 32 x i16> @llvm.riscv.vzext.mask.nxv32i16.nxv32i8(
+ <vscale x 32 x i16> %1,
+ <vscale x 32 x i8> %2,
+ <vscale x 32 x i1> %0,
+ i64 %3)
+
+ ret <vscale x 32 x i16> %a
+}
More information about the llvm-branch-commits
mailing list