[llvm] 1aa4f0b - [RISCV][VP] Add RVV codegen for vp.trunc.
via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 14 19:30:16 PDT 2022
Author: jacquesguan
Date: 2022-04-15T02:29:53Z
New Revision: 1aa4f0bb6cc21b7666718f5534c88d03152ddfb1
URL: https://github.com/llvm/llvm-project/commit/1aa4f0bb6cc21b7666718f5534c88d03152ddfb1
DIFF: https://github.com/llvm/llvm-project/commit/1aa4f0bb6cc21b7666718f5534c88d03152ddfb1.diff
LOG: [RISCV][VP] Add RVV codegen for vp.trunc.
Differential Revision: https://reviews.llvm.org/D123579
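For context, a minimal example of the intrinsic this patch now lowers, taken from the new vtrunc-vp.ll test added below; a single SEW halving (i64 -> i32 here) selects one masked vncvt.x.x.w under the requested VL, while wider truncates (e.g. i64 -> i8) chain one narrowing step per power of two:

declare <vscale x 2 x i32> @llvm.vp.trunc.nxv2i64.nxv2i32(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)

define <vscale x 2 x i32> @vtrunc_nxv2i32_nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
  %v = call <vscale x 2 x i32> @llvm.vp.trunc.nxv2i64.nxv2i32(<vscale x 2 x i64> %a, <vscale x 2 x i1> %m, i32 %vl)
  ret <vscale x 2 x i32> %v
}

; Expected codegen (see the CHECK lines in vtrunc-vp.ll below):
;   vsetvli zero, a0, e32, m1, ta, mu
;   vncvt.x.x.w v10, v8, v0.t
;   vmv.v.v v8, v10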
Added:
llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp-mask.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll
llvm/test/CodeGen/RISCV/rvv/vtrunc-vp-mask.ll
llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/lib/Target/RISCV/RISCVISelLowering.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index c10401e878840..fc931b964f7fc 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -492,7 +492,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FPTOSI,
ISD::VP_FPTOUI, ISD::VP_SETCC, ISD::VP_SEXT,
- ISD::VP_ZEXT};
+ ISD::VP_ZEXT, ISD::VP_TRUNC};
static const unsigned FloatingPointVPOps[] = {
ISD::VP_FADD, ISD::VP_FSUB,
@@ -579,6 +579,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VP_FPTOSI, VT, Custom);
setOperationAction(ISD::VP_FPTOUI, VT, Custom);
+ setOperationAction(ISD::VP_TRUNC, VT, Custom);
}
for (MVT VT : IntVecVTs) {
@@ -859,6 +860,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VP_FPTOSI, VT, Custom);
setOperationAction(ISD::VP_FPTOUI, VT, Custom);
setOperationAction(ISD::VP_SETCC, VT, Custom);
+ setOperationAction(ISD::VP_TRUNC, VT, Custom);
continue;
}
@@ -3167,55 +3169,11 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
}
return DAG.getNode(Opc, DL, VT, Op0, Op1, ShAmt);
}
- case ISD::TRUNCATE: {
- SDLoc DL(Op);
- MVT VT = Op.getSimpleValueType();
+ case ISD::TRUNCATE:
// Only custom-lower vector truncates
- if (!VT.isVector())
+ if (!Op.getSimpleValueType().isVector())
return Op;
-
- // Truncates to mask types are handled differently
- if (VT.getVectorElementType() == MVT::i1)
- return lowerVectorMaskTrunc(Op, DAG);
-
- // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
- // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
- // truncate by one power of two at a time.
- MVT DstEltVT = VT.getVectorElementType();
-
- SDValue Src = Op.getOperand(0);
- MVT SrcVT = Src.getSimpleValueType();
- MVT SrcEltVT = SrcVT.getVectorElementType();
-
- assert(DstEltVT.bitsLT(SrcEltVT) &&
- isPowerOf2_64(DstEltVT.getSizeInBits()) &&
- isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
- "Unexpected vector truncate lowering");
-
- MVT ContainerVT = SrcVT;
- if (SrcVT.isFixedLengthVector()) {
- ContainerVT = getContainerForFixedLengthVector(SrcVT);
- Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
- }
-
- SDValue Result = Src;
- SDValue Mask, VL;
- std::tie(Mask, VL) =
- getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
- LLVMContext &Context = *DAG.getContext();
- const ElementCount Count = ContainerVT.getVectorElementCount();
- do {
- SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
- EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
- Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
- Mask, VL);
- } while (SrcEltVT != DstEltVT);
-
- if (SrcVT.isFixedLengthVector())
- Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
-
- return Result;
- }
+ return lowerVectorTruncLike(Op, DAG);
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND:
if (Op.getOperand(0).getValueType().isVector() &&
@@ -3704,6 +3662,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerVPOp(Op, DAG,
Op.getOpcode() == ISD::VP_SEXT ? RISCVISD::VSEXT_VL
: RISCVISD::VZEXT_VL);
+ case ISD::VP_TRUNC:
+ return lowerVectorTruncLike(Op, DAG);
case ISD::VP_FPTOSI:
return lowerVPFPIntConvOp(Op, DAG, RISCVISD::FP_TO_SINT_VL);
case ISD::VP_FPTOUI:
@@ -4356,8 +4316,9 @@ SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
// Custom-lower truncations from vectors to mask vectors by using a mask and a
// setcc operation:
// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
-SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
+ SelectionDAG &DAG) const {
+ bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNC;
SDLoc DL(Op);
EVT MaskVT = Op.getValueType();
// Only expect to custom-lower truncations to mask types
@@ -4365,36 +4326,108 @@ SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op,
"Unexpected type for vector mask lowering");
SDValue Src = Op.getOperand(0);
MVT VecVT = Src.getSimpleValueType();
-
+ SDValue Mask, VL;
+ if (IsVPTrunc) {
+ Mask = Op.getOperand(1);
+ VL = Op.getOperand(2);
+ }
// If this is a fixed vector, we need to convert it to a scalable vector.
MVT ContainerVT = VecVT;
+
if (VecVT.isFixedLengthVector()) {
ContainerVT = getContainerForFixedLengthVector(VecVT);
Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
+ if (IsVPTrunc) {
+ MVT MaskContainerVT =
+ getContainerForFixedLengthVector(Mask.getSimpleValueType());
+ Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
+ }
+ }
+
+ if (!IsVPTrunc) {
+ std::tie(Mask, VL) =
+ getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
}
SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
- DAG.getUNDEF(ContainerVT), SplatOne);
+ DAG.getUNDEF(ContainerVT), SplatOne, VL);
SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
- DAG.getUNDEF(ContainerVT), SplatZero);
-
- if (VecVT.isScalableVector()) {
- SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne);
- return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE);
- }
-
- SDValue Mask, VL;
- std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
+ DAG.getUNDEF(ContainerVT), SplatZero, VL);
MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
SDValue Trunc =
DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne, Mask, VL);
Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT, Trunc, SplatZero,
DAG.getCondCode(ISD::SETNE), Mask, VL);
- return convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
+ if (MaskVT.isFixedLengthVector())
+ Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
+ return Trunc;
+}
+
+SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
+ SelectionDAG &DAG) const {
+ bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNC;
+ SDLoc DL(Op);
+
+ MVT VT = Op.getSimpleValueType();
+ // Only custom-lower vector truncates
+ assert(VT.isVector() && "Unexpected type for vector truncate lowering");
+
+ // Truncates to mask types are handled differently
+ if (VT.getVectorElementType() == MVT::i1)
+ return lowerVectorMaskTruncLike(Op, DAG);
+
+ // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
+ // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
+ // truncate by one power of two at a time.
+ MVT DstEltVT = VT.getVectorElementType();
+
+ SDValue Src = Op.getOperand(0);
+ MVT SrcVT = Src.getSimpleValueType();
+ MVT SrcEltVT = SrcVT.getVectorElementType();
+
+ assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
+ isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
+ "Unexpected vector truncate lowering");
+
+ MVT ContainerVT = SrcVT;
+ SDValue Mask, VL;
+ if (IsVPTrunc) {
+ Mask = Op.getOperand(1);
+ VL = Op.getOperand(2);
+ }
+ if (SrcVT.isFixedLengthVector()) {
+ ContainerVT = getContainerForFixedLengthVector(SrcVT);
+ Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
+ if (IsVPTrunc) {
+ MVT MaskVT =
+ MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
+ Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
+ }
+ }
+
+ SDValue Result = Src;
+ if (!IsVPTrunc) {
+ std::tie(Mask, VL) =
+ getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
+ }
+
+ LLVMContext &Context = *DAG.getContext();
+ const ElementCount Count = ContainerVT.getVectorElementCount();
+ do {
+ SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
+ EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
+ Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
+ Mask, VL);
+ } while (SrcEltVT != DstEltVT);
+
+ if (SrcVT.isFixedLengthVector())
+ Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
+
+ return Result;
}
// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index f805c2ba26475..56d0779905fc4 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -612,7 +612,8 @@ class RISCVTargetLowering : public TargetLowering {
SDValue lowerVectorMaskSplat(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
int64_t ExtTrueVal) const;
- SDValue lowerVectorMaskTrunc(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVectorMaskTruncLike(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVectorTruncLike(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp-mask.ll
new file mode 100644
index 0000000000000..84d2f3c7325c3
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp-mask.ll
@@ -0,0 +1,78 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
+
+declare <2 x i1> @llvm.vp.trunc.nxv2i16.nxv2i1(<2 x i16>, <2 x i1>, i32)
+
+define <2 x i1> @vtrunc_nxv2i1_nxv2i16(<2 x i16> %a, <2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i1_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vand.vi v8, v8, 1, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmsne.vi v0, v8, 0, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x i1> @llvm.vp.trunc.nxv2i16.nxv2i1(<2 x i16> %a, <2 x i1> %m, i32 %vl)
+ ret <2 x i1> %v
+}
+
+define <2 x i1> @vtrunc_nxv2i1_nxv2i16_unmasked(<2 x i16> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i1_nxv2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vand.vi v8, v8, 1
+; CHECK-NEXT: vmsne.vi v0, v8, 0
+; CHECK-NEXT: ret
+ %v = call <2 x i1> @llvm.vp.trunc.nxv2i16.nxv2i1(<2 x i16> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
+ ret <2 x i1> %v
+}
+
+declare <2 x i1> @llvm.vp.trunc.nxv2i1.nxv2i32(<2 x i32>, <2 x i1>, i32)
+
+define <2 x i1> @vtrunc_nxv2i1_nxv2i32(<2 x i32> %a, <2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i1_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vand.vi v8, v8, 1, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vmsne.vi v0, v8, 0, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x i1> @llvm.vp.trunc.nxv2i1.nxv2i32(<2 x i32> %a, <2 x i1> %m, i32 %vl)
+ ret <2 x i1> %v
+}
+
+define <2 x i1> @vtrunc_nxv2i1_nxv2i32_unmasked(<2 x i32> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i1_nxv2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vand.vi v8, v8, 1
+; CHECK-NEXT: vmsne.vi v0, v8, 0
+; CHECK-NEXT: ret
+ %v = call <2 x i1> @llvm.vp.trunc.nxv2i1.nxv2i32(<2 x i32> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
+ ret <2 x i1> %v
+}
+
+declare <2 x i1> @llvm.vp.trunc.nxv2i1.nxv2i64(<2 x i64>, <2 x i1>, i32)
+
+define <2 x i1> @vtrunc_nxv2i1_nxv2i64(<2 x i64> %a, <2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i1_nxv2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT: vand.vi v8, v8, 1, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-NEXT: vmsne.vi v0, v8, 0, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x i1> @llvm.vp.trunc.nxv2i1.nxv2i64(<2 x i64> %a, <2 x i1> %m, i32 %vl)
+ ret <2 x i1> %v
+}
+
+define <2 x i1> @vtrunc_nxv2i1_nxv2i64_unmasked(<2 x i64> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i1_nxv2i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT: vand.vi v8, v8, 1
+; CHECK-NEXT: vmsne.vi v0, v8, 0
+; CHECK-NEXT: ret
+ %v = call <2 x i1> @llvm.vp.trunc.nxv2i1.nxv2i64(<2 x i64> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
+ ret <2 x i1> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll
new file mode 100644
index 0000000000000..0f986c6e8a5ee
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll
@@ -0,0 +1,151 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
+
+declare <2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i16(<2 x i16>, <2 x i1>, i32)
+
+define <2 x i8> @vtrunc_nxv2i8_nxv2i16(<2 x i16> %a, <2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i8_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT: vncvt.x.x.w v8, v8, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i16(<2 x i16> %a, <2 x i1> %m, i32 %vl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vtrunc_nxv2i8_nxv2i16_unmasked(<2 x i16> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i8_nxv2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT: vncvt.x.x.w v8, v8
+; CHECK-NEXT: ret
+ %v = call <2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i16(<2 x i16> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
+ ret <2 x i8> %v
+}
+
+declare <2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i32(<2 x i32>, <2 x i1>, i32)
+
+define <2 x i8> @vtrunc_nxv2i8_nxv2i32(<2 x i32> %a, <2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i8_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vncvt.x.x.w v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
+; CHECK-NEXT: vncvt.x.x.w v8, v8, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i32(<2 x i32> %a, <2 x i1> %m, i32 %vl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vtrunc_nxv2i8_nxv2i32_unmasked(<2 x i32> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i8_nxv2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vncvt.x.x.w v8, v8
+; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
+; CHECK-NEXT: vncvt.x.x.w v8, v8
+; CHECK-NEXT: ret
+ %v = call <2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i32(<2 x i32> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
+ ret <2 x i8> %v
+}
+
+declare <2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i64(<2 x i64>, <2 x i1>, i32)
+
+define <2 x i8> @vtrunc_nxv2i8_nxv2i64(<2 x i64> %a, <2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i8_nxv2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vncvt.x.x.w v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
+; CHECK-NEXT: vncvt.x.x.w v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
+; CHECK-NEXT: vncvt.x.x.w v8, v8, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i64(<2 x i64> %a, <2 x i1> %m, i32 %vl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vtrunc_nxv2i8_nxv2i64_unmasked(<2 x i64> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i8_nxv2i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vncvt.x.x.w v8, v8
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
+; CHECK-NEXT: vncvt.x.x.w v8, v8
+; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
+; CHECK-NEXT: vncvt.x.x.w v8, v8
+; CHECK-NEXT: ret
+ %v = call <2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i64(<2 x i64> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
+ ret <2 x i8> %v
+}
+
+declare <2 x i16> @llvm.vp.trunc.nxv2i16.nxv2i32(<2 x i32>, <2 x i1>, i32)
+
+define <2 x i16> @vtrunc_nxv2i16_nxv2i32(<2 x i32> %a, <2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i16_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vncvt.x.x.w v8, v8, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x i16> @llvm.vp.trunc.nxv2i16.nxv2i32(<2 x i32> %a, <2 x i1> %m, i32 %vl)
+ ret <2 x i16> %v
+}
+
+define <2 x i16> @vtrunc_nxv2i16_nxv2i32_unmasked(<2 x i32> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i16_nxv2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vncvt.x.x.w v8, v8
+; CHECK-NEXT: ret
+ %v = call <2 x i16> @llvm.vp.trunc.nxv2i16.nxv2i32(<2 x i32> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
+ ret <2 x i16> %v
+}
+
+declare <2 x i16> @llvm.vp.trunc.nxv2i16.nxv2i64(<2 x i64>, <2 x i1>, i32)
+
+define <2 x i16> @vtrunc_nxv2i16_nxv2i64(<2 x i64> %a, <2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i16_nxv2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vncvt.x.x.w v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
+; CHECK-NEXT: vncvt.x.x.w v8, v8, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x i16> @llvm.vp.trunc.nxv2i16.nxv2i64(<2 x i64> %a, <2 x i1> %m, i32 %vl)
+ ret <2 x i16> %v
+}
+
+define <2 x i16> @vtrunc_nxv2i16_nxv2i64_unmasked(<2 x i64> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i16_nxv2i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vncvt.x.x.w v8, v8
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
+; CHECK-NEXT: vncvt.x.x.w v8, v8
+; CHECK-NEXT: ret
+ %v = call <2 x i16> @llvm.vp.trunc.nxv2i16.nxv2i64(<2 x i64> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
+ ret <2 x i16> %v
+}
+
+declare <2 x i32> @llvm.vp.trunc.nxv2i64.nxv2i32(<2 x i64>, <2 x i1>, i32)
+
+define <2 x i32> @vtrunc_nxv2i32_nxv2i64(<2 x i64> %a, <2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i32_nxv2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vncvt.x.x.w v8, v8, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x i32> @llvm.vp.trunc.nxv2i64.nxv2i32(<2 x i64> %a, <2 x i1> %m, i32 %vl)
+ ret <2 x i32> %v
+}
+
+define <2 x i32> @vtrunc_nxv2i32_nxv2i64_unmasked(<2 x i64> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i32_nxv2i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vncvt.x.x.w v8, v8
+; CHECK-NEXT: ret
+ %v = call <2 x i32> @llvm.vp.trunc.nxv2i64.nxv2i32(<2 x i64> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
+ ret <2 x i32> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp-mask.ll
new file mode 100644
index 0000000000000..03790ddfb0957
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp-mask.ll
@@ -0,0 +1,79 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+
+declare <vscale x 2 x i1> @llvm.vp.trunc.nxv2i16.nxv2i1(<vscale x 2 x i16>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i1> @vtrunc_nxv2i1_nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i1_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vand.vi v8, v8, 1, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmsne.vi v0, v8, 0, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i1> @llvm.vp.trunc.nxv2i16.nxv2i1(<vscale x 2 x i16> %a, <vscale x 2 x i1> %m, i32 %vl)
+ ret <vscale x 2 x i1> %v
+}
+
+define <vscale x 2 x i1> @vtrunc_nxv2i1_nxv2i16_unmasked(<vscale x 2 x i16> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i1_nxv2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vand.vi v8, v8, 1
+; CHECK-NEXT: vmsne.vi v0, v8, 0
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i1> @llvm.vp.trunc.nxv2i16.nxv2i1(<vscale x 2 x i16> %a, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer), i32 %vl)
+ ret <vscale x 2 x i1> %v
+}
+
+declare <vscale x 2 x i1> @llvm.vp.trunc.nxv2i1.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i1> @vtrunc_nxv2i1_nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i1_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vand.vi v8, v8, 1, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmsne.vi v0, v8, 0, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i1> @llvm.vp.trunc.nxv2i1.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i32 %vl)
+ ret <vscale x 2 x i1> %v
+}
+
+define <vscale x 2 x i1> @vtrunc_nxv2i1_nxv2i32_unmasked(<vscale x 2 x i32> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i1_nxv2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vand.vi v8, v8, 1
+; CHECK-NEXT: vmsne.vi v0, v8, 0
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i1> @llvm.vp.trunc.nxv2i1.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer), i32 %vl)
+ ret <vscale x 2 x i1> %v
+}
+
+declare <vscale x 2 x i1> @llvm.vp.trunc.nxv2i1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i1> @vtrunc_nxv2i1_nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i1_nxv2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT: vand.vi v10, v8, 1, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT: vmsne.vi v8, v10, 0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i1> @llvm.vp.trunc.nxv2i1.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %m, i32 %vl)
+ ret <vscale x 2 x i1> %v
+}
+
+define <vscale x 2 x i1> @vtrunc_nxv2i1_nxv2i64_unmasked(<vscale x 2 x i64> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i1_nxv2i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT: vand.vi v8, v8, 1
+; CHECK-NEXT: vmsne.vi v0, v8, 0
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i1> @llvm.vp.trunc.nxv2i1.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer), i32 %vl)
+ ret <vscale x 2 x i1> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll
new file mode 100644
index 0000000000000..f124ee63785bc
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll
@@ -0,0 +1,153 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+
+declare <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i8> @vtrunc_nxv2i8_nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i8_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT: vncvt.x.x.w v8, v8, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i1> %m, i32 %vl)
+ ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vtrunc_nxv2i8_nxv2i16_unmasked(<vscale x 2 x i16> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i8_nxv2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT: vncvt.x.x.w v8, v8
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer), i32 %vl)
+ ret <vscale x 2 x i8> %v
+}
+
+declare <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i8> @vtrunc_nxv2i8_nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i8_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vncvt.x.x.w v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
+; CHECK-NEXT: vncvt.x.x.w v8, v8, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i32 %vl)
+ ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vtrunc_nxv2i8_nxv2i32_unmasked(<vscale x 2 x i32> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i8_nxv2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vncvt.x.x.w v8, v8
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
+; CHECK-NEXT: vncvt.x.x.w v8, v8
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer), i32 %vl)
+ ret <vscale x 2 x i8> %v
+}
+
+declare <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i8> @vtrunc_nxv2i8_nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i8_nxv2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vncvt.x.x.w v10, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
+; CHECK-NEXT: vncvt.x.x.w v8, v10, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
+; CHECK-NEXT: vncvt.x.x.w v8, v8, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %m, i32 %vl)
+ ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vtrunc_nxv2i8_nxv2i64_unmasked(<vscale x 2 x i64> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i8_nxv2i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vncvt.x.x.w v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
+; CHECK-NEXT: vncvt.x.x.w v8, v10
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
+; CHECK-NEXT: vncvt.x.x.w v8, v8
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer), i32 %vl)
+ ret <vscale x 2 x i8> %v
+}
+
+declare <vscale x 2 x i16> @llvm.vp.trunc.nxv2i16.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i16> @vtrunc_nxv2i16_nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i16_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vncvt.x.x.w v8, v8, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i16> @llvm.vp.trunc.nxv2i16.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i32 %vl)
+ ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vtrunc_nxv2i16_nxv2i32_unmasked(<vscale x 2 x i32> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i16_nxv2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vncvt.x.x.w v8, v8
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i16> @llvm.vp.trunc.nxv2i16.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer), i32 %vl)
+ ret <vscale x 2 x i16> %v
+}
+
+declare <vscale x 2 x i16> @llvm.vp.trunc.nxv2i16.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i16> @vtrunc_nxv2i16_nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i16_nxv2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vncvt.x.x.w v10, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
+; CHECK-NEXT: vncvt.x.x.w v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i16> @llvm.vp.trunc.nxv2i16.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %m, i32 %vl)
+ ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vtrunc_nxv2i16_nxv2i64_unmasked(<vscale x 2 x i64> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i16_nxv2i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vncvt.x.x.w v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
+; CHECK-NEXT: vncvt.x.x.w v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i16> @llvm.vp.trunc.nxv2i16.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer), i32 %vl)
+ ret <vscale x 2 x i16> %v
+}
+
+declare <vscale x 2 x i32> @llvm.vp.trunc.nxv2i64.nxv2i32(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i32> @vtrunc_nxv2i32_nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i32_nxv2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vncvt.x.x.w v10, v8, v0.t
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i32> @llvm.vp.trunc.nxv2i64.nxv2i32(<vscale x 2 x i64> %a, <vscale x 2 x i1> %m, i32 %vl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vtrunc_nxv2i32_nxv2i64_unmasked(<vscale x 2 x i64> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i32_nxv2i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vncvt.x.x.w v10, v8
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i32> @llvm.vp.trunc.nxv2i64.nxv2i32(<vscale x 2 x i64> %a, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer), i32 %vl)
+ ret <vscale x 2 x i32> %v
+}