[llvm] 3f0df4d - [RISCV] Expand scalable-vector truncstores and extloads
Fraser Cormack via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 5 09:10:47 PDT 2021
Author: Fraser Cormack
Date: 2021-04-05T17:03:45+01:00
New Revision: 3f0df4d7b0269623cfcd0ef6272e6a3a2ad9066d
URL: https://github.com/llvm/llvm-project/commit/3f0df4d7b0269623cfcd0ef6272e6a3a2ad9066d
DIFF: https://github.com/llvm/llvm-project/commit/3f0df4d7b0269623cfcd0ef6272e6a3a2ad9066d.diff
LOG: [RISCV] Expand scalable-vector truncstores and extloads
Caught in internal testing: these operations are assumed legal by
default, even for scalable vector types. Expand them back into separate
truncations and stores, or into loads and extensions.
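For illustration, a minimal IR sketch of one affected pattern (the
function name here is purely illustrative, not taken from the patch): a
truncating store of a scalable vector, which the DAG combiner would
otherwise fold into a single truncating-store node assumed legal, and
which is now expanded back into a separate truncate followed by a plain
store:

  define void @truncstore_sketch(<vscale x 2 x i32> %v, <vscale x 2 x i8>* %p) {
    %t = trunc <vscale x 2 x i32> %v to <vscale x 2 x i8>
    store <vscale x 2 x i8> %t, <vscale x 2 x i8>* %p
    ret void
  }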
Also add explicit fixed-length vector tests for these operations, even
though they should have been correct already.
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D99654
Added:
llvm/test/CodeGen/RISCV/rvv/extload-truncstore.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 75ad5b9ed3949..65002c3a20dc4 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -440,6 +440,15 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+
+ // Expand all extending loads to types larger than this, and truncating
+ // stores from types larger than this.
+ for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
+ setTruncStoreAction(OtherVT, VT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
+ }
}
for (MVT VT : IntVecVTs) {
@@ -498,6 +507,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::STEP_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
+
+ for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
+ setTruncStoreAction(VT, OtherVT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
+ }
}
// Expand various CCs to best match the RVV ISA, which natively supports UNE
@@ -545,17 +561,32 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
};
+ // Sets common extload/truncstore actions on RVV floating-point vector
+ // types.
+ const auto SetCommonVFPExtLoadTruncStoreActions =
+ [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
+ for (auto SmallVT : SmallerVTs) {
+ setTruncStoreAction(VT, SmallVT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
+ }
+ };
+
if (Subtarget.hasStdExtZfh())
for (MVT VT : F16VecVTs)
SetCommonVFPActions(VT);
- if (Subtarget.hasStdExtF())
- for (MVT VT : F32VecVTs)
+ for (MVT VT : F32VecVTs) {
+ if (Subtarget.hasStdExtF())
SetCommonVFPActions(VT);
+ SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
+ }
- if (Subtarget.hasStdExtD())
- for (MVT VT : F64VecVTs)
+ for (MVT VT : F64VecVTs) {
+ if (Subtarget.hasStdExtD())
SetCommonVFPActions(VT);
+ SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
+ SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
+ }
if (Subtarget.useRVVForFixedLengthVectors()) {
for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
@@ -565,8 +596,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// By default everything must be expanded.
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
setOperationAction(Op, VT, Expand);
- for (MVT OtherVT : MVT::fixedlen_vector_valuetypes())
+ for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
setTruncStoreAction(VT, OtherVT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
+ }
// We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
diff --git a/llvm/test/CodeGen/RISCV/rvv/extload-truncstore.ll b/llvm/test/CodeGen/RISCV/rvv/extload-truncstore.ll
new file mode 100644
index 0000000000000..bf8ed71c67e08
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/extload-truncstore.ll
@@ -0,0 +1,1427 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -verify-machineinstrs < %s | FileCheck %s
+
+define <vscale x 1 x i8> @sextload_nxv1i1_nxv1i8(<vscale x 1 x i1>* %x) {
+; CHECK-LABEL: sextload_nxv1i1_nxv1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8,mf8,ta,mu
+; CHECK-NEXT: vle1.v v0, (a0)
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v8, v25, -1, v0
+; CHECK-NEXT: ret
+ %y = load <vscale x 1 x i1>, <vscale x 1 x i1>* %x
+ %z = sext <vscale x 1 x i1> %y to <vscale x 1 x i8>
+ ret <vscale x 1 x i8> %z
+}
+
+define <vscale x 1 x i16> @sextload_nxv1i8_nxv1i16(<vscale x 1 x i8>* %x) {
+; CHECK-LABEL: sextload_nxv1i8_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8,mf8,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
+; CHECK-NEXT: vsext.vf2 v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 1 x i8>, <vscale x 1 x i8>* %x
+ %z = sext <vscale x 1 x i8> %y to <vscale x 1 x i16>
+ ret <vscale x 1 x i16> %z
+}
+
+define <vscale x 1 x i16> @zextload_nxv1i8_nxv1i16(<vscale x 1 x i8>* %x) {
+; CHECK-LABEL: zextload_nxv1i8_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8,mf8,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
+; CHECK-NEXT: vzext.vf2 v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 1 x i8>, <vscale x 1 x i8>* %x
+ %z = zext <vscale x 1 x i8> %y to <vscale x 1 x i16>
+ ret <vscale x 1 x i16> %z
+}
+
+define <vscale x 1 x i32> @sextload_nxv1i8_nxv1i32(<vscale x 1 x i8>* %x) {
+; CHECK-LABEL: sextload_nxv1i8_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8,mf8,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
+; CHECK-NEXT: vsext.vf4 v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 1 x i8>, <vscale x 1 x i8>* %x
+ %z = sext <vscale x 1 x i8> %y to <vscale x 1 x i32>
+ ret <vscale x 1 x i32> %z
+}
+
+define <vscale x 1 x i32> @zextload_nxv1i8_nxv1i32(<vscale x 1 x i8>* %x) {
+; CHECK-LABEL: zextload_nxv1i8_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8,mf8,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
+; CHECK-NEXT: vzext.vf4 v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 1 x i8>, <vscale x 1 x i8>* %x
+ %z = zext <vscale x 1 x i8> %y to <vscale x 1 x i32>
+ ret <vscale x 1 x i32> %z
+}
+
+define <vscale x 1 x i64> @sextload_nxv1i8_nxv1i64(<vscale x 1 x i8>* %x) {
+; CHECK-LABEL: sextload_nxv1i8_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8,mf8,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT: vsext.vf8 v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 1 x i8>, <vscale x 1 x i8>* %x
+ %z = sext <vscale x 1 x i8> %y to <vscale x 1 x i64>
+ ret <vscale x 1 x i64> %z
+}
+
+define <vscale x 1 x i64> @zextload_nxv1i8_nxv1i64(<vscale x 1 x i8>* %x) {
+; CHECK-LABEL: zextload_nxv1i8_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8,mf8,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT: vzext.vf8 v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 1 x i8>, <vscale x 1 x i8>* %x
+ %z = zext <vscale x 1 x i8> %y to <vscale x 1 x i64>
+ ret <vscale x 1 x i64> %z
+}
+
+define <vscale x 2 x i16> @sextload_nxv2i8_nxv2i16(<vscale x 2 x i8>* %x) {
+; CHECK-LABEL: sextload_nxv2i8_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
+; CHECK-NEXT: vsext.vf2 v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 2 x i8>, <vscale x 2 x i8>* %x
+ %z = sext <vscale x 2 x i8> %y to <vscale x 2 x i16>
+ ret <vscale x 2 x i16> %z
+}
+
+define <vscale x 2 x i16> @zextload_nxv2i8_nxv2i16(<vscale x 2 x i8>* %x) {
+; CHECK-LABEL: zextload_nxv2i8_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
+; CHECK-NEXT: vzext.vf2 v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 2 x i8>, <vscale x 2 x i8>* %x
+ %z = zext <vscale x 2 x i8> %y to <vscale x 2 x i16>
+ ret <vscale x 2 x i16> %z
+}
+
+define <vscale x 2 x i32> @sextload_nxv2i8_nxv2i32(<vscale x 2 x i8>* %x) {
+; CHECK-LABEL: sextload_nxv2i8_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT: vsext.vf4 v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 2 x i8>, <vscale x 2 x i8>* %x
+ %z = sext <vscale x 2 x i8> %y to <vscale x 2 x i32>
+ ret <vscale x 2 x i32> %z
+}
+
+define <vscale x 2 x i32> @zextload_nxv2i8_nxv2i32(<vscale x 2 x i8>* %x) {
+; CHECK-LABEL: zextload_nxv2i8_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT: vzext.vf4 v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 2 x i8>, <vscale x 2 x i8>* %x
+ %z = zext <vscale x 2 x i8> %y to <vscale x 2 x i32>
+ ret <vscale x 2 x i32> %z
+}
+
+define <vscale x 2 x i64> @sextload_nxv2i8_nxv2i64(<vscale x 2 x i8>* %x) {
+; CHECK-LABEL: sextload_nxv2i8_nxv2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT: vsext.vf8 v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 2 x i8>, <vscale x 2 x i8>* %x
+ %z = sext <vscale x 2 x i8> %y to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %z
+}
+
+define <vscale x 2 x i64> @zextload_nxv2i8_nxv2i64(<vscale x 2 x i8>* %x) {
+; CHECK-LABEL: zextload_nxv2i8_nxv2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT: vzext.vf8 v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 2 x i8>, <vscale x 2 x i8>* %x
+ %z = zext <vscale x 2 x i8> %y to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %z
+}
+
+define <vscale x 4 x i16> @sextload_nxv4i8_nxv4i16(<vscale x 4 x i8>* %x) {
+; CHECK-LABEL: sextload_nxv4i8_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8,mf2,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT: vsext.vf2 v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 4 x i8>, <vscale x 4 x i8>* %x
+ %z = sext <vscale x 4 x i8> %y to <vscale x 4 x i16>
+ ret <vscale x 4 x i16> %z
+}
+
+define <vscale x 4 x i16> @zextload_nxv4i8_nxv4i16(<vscale x 4 x i8>* %x) {
+; CHECK-LABEL: zextload_nxv4i8_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8,mf2,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT: vzext.vf2 v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 4 x i8>, <vscale x 4 x i8>* %x
+ %z = zext <vscale x 4 x i8> %y to <vscale x 4 x i16>
+ ret <vscale x 4 x i16> %z
+}
+
+define <vscale x 4 x i32> @sextload_nxv4i8_nxv4i32(<vscale x 4 x i8>* %x) {
+; CHECK-LABEL: sextload_nxv4i8_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8,mf2,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu
+; CHECK-NEXT: vsext.vf4 v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 4 x i8>, <vscale x 4 x i8>* %x
+ %z = sext <vscale x 4 x i8> %y to <vscale x 4 x i32>
+ ret <vscale x 4 x i32> %z
+}
+
+define <vscale x 4 x i32> @zextload_nxv4i8_nxv4i32(<vscale x 4 x i8>* %x) {
+; CHECK-LABEL: zextload_nxv4i8_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8,mf2,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu
+; CHECK-NEXT: vzext.vf4 v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 4 x i8>, <vscale x 4 x i8>* %x
+ %z = zext <vscale x 4 x i8> %y to <vscale x 4 x i32>
+ ret <vscale x 4 x i32> %z
+}
+
+define <vscale x 4 x i64> @sextload_nxv4i8_nxv4i64(<vscale x 4 x i8>* %x) {
+; CHECK-LABEL: sextload_nxv4i8_nxv4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8,mf2,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT: vsext.vf8 v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 4 x i8>, <vscale x 4 x i8>* %x
+ %z = sext <vscale x 4 x i8> %y to <vscale x 4 x i64>
+ ret <vscale x 4 x i64> %z
+}
+
+define <vscale x 4 x i64> @zextload_nxv4i8_nxv4i64(<vscale x 4 x i8>* %x) {
+; CHECK-LABEL: zextload_nxv4i8_nxv4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8,mf2,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT: vzext.vf8 v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 4 x i8>, <vscale x 4 x i8>* %x
+ %z = zext <vscale x 4 x i8> %y to <vscale x 4 x i64>
+ ret <vscale x 4 x i64> %z
+}
+
+define <vscale x 8 x i16> @sextload_nxv8i8_nxv8i16(<vscale x 8 x i8>* %x) {
+; CHECK-LABEL: sextload_nxv8i8_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl1r.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu
+; CHECK-NEXT: vsext.vf2 v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 8 x i8>, <vscale x 8 x i8>* %x
+ %z = sext <vscale x 8 x i8> %y to <vscale x 8 x i16>
+ ret <vscale x 8 x i16> %z
+}
+
+define <vscale x 8 x i16> @zextload_nxv8i8_nxv8i16(<vscale x 8 x i8>* %x) {
+; CHECK-LABEL: zextload_nxv8i8_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl1r.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu
+; CHECK-NEXT: vzext.vf2 v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 8 x i8>, <vscale x 8 x i8>* %x
+ %z = zext <vscale x 8 x i8> %y to <vscale x 8 x i16>
+ ret <vscale x 8 x i16> %z
+}
+
+define <vscale x 8 x i32> @sextload_nxv8i8_nxv8i32(<vscale x 8 x i8>* %x) {
+; CHECK-LABEL: sextload_nxv8i8_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl1r.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu
+; CHECK-NEXT: vsext.vf4 v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 8 x i8>, <vscale x 8 x i8>* %x
+ %z = sext <vscale x 8 x i8> %y to <vscale x 8 x i32>
+ ret <vscale x 8 x i32> %z
+}
+
+define <vscale x 8 x i32> @zextload_nxv8i8_nxv8i32(<vscale x 8 x i8>* %x) {
+; CHECK-LABEL: zextload_nxv8i8_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl1r.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu
+; CHECK-NEXT: vzext.vf4 v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 8 x i8>, <vscale x 8 x i8>* %x
+ %z = zext <vscale x 8 x i8> %y to <vscale x 8 x i32>
+ ret <vscale x 8 x i32> %z
+}
+
+define <vscale x 8 x i64> @sextload_nxv8i8_nxv8i64(<vscale x 8 x i8>* %x) {
+; CHECK-LABEL: sextload_nxv8i8_nxv8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl1r.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: vsext.vf8 v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 8 x i8>, <vscale x 8 x i8>* %x
+ %z = sext <vscale x 8 x i8> %y to <vscale x 8 x i64>
+ ret <vscale x 8 x i64> %z
+}
+
+define <vscale x 8 x i64> @zextload_nxv8i8_nxv8i64(<vscale x 8 x i8>* %x) {
+; CHECK-LABEL: zextload_nxv8i8_nxv8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl1r.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: vzext.vf8 v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 8 x i8>, <vscale x 8 x i8>* %x
+ %z = zext <vscale x 8 x i8> %y to <vscale x 8 x i64>
+ ret <vscale x 8 x i64> %z
+}
+
+define <vscale x 16 x i16> @sextload_nxv16i8_nxv16i16(<vscale x 16 x i8>* %x) {
+; CHECK-LABEL: sextload_nxv16i8_nxv16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl2r.v v26, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e16,m4,ta,mu
+; CHECK-NEXT: vsext.vf2 v8, v26
+; CHECK-NEXT: ret
+ %y = load <vscale x 16 x i8>, <vscale x 16 x i8>* %x
+ %z = sext <vscale x 16 x i8> %y to <vscale x 16 x i16>
+ ret <vscale x 16 x i16> %z
+}
+
+define <vscale x 16 x i16> @zextload_nxv16i8_nxv16i16(<vscale x 16 x i8>* %x) {
+; CHECK-LABEL: zextload_nxv16i8_nxv16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl2r.v v26, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e16,m4,ta,mu
+; CHECK-NEXT: vzext.vf2 v8, v26
+; CHECK-NEXT: ret
+ %y = load <vscale x 16 x i8>, <vscale x 16 x i8>* %x
+ %z = zext <vscale x 16 x i8> %y to <vscale x 16 x i16>
+ ret <vscale x 16 x i16> %z
+}
+
+define <vscale x 16 x i32> @sextload_nxv16i8_nxv16i32(<vscale x 16 x i8>* %x) {
+; CHECK-LABEL: sextload_nxv16i8_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl2r.v v26, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e32,m8,ta,mu
+; CHECK-NEXT: vsext.vf4 v8, v26
+; CHECK-NEXT: ret
+ %y = load <vscale x 16 x i8>, <vscale x 16 x i8>* %x
+ %z = sext <vscale x 16 x i8> %y to <vscale x 16 x i32>
+ ret <vscale x 16 x i32> %z
+}
+
+define <vscale x 16 x i32> @zextload_nxv16i8_nxv16i32(<vscale x 16 x i8>* %x) {
+; CHECK-LABEL: zextload_nxv16i8_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl2r.v v26, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e32,m8,ta,mu
+; CHECK-NEXT: vzext.vf4 v8, v26
+; CHECK-NEXT: ret
+ %y = load <vscale x 16 x i8>, <vscale x 16 x i8>* %x
+ %z = zext <vscale x 16 x i8> %y to <vscale x 16 x i32>
+ ret <vscale x 16 x i32> %z
+}
+
+define <vscale x 32 x i16> @sextload_nxv32i8_nxv32i16(<vscale x 32 x i8>* %x) {
+; CHECK-LABEL: sextload_nxv32i8_nxv32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl4r.v v28, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e16,m8,ta,mu
+; CHECK-NEXT: vsext.vf2 v8, v28
+; CHECK-NEXT: ret
+ %y = load <vscale x 32 x i8>, <vscale x 32 x i8>* %x
+ %z = sext <vscale x 32 x i8> %y to <vscale x 32 x i16>
+ ret <vscale x 32 x i16> %z
+}
+
+define <vscale x 32 x i16> @zextload_nxv32i8_nxv32i16(<vscale x 32 x i8>* %x) {
+; CHECK-LABEL: zextload_nxv32i8_nxv32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl4r.v v28, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e16,m8,ta,mu
+; CHECK-NEXT: vzext.vf2 v8, v28
+; CHECK-NEXT: ret
+ %y = load <vscale x 32 x i8>, <vscale x 32 x i8>* %x
+ %z = zext <vscale x 32 x i8> %y to <vscale x 32 x i16>
+ ret <vscale x 32 x i16> %z
+}
+
+define void @truncstore_nxv1i8_nxv1i1(<vscale x 1 x i8> %x, <vscale x 1 x i1> *%z) {
+; CHECK-LABEL: truncstore_nxv1i8_nxv1i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8,mf8,ta,mu
+; CHECK-NEXT: vand.vi v25, v8, 1
+; CHECK-NEXT: vmsne.vi v26, v25, 0
+; CHECK-NEXT: vse1.v v26, (a0)
+; CHECK-NEXT: ret
+ %y = trunc <vscale x 1 x i8> %x to <vscale x 1 x i1>
+ store <vscale x 1 x i1> %y, <vscale x 1 x i1>* %z
+ ret void
+}
+
+define void @truncstore_nxv1i16_nxv1i8(<vscale x 1 x i16> %x, <vscale x 1 x i8>* %z) {
+; CHECK-LABEL: truncstore_nxv1i16_nxv1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8,mf8,ta,mu
+; CHECK-NEXT: vnsrl.wi v25, v8, 0
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %y = trunc <vscale x 1 x i16> %x to <vscale x 1 x i8>
+ store <vscale x 1 x i8> %y, <vscale x 1 x i8>* %z
+ ret void
+}
+
+define <vscale x 1 x i32> @sextload_nxv1i16_nxv1i32(<vscale x 1 x i16>* %x) {
+; CHECK-LABEL: sextload_nxv1i16_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
+; CHECK-NEXT: vsext.vf2 v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 1 x i16>, <vscale x 1 x i16>* %x
+ %z = sext <vscale x 1 x i16> %y to <vscale x 1 x i32>
+ ret <vscale x 1 x i32> %z
+}
+
+define <vscale x 1 x i32> @zextload_nxv1i16_nxv1i32(<vscale x 1 x i16>* %x) {
+; CHECK-LABEL: zextload_nxv1i16_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
+; CHECK-NEXT: vzext.vf2 v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 1 x i16>, <vscale x 1 x i16>* %x
+ %z = zext <vscale x 1 x i16> %y to <vscale x 1 x i32>
+ ret <vscale x 1 x i32> %z
+}
+
+define <vscale x 1 x i64> @sextload_nxv1i16_nxv1i64(<vscale x 1 x i16>* %x) {
+; CHECK-LABEL: sextload_nxv1i16_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT: vsext.vf4 v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 1 x i16>, <vscale x 1 x i16>* %x
+ %z = sext <vscale x 1 x i16> %y to <vscale x 1 x i64>
+ ret <vscale x 1 x i64> %z
+}
+
+define <vscale x 1 x i64> @zextload_nxv1i16_nxv1i64(<vscale x 1 x i16>* %x) {
+; CHECK-LABEL: zextload_nxv1i16_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT: vzext.vf4 v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 1 x i16>, <vscale x 1 x i16>* %x
+ %z = zext <vscale x 1 x i16> %y to <vscale x 1 x i64>
+ ret <vscale x 1 x i64> %z
+}
+
+define void @truncstore_nxv2i16_nxv2i8(<vscale x 2 x i16> %x, <vscale x 2 x i8>* %z) {
+; CHECK-LABEL: truncstore_nxv2i16_nxv2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu
+; CHECK-NEXT: vnsrl.wi v25, v8, 0
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %y = trunc <vscale x 2 x i16> %x to <vscale x 2 x i8>
+ store <vscale x 2 x i8> %y, <vscale x 2 x i8>* %z
+ ret void
+}
+
+define <vscale x 2 x i32> @sextload_nxv2i16_nxv2i32(<vscale x 2 x i16>* %x) {
+; CHECK-LABEL: sextload_nxv2i16_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT: vsext.vf2 v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 2 x i16>, <vscale x 2 x i16>* %x
+ %z = sext <vscale x 2 x i16> %y to <vscale x 2 x i32>
+ ret <vscale x 2 x i32> %z
+}
+
+define <vscale x 2 x i32> @zextload_nxv2i16_nxv2i32(<vscale x 2 x i16>* %x) {
+; CHECK-LABEL: zextload_nxv2i16_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT: vzext.vf2 v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 2 x i16>, <vscale x 2 x i16>* %x
+ %z = zext <vscale x 2 x i16> %y to <vscale x 2 x i32>
+ ret <vscale x 2 x i32> %z
+}
+
+define <vscale x 2 x i64> @sextload_nxv2i16_nxv2i64(<vscale x 2 x i16>* %x) {
+; CHECK-LABEL: sextload_nxv2i16_nxv2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT: vsext.vf4 v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 2 x i16>, <vscale x 2 x i16>* %x
+ %z = sext <vscale x 2 x i16> %y to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %z
+}
+
+define <vscale x 2 x i64> @zextload_nxv2i16_nxv2i64(<vscale x 2 x i16>* %x) {
+; CHECK-LABEL: zextload_nxv2i16_nxv2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT: vzext.vf4 v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 2 x i16>, <vscale x 2 x i16>* %x
+ %z = zext <vscale x 2 x i16> %y to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %z
+}
+
+define void @truncstore_nxv4i16_nxv4i8(<vscale x 4 x i16> %x, <vscale x 4 x i8>* %z) {
+; CHECK-LABEL: truncstore_nxv4i16_nxv4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8,mf2,ta,mu
+; CHECK-NEXT: vnsrl.wi v25, v8, 0
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %y = trunc <vscale x 4 x i16> %x to <vscale x 4 x i8>
+ store <vscale x 4 x i8> %y, <vscale x 4 x i8>* %z
+ ret void
+}
+
+define <vscale x 4 x i32> @sextload_nxv4i16_nxv4i32(<vscale x 4 x i16>* %x) {
+; CHECK-LABEL: sextload_nxv4i16_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl1re16.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu
+; CHECK-NEXT: vsext.vf2 v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 4 x i16>, <vscale x 4 x i16>* %x
+ %z = sext <vscale x 4 x i16> %y to <vscale x 4 x i32>
+ ret <vscale x 4 x i32> %z
+}
+
+define <vscale x 4 x i32> @zextload_nxv4i16_nxv4i32(<vscale x 4 x i16>* %x) {
+; CHECK-LABEL: zextload_nxv4i16_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl1re16.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu
+; CHECK-NEXT: vzext.vf2 v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 4 x i16>, <vscale x 4 x i16>* %x
+ %z = zext <vscale x 4 x i16> %y to <vscale x 4 x i32>
+ ret <vscale x 4 x i32> %z
+}
+
+define <vscale x 4 x i64> @sextload_nxv4i16_nxv4i64(<vscale x 4 x i16>* %x) {
+; CHECK-LABEL: sextload_nxv4i16_nxv4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl1re16.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT: vsext.vf4 v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 4 x i16>, <vscale x 4 x i16>* %x
+ %z = sext <vscale x 4 x i16> %y to <vscale x 4 x i64>
+ ret <vscale x 4 x i64> %z
+}
+
+define <vscale x 4 x i64> @zextload_nxv4i16_nxv4i64(<vscale x 4 x i16>* %x) {
+; CHECK-LABEL: zextload_nxv4i16_nxv4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl1re16.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT: vzext.vf4 v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 4 x i16>, <vscale x 4 x i16>* %x
+ %z = zext <vscale x 4 x i16> %y to <vscale x 4 x i64>
+ ret <vscale x 4 x i64> %z
+}
+
+define void @truncstore_nxv8i16_nxv8i8(<vscale x 8 x i16> %x, <vscale x 8 x i8>* %z) {
+; CHECK-LABEL: truncstore_nxv8i16_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu
+; CHECK-NEXT: vnsrl.wi v25, v8, 0
+; CHECK-NEXT: vs1r.v v25, (a0)
+; CHECK-NEXT: ret
+ %y = trunc <vscale x 8 x i16> %x to <vscale x 8 x i8>
+ store <vscale x 8 x i8> %y, <vscale x 8 x i8>* %z
+ ret void
+}
+
+define <vscale x 8 x i32> @sextload_nxv8i16_nxv8i32(<vscale x 8 x i16>* %x) {
+; CHECK-LABEL: sextload_nxv8i16_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl2re16.v v26, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu
+; CHECK-NEXT: vsext.vf2 v8, v26
+; CHECK-NEXT: ret
+ %y = load <vscale x 8 x i16>, <vscale x 8 x i16>* %x
+ %z = sext <vscale x 8 x i16> %y to <vscale x 8 x i32>
+ ret <vscale x 8 x i32> %z
+}
+
+define <vscale x 8 x i32> @zextload_nxv8i16_nxv8i32(<vscale x 8 x i16>* %x) {
+; CHECK-LABEL: zextload_nxv8i16_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl2re16.v v26, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu
+; CHECK-NEXT: vzext.vf2 v8, v26
+; CHECK-NEXT: ret
+ %y = load <vscale x 8 x i16>, <vscale x 8 x i16>* %x
+ %z = zext <vscale x 8 x i16> %y to <vscale x 8 x i32>
+ ret <vscale x 8 x i32> %z
+}
+
+define <vscale x 8 x i64> @sextload_nxv8i16_nxv8i64(<vscale x 8 x i16>* %x) {
+; CHECK-LABEL: sextload_nxv8i16_nxv8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl2re16.v v26, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: vsext.vf4 v8, v26
+; CHECK-NEXT: ret
+ %y = load <vscale x 8 x i16>, <vscale x 8 x i16>* %x
+ %z = sext <vscale x 8 x i16> %y to <vscale x 8 x i64>
+ ret <vscale x 8 x i64> %z
+}
+
+define <vscale x 8 x i64> @zextload_nxv8i16_nxv8i64(<vscale x 8 x i16>* %x) {
+; CHECK-LABEL: zextload_nxv8i16_nxv8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl2re16.v v26, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: vzext.vf4 v8, v26
+; CHECK-NEXT: ret
+ %y = load <vscale x 8 x i16>, <vscale x 8 x i16>* %x
+ %z = zext <vscale x 8 x i16> %y to <vscale x 8 x i64>
+ ret <vscale x 8 x i64> %z
+}
+
+define void @truncstore_nxv16i16_nxv16i8(<vscale x 16 x i16> %x, <vscale x 16 x i8>* %z) {
+; CHECK-LABEL: truncstore_nxv16i16_nxv16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8,m2,ta,mu
+; CHECK-NEXT: vnsrl.wi v26, v8, 0
+; CHECK-NEXT: vs2r.v v26, (a0)
+; CHECK-NEXT: ret
+ %y = trunc <vscale x 16 x i16> %x to <vscale x 16 x i8>
+ store <vscale x 16 x i8> %y, <vscale x 16 x i8>* %z
+ ret void
+}
+
+define <vscale x 16 x i32> @sextload_nxv16i16_nxv16i32(<vscale x 16 x i16>* %x) {
+; CHECK-LABEL: sextload_nxv16i16_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl4re16.v v28, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e32,m8,ta,mu
+; CHECK-NEXT: vsext.vf2 v8, v28
+; CHECK-NEXT: ret
+ %y = load <vscale x 16 x i16>, <vscale x 16 x i16>* %x
+ %z = sext <vscale x 16 x i16> %y to <vscale x 16 x i32>
+ ret <vscale x 16 x i32> %z
+}
+
+define <vscale x 16 x i32> @zextload_nxv16i16_nxv16i32(<vscale x 16 x i16>* %x) {
+; CHECK-LABEL: zextload_nxv16i16_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl4re16.v v28, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e32,m8,ta,mu
+; CHECK-NEXT: vzext.vf2 v8, v28
+; CHECK-NEXT: ret
+ %y = load <vscale x 16 x i16>, <vscale x 16 x i16>* %x
+ %z = zext <vscale x 16 x i16> %y to <vscale x 16 x i32>
+ ret <vscale x 16 x i32> %z
+}
+
+define void @truncstore_nxv32i16_nxv32i8(<vscale x 32 x i16> %x, <vscale x 32 x i8>* %z) {
+; CHECK-LABEL: truncstore_nxv32i16_nxv32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8,m4,ta,mu
+; CHECK-NEXT: vnsrl.wi v28, v8, 0
+; CHECK-NEXT: vs4r.v v28, (a0)
+; CHECK-NEXT: ret
+ %y = trunc <vscale x 32 x i16> %x to <vscale x 32 x i8>
+ store <vscale x 32 x i8> %y, <vscale x 32 x i8>* %z
+ ret void
+}
+
+define void @truncstore_nxv1i32_nxv1i8(<vscale x 1 x i32> %x, <vscale x 1 x i8>* %z) {
+; CHECK-LABEL: truncstore_nxv1i32_nxv1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu
+; CHECK-NEXT: vnsrl.wi v25, v8, 0
+; CHECK-NEXT: vsetvli a1, zero, e8,mf8,ta,mu
+; CHECK-NEXT: vnsrl.wi v26, v25, 0
+; CHECK-NEXT: vse8.v v26, (a0)
+; CHECK-NEXT: ret
+ %y = trunc <vscale x 1 x i32> %x to <vscale x 1 x i8>
+ store <vscale x 1 x i8> %y, <vscale x 1 x i8>* %z
+ ret void
+}
+
+define void @truncstore_nxv1i32_nxv1i16(<vscale x 1 x i32> %x, <vscale x 1 x i16>* %z) {
+; CHECK-LABEL: truncstore_nxv1i32_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu
+; CHECK-NEXT: vnsrl.wi v25, v8, 0
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %y = trunc <vscale x 1 x i32> %x to <vscale x 1 x i16>
+ store <vscale x 1 x i16> %y, <vscale x 1 x i16>* %z
+ ret void
+}
+
+define <vscale x 1 x i64> @sextload_nxv1i32_nxv1i64(<vscale x 1 x i32>* %x) {
+; CHECK-LABEL: sextload_nxv1i32_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT: vsext.vf2 v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 1 x i32>, <vscale x 1 x i32>* %x
+ %z = sext <vscale x 1 x i32> %y to <vscale x 1 x i64>
+ ret <vscale x 1 x i64> %z
+}
+
+define <vscale x 1 x i64> @zextload_nxv1i32_nxv1i64(<vscale x 1 x i32>* %x) {
+; CHECK-LABEL: zextload_nxv1i32_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT: vzext.vf2 v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 1 x i32>, <vscale x 1 x i32>* %x
+ %z = zext <vscale x 1 x i32> %y to <vscale x 1 x i64>
+ ret <vscale x 1 x i64> %z
+}
+
+define void @truncstore_nxv2i32_nxv2i8(<vscale x 2 x i32> %x, <vscale x 2 x i8>* %z) {
+; CHECK-LABEL: truncstore_nxv2i32_nxv2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu
+; CHECK-NEXT: vnsrl.wi v25, v8, 0
+; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu
+; CHECK-NEXT: vnsrl.wi v26, v25, 0
+; CHECK-NEXT: vse8.v v26, (a0)
+; CHECK-NEXT: ret
+ %y = trunc <vscale x 2 x i32> %x to <vscale x 2 x i8>
+ store <vscale x 2 x i8> %y, <vscale x 2 x i8>* %z
+ ret void
+}
+
+define void @truncstore_nxv2i32_nxv2i16(<vscale x 2 x i32> %x, <vscale x 2 x i16>* %z) {
+; CHECK-LABEL: truncstore_nxv2i32_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu
+; CHECK-NEXT: vnsrl.wi v25, v8, 0
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %y = trunc <vscale x 2 x i32> %x to <vscale x 2 x i16>
+ store <vscale x 2 x i16> %y, <vscale x 2 x i16>* %z
+ ret void
+}
+
+define <vscale x 2 x i64> @sextload_nxv2i32_nxv2i64(<vscale x 2 x i32>* %x) {
+; CHECK-LABEL: sextload_nxv2i32_nxv2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl1re32.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT: vsext.vf2 v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 2 x i32>, <vscale x 2 x i32>* %x
+ %z = sext <vscale x 2 x i32> %y to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %z
+}
+
+define <vscale x 2 x i64> @zextload_nxv2i32_nxv2i64(<vscale x 2 x i32>* %x) {
+; CHECK-LABEL: zextload_nxv2i32_nxv2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl1re32.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT: vzext.vf2 v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 2 x i32>, <vscale x 2 x i32>* %x
+ %z = zext <vscale x 2 x i32> %y to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %z
+}
+
+define void @truncstore_nxv4i32_nxv4i8(<vscale x 4 x i32> %x, <vscale x 4 x i8>* %z) {
+; CHECK-LABEL: truncstore_nxv4i32_nxv4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu
+; CHECK-NEXT: vnsrl.wi v25, v8, 0
+; CHECK-NEXT: vsetvli a1, zero, e8,mf2,ta,mu
+; CHECK-NEXT: vnsrl.wi v26, v25, 0
+; CHECK-NEXT: vse8.v v26, (a0)
+; CHECK-NEXT: ret
+ %y = trunc <vscale x 4 x i32> %x to <vscale x 4 x i8>
+ store <vscale x 4 x i8> %y, <vscale x 4 x i8>* %z
+ ret void
+}
+
+define void @truncstore_nxv4i32_nxv4i16(<vscale x 4 x i32> %x, <vscale x 4 x i16>* %z) {
+; CHECK-LABEL: truncstore_nxv4i32_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu
+; CHECK-NEXT: vnsrl.wi v25, v8, 0
+; CHECK-NEXT: vs1r.v v25, (a0)
+; CHECK-NEXT: ret
+ %y = trunc <vscale x 4 x i32> %x to <vscale x 4 x i16>
+ store <vscale x 4 x i16> %y, <vscale x 4 x i16>* %z
+ ret void
+}
+
+define <vscale x 4 x i64> @sextload_nxv4i32_nxv4i64(<vscale x 4 x i32>* %x) {
+; CHECK-LABEL: sextload_nxv4i32_nxv4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl2re32.v v26, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT: vsext.vf2 v8, v26
+; CHECK-NEXT: ret
+ %y = load <vscale x 4 x i32>, <vscale x 4 x i32>* %x
+ %z = sext <vscale x 4 x i32> %y to <vscale x 4 x i64>
+ ret <vscale x 4 x i64> %z
+}
+
+define <vscale x 4 x i64> @zextload_nxv4i32_nxv4i64(<vscale x 4 x i32>* %x) {
+; CHECK-LABEL: zextload_nxv4i32_nxv4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl2re32.v v26, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT: vzext.vf2 v8, v26
+; CHECK-NEXT: ret
+ %y = load <vscale x 4 x i32>, <vscale x 4 x i32>* %x
+ %z = zext <vscale x 4 x i32> %y to <vscale x 4 x i64>
+ ret <vscale x 4 x i64> %z
+}
+
+define void @truncstore_nxv8i32_nxv8i8(<vscale x 8 x i32> %x, <vscale x 8 x i8>* %z) {
+; CHECK-LABEL: truncstore_nxv8i32_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu
+; CHECK-NEXT: vnsrl.wi v26, v8, 0
+; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu
+; CHECK-NEXT: vnsrl.wi v25, v26, 0
+; CHECK-NEXT: vs1r.v v25, (a0)
+; CHECK-NEXT: ret
+ %y = trunc <vscale x 8 x i32> %x to <vscale x 8 x i8>
+ store <vscale x 8 x i8> %y, <vscale x 8 x i8>* %z
+ ret void
+}
+
+define void @truncstore_nxv8i32_nxv8i16(<vscale x 8 x i32> %x, <vscale x 8 x i16>* %z) {
+; CHECK-LABEL: truncstore_nxv8i32_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu
+; CHECK-NEXT: vnsrl.wi v26, v8, 0
+; CHECK-NEXT: vs2r.v v26, (a0)
+; CHECK-NEXT: ret
+ %y = trunc <vscale x 8 x i32> %x to <vscale x 8 x i16>
+ store <vscale x 8 x i16> %y, <vscale x 8 x i16>* %z
+ ret void
+}
+
+define <vscale x 8 x i64> @sextload_nxv8i32_nxv8i64(<vscale x 8 x i32>* %x) {
+; CHECK-LABEL: sextload_nxv8i32_nxv8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl4re32.v v28, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: vsext.vf2 v8, v28
+; CHECK-NEXT: ret
+ %y = load <vscale x 8 x i32>, <vscale x 8 x i32>* %x
+ %z = sext <vscale x 8 x i32> %y to <vscale x 8 x i64>
+ ret <vscale x 8 x i64> %z
+}
+
+define <vscale x 8 x i64> @zextload_nxv8i32_nxv8i64(<vscale x 8 x i32>* %x) {
+; CHECK-LABEL: zextload_nxv8i32_nxv8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl4re32.v v28, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: vzext.vf2 v8, v28
+; CHECK-NEXT: ret
+ %y = load <vscale x 8 x i32>, <vscale x 8 x i32>* %x
+ %z = zext <vscale x 8 x i32> %y to <vscale x 8 x i64>
+ ret <vscale x 8 x i64> %z
+}
+
+define void @truncstore_nxv16i32_nxv16i8(<vscale x 16 x i32> %x, <vscale x 16 x i8>* %z) {
+; CHECK-LABEL: truncstore_nxv16i32_nxv16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16,m4,ta,mu
+; CHECK-NEXT: vnsrl.wi v28, v8, 0
+; CHECK-NEXT: vsetvli a1, zero, e8,m2,ta,mu
+; CHECK-NEXT: vnsrl.wi v26, v28, 0
+; CHECK-NEXT: vs2r.v v26, (a0)
+; CHECK-NEXT: ret
+ %y = trunc <vscale x 16 x i32> %x to <vscale x 16 x i8>
+ store <vscale x 16 x i8> %y, <vscale x 16 x i8>* %z
+ ret void
+}
+
+define void @truncstore_nxv16i32_nxv16i16(<vscale x 16 x i32> %x, <vscale x 16 x i16>* %z) {
+; CHECK-LABEL: truncstore_nxv16i32_nxv16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16,m4,ta,mu
+; CHECK-NEXT: vnsrl.wi v28, v8, 0
+; CHECK-NEXT: vs4r.v v28, (a0)
+; CHECK-NEXT: ret
+ %y = trunc <vscale x 16 x i32> %x to <vscale x 16 x i16>
+ store <vscale x 16 x i16> %y, <vscale x 16 x i16>* %z
+ ret void
+}
+
+define void @truncstore_nxv1i64_nxv1i8(<vscale x 1 x i64> %x, <vscale x 1 x i8>* %z) {
+; CHECK-LABEL: truncstore_nxv1i64_nxv1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu
+; CHECK-NEXT: vnsrl.wi v25, v8, 0
+; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu
+; CHECK-NEXT: vnsrl.wi v26, v25, 0
+; CHECK-NEXT: vsetvli a1, zero, e8,mf8,ta,mu
+; CHECK-NEXT: vnsrl.wi v25, v26, 0
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %y = trunc <vscale x 1 x i64> %x to <vscale x 1 x i8>
+ store <vscale x 1 x i8> %y, <vscale x 1 x i8>* %z
+ ret void
+}
+
+define void @truncstore_nxv1i64_nxv1i16(<vscale x 1 x i64> %x, <vscale x 1 x i16>* %z) {
+; CHECK-LABEL: truncstore_nxv1i64_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu
+; CHECK-NEXT: vnsrl.wi v25, v8, 0
+; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu
+; CHECK-NEXT: vnsrl.wi v26, v25, 0
+; CHECK-NEXT: vse16.v v26, (a0)
+; CHECK-NEXT: ret
+ %y = trunc <vscale x 1 x i64> %x to <vscale x 1 x i16>
+ store <vscale x 1 x i16> %y, <vscale x 1 x i16>* %z
+ ret void
+}
+
+define void @truncstore_nxv1i64_nxv1i32(<vscale x 1 x i64> %x, <vscale x 1 x i32>* %z) {
+; CHECK-LABEL: truncstore_nxv1i64_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu
+; CHECK-NEXT: vnsrl.wi v25, v8, 0
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: ret
+ %y = trunc <vscale x 1 x i64> %x to <vscale x 1 x i32>
+ store <vscale x 1 x i32> %y, <vscale x 1 x i32>* %z
+ ret void
+}
+
+define void @truncstore_nxv2i64_nxv2i8(<vscale x 2 x i64> %x, <vscale x 2 x i8>* %z) {
+; CHECK-LABEL: truncstore_nxv2i64_nxv2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu
+; CHECK-NEXT: vnsrl.wi v25, v8, 0
+; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu
+; CHECK-NEXT: vnsrl.wi v26, v25, 0
+; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu
+; CHECK-NEXT: vnsrl.wi v25, v26, 0
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %y = trunc <vscale x 2 x i64> %x to <vscale x 2 x i8>
+ store <vscale x 2 x i8> %y, <vscale x 2 x i8>* %z
+ ret void
+}
+
+define void @truncstore_nxv2i64_nxv2i16(<vscale x 2 x i64> %x, <vscale x 2 x i16>* %z) {
+; CHECK-LABEL: truncstore_nxv2i64_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu
+; CHECK-NEXT: vnsrl.wi v25, v8, 0
+; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu
+; CHECK-NEXT: vnsrl.wi v26, v25, 0
+; CHECK-NEXT: vse16.v v26, (a0)
+; CHECK-NEXT: ret
+ %y = trunc <vscale x 2 x i64> %x to <vscale x 2 x i16>
+ store <vscale x 2 x i16> %y, <vscale x 2 x i16>* %z
+ ret void
+}
+
+define void @truncstore_nxv2i64_nxv2i32(<vscale x 2 x i64> %x, <vscale x 2 x i32>* %z) {
+; CHECK-LABEL: truncstore_nxv2i64_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu
+; CHECK-NEXT: vnsrl.wi v25, v8, 0
+; CHECK-NEXT: vs1r.v v25, (a0)
+; CHECK-NEXT: ret
+ %y = trunc <vscale x 2 x i64> %x to <vscale x 2 x i32>
+ store <vscale x 2 x i32> %y, <vscale x 2 x i32>* %z
+ ret void
+}
+
+define void @truncstore_nxv4i64_nxv4i8(<vscale x 4 x i64> %x, <vscale x 4 x i8>* %z) {
+; CHECK-LABEL: truncstore_nxv4i64_nxv4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu
+; CHECK-NEXT: vnsrl.wi v26, v8, 0
+; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu
+; CHECK-NEXT: vnsrl.wi v25, v26, 0
+; CHECK-NEXT: vsetvli a1, zero, e8,mf2,ta,mu
+; CHECK-NEXT: vnsrl.wi v26, v25, 0
+; CHECK-NEXT: vse8.v v26, (a0)
+; CHECK-NEXT: ret
+ %y = trunc <vscale x 4 x i64> %x to <vscale x 4 x i8>
+ store <vscale x 4 x i8> %y, <vscale x 4 x i8>* %z
+ ret void
+}
+
+define void @truncstore_nxv4i64_nxv4i16(<vscale x 4 x i64> %x, <vscale x 4 x i16>* %z) {
+; CHECK-LABEL: truncstore_nxv4i64_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu
+; CHECK-NEXT: vnsrl.wi v26, v8, 0
+; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu
+; CHECK-NEXT: vnsrl.wi v25, v26, 0
+; CHECK-NEXT: vs1r.v v25, (a0)
+; CHECK-NEXT: ret
+ %y = trunc <vscale x 4 x i64> %x to <vscale x 4 x i16>
+ store <vscale x 4 x i16> %y, <vscale x 4 x i16>* %z
+ ret void
+}
+
+define void @truncstore_nxv4i64_nxv4i32(<vscale x 4 x i64> %x, <vscale x 4 x i32>* %z) {
+; CHECK-LABEL: truncstore_nxv4i64_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu
+; CHECK-NEXT: vnsrl.wi v26, v8, 0
+; CHECK-NEXT: vs2r.v v26, (a0)
+; CHECK-NEXT: ret
+ %y = trunc <vscale x 4 x i64> %x to <vscale x 4 x i32>
+ store <vscale x 4 x i32> %y, <vscale x 4 x i32>* %z
+ ret void
+}
+
+define void @truncstore_nxv8i64_nxv8i8(<vscale x 8 x i64> %x, <vscale x 8 x i8>* %z) {
+; CHECK-LABEL: truncstore_nxv8i64_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu
+; CHECK-NEXT: vnsrl.wi v28, v8, 0
+; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu
+; CHECK-NEXT: vnsrl.wi v26, v28, 0
+; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu
+; CHECK-NEXT: vnsrl.wi v25, v26, 0
+; CHECK-NEXT: vs1r.v v25, (a0)
+; CHECK-NEXT: ret
+ %y = trunc <vscale x 8 x i64> %x to <vscale x 8 x i8>
+ store <vscale x 8 x i8> %y, <vscale x 8 x i8>* %z
+ ret void
+}
+
+define void @truncstore_nxv8i64_nxv8i16(<vscale x 8 x i64> %x, <vscale x 8 x i16>* %z) {
+; CHECK-LABEL: truncstore_nxv8i64_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu
+; CHECK-NEXT: vnsrl.wi v28, v8, 0
+; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu
+; CHECK-NEXT: vnsrl.wi v26, v28, 0
+; CHECK-NEXT: vs2r.v v26, (a0)
+; CHECK-NEXT: ret
+ %y = trunc <vscale x 8 x i64> %x to <vscale x 8 x i16>
+ store <vscale x 8 x i16> %y, <vscale x 8 x i16>* %z
+ ret void
+}
+
+define void @truncstore_nxv8i64_nxv8i32(<vscale x 8 x i64> %x, <vscale x 8 x i32>* %z) {
+; CHECK-LABEL: truncstore_nxv8i64_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu
+; CHECK-NEXT: vnsrl.wi v28, v8, 0
+; CHECK-NEXT: vs4r.v v28, (a0)
+; CHECK-NEXT: ret
+ %y = trunc <vscale x 8 x i64> %x to <vscale x 8 x i32>
+ store <vscale x 8 x i32> %y, <vscale x 8 x i32>* %z
+ ret void
+}
+
+define <vscale x 1 x float> @extload_nxv1f16_nxv1f32(<vscale x 1 x half>* %x) {
+; CHECK-LABEL: extload_nxv1f16_nxv1f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vfwcvt.f.f.v v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 1 x half>, <vscale x 1 x half>* %x
+ %z = fpext <vscale x 1 x half> %y to <vscale x 1 x float>
+ ret <vscale x 1 x float> %z
+}
+
+define <vscale x 1 x double> @extload_nxv1f16_nxv1f64(<vscale x 1 x half>* %x) {
+; CHECK-LABEL: extload_nxv1f16_nxv1f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vfwcvt.f.f.v v26, v25
+; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
+; CHECK-NEXT: vfwcvt.f.f.v v8, v26
+; CHECK-NEXT: ret
+ %y = load <vscale x 1 x half>, <vscale x 1 x half>* %x
+ %z = fpext <vscale x 1 x half> %y to <vscale x 1 x double>
+ ret <vscale x 1 x double> %z
+}
+
+define <vscale x 2 x float> @extload_nxv2f16_nxv2f32(<vscale x 2 x half>* %x) {
+; CHECK-LABEL: extload_nxv2f16_nxv2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vfwcvt.f.f.v v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 2 x half>, <vscale x 2 x half>* %x
+ %z = fpext <vscale x 2 x half> %y to <vscale x 2 x float>
+ ret <vscale x 2 x float> %z
+}
+
+define <vscale x 2 x double> @extload_nxv2f16_nxv2f64(<vscale x 2 x half>* %x) {
+; CHECK-LABEL: extload_nxv2f16_nxv2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vfwcvt.f.f.v v26, v25
+; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT: vfwcvt.f.f.v v8, v26
+; CHECK-NEXT: ret
+ %y = load <vscale x 2 x half>, <vscale x 2 x half>* %x
+ %z = fpext <vscale x 2 x half> %y to <vscale x 2 x double>
+ ret <vscale x 2 x double> %z
+}
+
+define <vscale x 4 x float> @extload_nxv4f16_nxv4f32(<vscale x 4 x half>* %x) {
+; CHECK-LABEL: extload_nxv4f16_nxv4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl1re16.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT: vfwcvt.f.f.v v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 4 x half>, <vscale x 4 x half>* %x
+ %z = fpext <vscale x 4 x half> %y to <vscale x 4 x float>
+ ret <vscale x 4 x float> %z
+}
+
+define <vscale x 4 x double> @extload_nxv4f16_nxv4f64(<vscale x 4 x half>* %x) {
+; CHECK-LABEL: extload_nxv4f16_nxv4f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl1re16.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT: vfwcvt.f.f.v v26, v25
+; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu
+; CHECK-NEXT: vfwcvt.f.f.v v8, v26
+; CHECK-NEXT: ret
+ %y = load <vscale x 4 x half>, <vscale x 4 x half>* %x
+ %z = fpext <vscale x 4 x half> %y to <vscale x 4 x double>
+ ret <vscale x 4 x double> %z
+}
+
+define <vscale x 8 x float> @extload_nxv8f16_nxv8f32(<vscale x 8 x half>* %x) {
+; CHECK-LABEL: extload_nxv8f16_nxv8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl2re16.v v26, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu
+; CHECK-NEXT: vfwcvt.f.f.v v8, v26
+; CHECK-NEXT: ret
+ %y = load <vscale x 8 x half>, <vscale x 8 x half>* %x
+ %z = fpext <vscale x 8 x half> %y to <vscale x 8 x float>
+ ret <vscale x 8 x float> %z
+}
+
+define <vscale x 8 x double> @extload_nxv8f16_nxv8f64(<vscale x 8 x half>* %x) {
+; CHECK-LABEL: extload_nxv8f16_nxv8f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl2re16.v v26, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu
+; CHECK-NEXT: vfwcvt.f.f.v v28, v26
+; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu
+; CHECK-NEXT: vfwcvt.f.f.v v8, v28
+; CHECK-NEXT: ret
+ %y = load <vscale x 8 x half>, <vscale x 8 x half>* %x
+ %z = fpext <vscale x 8 x half> %y to <vscale x 8 x double>
+ ret <vscale x 8 x double> %z
+}
+
+define <vscale x 16 x float> @extload_nxv16f16_nxv16f32(<vscale x 16 x half>* %x) {
+; CHECK-LABEL: extload_nxv16f16_nxv16f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl4re16.v v28, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e16,m4,ta,mu
+; CHECK-NEXT: vfwcvt.f.f.v v8, v28
+; CHECK-NEXT: ret
+ %y = load <vscale x 16 x half>, <vscale x 16 x half>* %x
+ %z = fpext <vscale x 16 x half> %y to <vscale x 16 x float>
+ ret <vscale x 16 x float> %z
+}
+
+define void @truncstore_nxv1f32_nxv1f16(<vscale x 1 x float> %x, <vscale x 1 x half>* %z) {
+; CHECK-LABEL: truncstore_nxv1f32_nxv1f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu
+; CHECK-NEXT: vfncvt.f.f.w v25, v8
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %y = fptrunc <vscale x 1 x float> %x to <vscale x 1 x half>
+ store <vscale x 1 x half> %y, <vscale x 1 x half>* %z
+ ret void
+}
+
+define <vscale x 1 x double> @extload_nxv1f32_nxv1f64(<vscale x 1 x float>* %x) {
+; CHECK-LABEL: extload_nxv1f32_nxv1f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vfwcvt.f.f.v v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 1 x float>, <vscale x 1 x float>* %x
+ %z = fpext <vscale x 1 x float> %y to <vscale x 1 x double>
+ ret <vscale x 1 x double> %z
+}
+
+define void @truncstore_nxv2f32_nxv2f16(<vscale x 2 x float> %x, <vscale x 2 x half>* %z) {
+; CHECK-LABEL: truncstore_nxv2f32_nxv2f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu
+; CHECK-NEXT: vfncvt.f.f.w v25, v8
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %y = fptrunc <vscale x 2 x float> %x to <vscale x 2 x half>
+ store <vscale x 2 x half> %y, <vscale x 2 x half>* %z
+ ret void
+}
+
+define <vscale x 2 x double> @extload_nxv2f32_nxv2f64(<vscale x 2 x float>* %x) {
+; CHECK-LABEL: extload_nxv2f32_nxv2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl1re32.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT: vfwcvt.f.f.v v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 2 x float>, <vscale x 2 x float>* %x
+ %z = fpext <vscale x 2 x float> %y to <vscale x 2 x double>
+ ret <vscale x 2 x double> %z
+}
+
+define void @truncstore_nxv4f32_nxv4f16(<vscale x 4 x float> %x, <vscale x 4 x half>* %z) {
+; CHECK-LABEL: truncstore_nxv4f32_nxv4f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu
+; CHECK-NEXT: vfncvt.f.f.w v25, v8
+; CHECK-NEXT: vs1r.v v25, (a0)
+; CHECK-NEXT: ret
+ %y = fptrunc <vscale x 4 x float> %x to <vscale x 4 x half>
+ store <vscale x 4 x half> %y, <vscale x 4 x half>* %z
+ ret void
+}
+
+define <vscale x 4 x double> @extload_nxv4f32_nxv4f64(<vscale x 4 x float>* %x) {
+; CHECK-LABEL: extload_nxv4f32_nxv4f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl2re32.v v26, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu
+; CHECK-NEXT: vfwcvt.f.f.v v8, v26
+; CHECK-NEXT: ret
+ %y = load <vscale x 4 x float>, <vscale x 4 x float>* %x
+ %z = fpext <vscale x 4 x float> %y to <vscale x 4 x double>
+ ret <vscale x 4 x double> %z
+}
+
+define void @truncstore_nxv8f32_nxv8f16(<vscale x 8 x float> %x, <vscale x 8 x half>* %z) {
+; CHECK-LABEL: truncstore_nxv8f32_nxv8f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu
+; CHECK-NEXT: vfncvt.f.f.w v26, v8
+; CHECK-NEXT: vs2r.v v26, (a0)
+; CHECK-NEXT: ret
+ %y = fptrunc <vscale x 8 x float> %x to <vscale x 8 x half>
+ store <vscale x 8 x half> %y, <vscale x 8 x half>* %z
+ ret void
+}
+
+define <vscale x 8 x double> @extload_nxv8f32_nxv8f64(<vscale x 8 x float>* %x) {
+; CHECK-LABEL: extload_nxv8f32_nxv8f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl4re32.v v28, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu
+; CHECK-NEXT: vfwcvt.f.f.v v8, v28
+; CHECK-NEXT: ret
+ %y = load <vscale x 8 x float>, <vscale x 8 x float>* %x
+ %z = fpext <vscale x 8 x float> %y to <vscale x 8 x double>
+ ret <vscale x 8 x double> %z
+}
+
+define void @truncstore_nxv16f32_nxv16f16(<vscale x 16 x float> %x, <vscale x 16 x half>* %z) {
+; CHECK-LABEL: truncstore_nxv16f32_nxv16f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16,m4,ta,mu
+; CHECK-NEXT: vfncvt.f.f.w v28, v8
+; CHECK-NEXT: vs4r.v v28, (a0)
+; CHECK-NEXT: ret
+ %y = fptrunc <vscale x 16 x float> %x to <vscale x 16 x half>
+ store <vscale x 16 x half> %y, <vscale x 16 x half>* %z
+ ret void
+}
+
+define void @truncstore_nxv1f64_nxv1f16(<vscale x 1 x double> %x, <vscale x 1 x half>* %z) {
+; CHECK-LABEL: truncstore_nxv1f64_nxv1f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu
+; CHECK-NEXT: vfncvt.rod.f.f.w v25, v8
+; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu
+; CHECK-NEXT: vfncvt.f.f.w v26, v25
+; CHECK-NEXT: vse16.v v26, (a0)
+; CHECK-NEXT: ret
+ %y = fptrunc <vscale x 1 x double> %x to <vscale x 1 x half>
+ store <vscale x 1 x half> %y, <vscale x 1 x half>* %z
+ ret void
+}
+
+define void @truncstore_nxv1f64_nxv1f32(<vscale x 1 x double> %x, <vscale x 1 x float>* %z) {
+; CHECK-LABEL: truncstore_nxv1f64_nxv1f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu
+; CHECK-NEXT: vfncvt.f.f.w v25, v8
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: ret
+ %y = fptrunc <vscale x 1 x double> %x to <vscale x 1 x float>
+ store <vscale x 1 x float> %y, <vscale x 1 x float>* %z
+ ret void
+}
+
+define void @truncstore_nxv2f64_nxv2f16(<vscale x 2 x double> %x, <vscale x 2 x half>* %z) {
+; CHECK-LABEL: truncstore_nxv2f64_nxv2f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu
+; CHECK-NEXT: vfncvt.rod.f.f.w v25, v8
+; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu
+; CHECK-NEXT: vfncvt.f.f.w v26, v25
+; CHECK-NEXT: vse16.v v26, (a0)
+; CHECK-NEXT: ret
+ %y = fptrunc <vscale x 2 x double> %x to <vscale x 2 x half>
+ store <vscale x 2 x half> %y, <vscale x 2 x half>* %z
+ ret void
+}
+
+define void @truncstore_nxv2f64_nxv2f32(<vscale x 2 x double> %x, <vscale x 2 x float>* %z) {
+; CHECK-LABEL: truncstore_nxv2f64_nxv2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu
+; CHECK-NEXT: vfncvt.f.f.w v25, v8
+; CHECK-NEXT: vs1r.v v25, (a0)
+; CHECK-NEXT: ret
+ %y = fptrunc <vscale x 2 x double> %x to <vscale x 2 x float>
+ store <vscale x 2 x float> %y, <vscale x 2 x float>* %z
+ ret void
+}
+
+define void @truncstore_nxv4f64_nxv4f16(<vscale x 4 x double> %x, <vscale x 4 x half>* %z) {
+; CHECK-LABEL: truncstore_nxv4f64_nxv4f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu
+; CHECK-NEXT: vfncvt.rod.f.f.w v26, v8
+; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu
+; CHECK-NEXT: vfncvt.f.f.w v25, v26
+; CHECK-NEXT: vs1r.v v25, (a0)
+; CHECK-NEXT: ret
+ %y = fptrunc <vscale x 4 x double> %x to <vscale x 4 x half>
+ store <vscale x 4 x half> %y, <vscale x 4 x half>* %z
+ ret void
+}
+
+define void @truncstore_nxv4f64_nxv4f32(<vscale x 4 x double> %x, <vscale x 4 x float>* %z) {
+; CHECK-LABEL: truncstore_nxv4f64_nxv4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu
+; CHECK-NEXT: vfncvt.f.f.w v26, v8
+; CHECK-NEXT: vs2r.v v26, (a0)
+; CHECK-NEXT: ret
+ %y = fptrunc <vscale x 4 x double> %x to <vscale x 4 x float>
+ store <vscale x 4 x float> %y, <vscale x 4 x float>* %z
+ ret void
+}
+
+define void @truncstore_nxv8f64_nxv8f16(<vscale x 8 x double> %x, <vscale x 8 x half>* %z) {
+; CHECK-LABEL: truncstore_nxv8f64_nxv8f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu
+; CHECK-NEXT: vfncvt.rod.f.f.w v28, v8
+; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu
+; CHECK-NEXT: vfncvt.f.f.w v26, v28
+; CHECK-NEXT: vs2r.v v26, (a0)
+; CHECK-NEXT: ret
+ %y = fptrunc <vscale x 8 x double> %x to <vscale x 8 x half>
+ store <vscale x 8 x half> %y, <vscale x 8 x half>* %z
+ ret void
+}
+
+define void @truncstore_nxv8f64_nxv8f32(<vscale x 8 x double> %x, <vscale x 8 x float>* %z) {
+; CHECK-LABEL: truncstore_nxv8f64_nxv8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu
+; CHECK-NEXT: vfncvt.f.f.w v28, v8
+; CHECK-NEXT: vs4r.v v28, (a0)
+; CHECK-NEXT: ret
+ %y = fptrunc <vscale x 8 x double> %x to <vscale x 8 x float>
+ store <vscale x 8 x float> %y, <vscale x 8 x float>* %z
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll
new file mode 100644
index 0000000000000..481c88e91f147
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll
@@ -0,0 +1,2411 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4
+; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4
+
+define <2 x i16> @sextload_v2i1_v2i16(<2 x i1>* %x) {
+; CHECK-LABEL: sextload_v2i1_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu
+; CHECK-NEXT: vle1.v v0, (a0)
+; CHECK-NEXT: vsetivli a0, 2, e16,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v8, v25, -1, v0
+; CHECK-NEXT: ret
+ %y = load <2 x i1>, <2 x i1>* %x
+ %z = sext <2 x i1> %y to <2 x i16>
+ ret <2 x i16> %z
+}
+
+define <2 x i16> @sextload_v2i8_v2i16(<2 x i8>* %x) {
+; CHECK-LABEL: sextload_v2i8_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vsetivli a0, 2, e16,m1,ta,mu
+; CHECK-NEXT: vsext.vf2 v8, v25
+; CHECK-NEXT: ret
+ %y = load <2 x i8>, <2 x i8>* %x
+ %z = sext <2 x i8> %y to <2 x i16>
+ ret <2 x i16> %z
+}
+
+define <2 x i16> @zextload_v2i8_v2i16(<2 x i8>* %x) {
+; CHECK-LABEL: zextload_v2i8_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vsetivli a0, 2, e16,m1,ta,mu
+; CHECK-NEXT: vzext.vf2 v8, v25
+; CHECK-NEXT: ret
+ %y = load <2 x i8>, <2 x i8>* %x
+ %z = zext <2 x i8> %y to <2 x i16>
+ ret <2 x i16> %z
+}
+
+define <2 x i32> @sextload_v2i8_v2i32(<2 x i8>* %x) {
+; CHECK-LABEL: sextload_v2i8_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vsetivli a0, 2, e32,m1,ta,mu
+; CHECK-NEXT: vsext.vf4 v8, v25
+; CHECK-NEXT: ret
+ %y = load <2 x i8>, <2 x i8>* %x
+ %z = sext <2 x i8> %y to <2 x i32>
+ ret <2 x i32> %z
+}
+
+define <2 x i32> @zextload_v2i8_v2i32(<2 x i8>* %x) {
+; CHECK-LABEL: zextload_v2i8_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vsetivli a0, 2, e32,m1,ta,mu
+; CHECK-NEXT: vzext.vf4 v8, v25
+; CHECK-NEXT: ret
+ %y = load <2 x i8>, <2 x i8>* %x
+ %z = zext <2 x i8> %y to <2 x i32>
+ ret <2 x i32> %z
+}
+
+define <2 x i64> @sextload_v2i8_v2i64(<2 x i8>* %x) {
+; CHECK-LABEL: sextload_v2i8_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; CHECK-NEXT: vsext.vf8 v8, v25
+; CHECK-NEXT: ret
+ %y = load <2 x i8>, <2 x i8>* %x
+ %z = sext <2 x i8> %y to <2 x i64>
+ ret <2 x i64> %z
+}
+
+define <2 x i64> @zextload_v2i8_v2i64(<2 x i8>* %x) {
+; CHECK-LABEL: zextload_v2i8_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; CHECK-NEXT: vzext.vf8 v8, v25
+; CHECK-NEXT: ret
+ %y = load <2 x i8>, <2 x i8>* %x
+ %z = zext <2 x i8> %y to <2 x i64>
+ ret <2 x i64> %z
+}
+
+define <4 x i16> @sextload_v4i8_v4i16(<4 x i8>* %x) {
+; CHECK-LABEL: sextload_v4i8_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 4, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vsetivli a0, 4, e16,m1,ta,mu
+; CHECK-NEXT: vsext.vf2 v8, v25
+; CHECK-NEXT: ret
+ %y = load <4 x i8>, <4 x i8>* %x
+ %z = sext <4 x i8> %y to <4 x i16>
+ ret <4 x i16> %z
+}
+
+define <4 x i16> @zextload_v4i8_v4i16(<4 x i8>* %x) {
+; CHECK-LABEL: zextload_v4i8_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 4, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vsetivli a0, 4, e16,m1,ta,mu
+; CHECK-NEXT: vzext.vf2 v8, v25
+; CHECK-NEXT: ret
+ %y = load <4 x i8>, <4 x i8>* %x
+ %z = zext <4 x i8> %y to <4 x i16>
+ ret <4 x i16> %z
+}
+
+define <4 x i32> @sextload_v4i8_v4i32(<4 x i8>* %x) {
+; CHECK-LABEL: sextload_v4i8_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 4, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vsetivli a0, 4, e32,m1,ta,mu
+; CHECK-NEXT: vsext.vf4 v8, v25
+; CHECK-NEXT: ret
+ %y = load <4 x i8>, <4 x i8>* %x
+ %z = sext <4 x i8> %y to <4 x i32>
+ ret <4 x i32> %z
+}
+
+define <4 x i32> @zextload_v4i8_v4i32(<4 x i8>* %x) {
+; CHECK-LABEL: zextload_v4i8_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 4, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vsetivli a0, 4, e32,m1,ta,mu
+; CHECK-NEXT: vzext.vf4 v8, v25
+; CHECK-NEXT: ret
+ %y = load <4 x i8>, <4 x i8>* %x
+ %z = zext <4 x i8> %y to <4 x i32>
+ ret <4 x i32> %z
+}
+
+define <4 x i64> @sextload_v4i8_v4i64(<4 x i8>* %x) {
+; LMULMAX1-LABEL: sextload_v4i8_v4i64:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 4, e8,m1,ta,mu
+; LMULMAX1-NEXT: vle8.v v25, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf8 v8, v25
+; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf8 v9, v25
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX4-LABEL: sextload_v4i8_v4i64:
+; LMULMAX4: # %bb.0:
+; LMULMAX4-NEXT: vsetivli a1, 4, e8,m1,ta,mu
+; LMULMAX4-NEXT: vle8.v v25, (a0)
+; LMULMAX4-NEXT: vsetivli a0, 4, e64,m2,ta,mu
+; LMULMAX4-NEXT: vsext.vf8 v8, v25
+; LMULMAX4-NEXT: ret
+ %y = load <4 x i8>, <4 x i8>* %x
+ %z = sext <4 x i8> %y to <4 x i64>
+ ret <4 x i64> %z
+}
+
+define <4 x i64> @zextload_v4i8_v4i64(<4 x i8>* %x) {
+; LMULMAX1-LABEL: zextload_v4i8_v4i64:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 4, e8,m1,ta,mu
+; LMULMAX1-NEXT: vle8.v v25, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf8 v8, v25
+; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf8 v9, v25
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX4-LABEL: zextload_v4i8_v4i64:
+; LMULMAX4: # %bb.0:
+; LMULMAX4-NEXT: vsetivli a1, 4, e8,m1,ta,mu
+; LMULMAX4-NEXT: vle8.v v25, (a0)
+; LMULMAX4-NEXT: vsetivli a0, 4, e64,m2,ta,mu
+; LMULMAX4-NEXT: vzext.vf8 v8, v25
+; LMULMAX4-NEXT: ret
+ %y = load <4 x i8>, <4 x i8>* %x
+ %z = zext <4 x i8> %y to <4 x i64>
+ ret <4 x i64> %z
+}
+
+define <8 x i16> @sextload_v8i8_v8i16(<8 x i8>* %x) {
+; CHECK-LABEL: sextload_v8i8_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vsetivli a0, 8, e16,m1,ta,mu
+; CHECK-NEXT: vsext.vf2 v8, v25
+; CHECK-NEXT: ret
+ %y = load <8 x i8>, <8 x i8>* %x
+ %z = sext <8 x i8> %y to <8 x i16>
+ ret <8 x i16> %z
+}
+
+define <8 x i16> @zextload_v8i8_v8i16(<8 x i8>* %x) {
+; CHECK-LABEL: zextload_v8i8_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vsetivli a0, 8, e16,m1,ta,mu
+; CHECK-NEXT: vzext.vf2 v8, v25
+; CHECK-NEXT: ret
+ %y = load <8 x i8>, <8 x i8>* %x
+ %z = zext <8 x i8> %y to <8 x i16>
+ ret <8 x i16> %z
+}
+
+define <8 x i32> @sextload_v8i8_v8i32(<8 x i8>* %x) {
+; LMULMAX1-LABEL: sextload_v8i8_v8i32:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; LMULMAX1-NEXT: vle8.v v25, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf4 v8, v25
+; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4
+; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf4 v9, v25
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX4-LABEL: sextload_v8i8_v8i32:
+; LMULMAX4: # %bb.0:
+; LMULMAX4-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; LMULMAX4-NEXT: vle8.v v25, (a0)
+; LMULMAX4-NEXT: vsetivli a0, 8, e32,m2,ta,mu
+; LMULMAX4-NEXT: vsext.vf4 v8, v25
+; LMULMAX4-NEXT: ret
+ %y = load <8 x i8>, <8 x i8>* %x
+ %z = sext <8 x i8> %y to <8 x i32>
+ ret <8 x i32> %z
+}
+
+define <8 x i32> @zextload_v8i8_v8i32(<8 x i8>* %x) {
+; LMULMAX1-LABEL: zextload_v8i8_v8i32:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; LMULMAX1-NEXT: vle8.v v25, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf4 v8, v25
+; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4
+; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf4 v9, v25
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX4-LABEL: zextload_v8i8_v8i32:
+; LMULMAX4: # %bb.0:
+; LMULMAX4-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; LMULMAX4-NEXT: vle8.v v25, (a0)
+; LMULMAX4-NEXT: vsetivli a0, 8, e32,m2,ta,mu
+; LMULMAX4-NEXT: vzext.vf4 v8, v25
+; LMULMAX4-NEXT: ret
+ %y = load <8 x i8>, <8 x i8>* %x
+ %z = zext <8 x i8> %y to <8 x i32>
+ ret <8 x i32> %z
+}
+
+define <8 x i64> @sextload_v8i8_v8i64(<8 x i8>* %x) {
+; LMULMAX1-LABEL: sextload_v8i8_v8i64:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; LMULMAX1-NEXT: vle8.v v25, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf8 v8, v25
+; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v26, v25, 4
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf8 v10, v26
+; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v26, v26, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf8 v11, v26
+; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf8 v9, v25
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX4-LABEL: sextload_v8i8_v8i64:
+; LMULMAX4: # %bb.0:
+; LMULMAX4-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; LMULMAX4-NEXT: vle8.v v25, (a0)
+; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu
+; LMULMAX4-NEXT: vsext.vf8 v8, v25
+; LMULMAX4-NEXT: ret
+ %y = load <8 x i8>, <8 x i8>* %x
+ %z = sext <8 x i8> %y to <8 x i64>
+ ret <8 x i64> %z
+}
+
+define <8 x i64> @zextload_v8i8_v8i64(<8 x i8>* %x) {
+; LMULMAX1-LABEL: zextload_v8i8_v8i64:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; LMULMAX1-NEXT: vle8.v v25, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf8 v8, v25
+; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v26, v25, 4
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf8 v10, v26
+; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v26, v26, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf8 v11, v26
+; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf8 v9, v25
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX4-LABEL: zextload_v8i8_v8i64:
+; LMULMAX4: # %bb.0:
+; LMULMAX4-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; LMULMAX4-NEXT: vle8.v v25, (a0)
+; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu
+; LMULMAX4-NEXT: vzext.vf8 v8, v25
+; LMULMAX4-NEXT: ret
+ %y = load <8 x i8>, <8 x i8>* %x
+ %z = zext <8 x i8> %y to <8 x i64>
+ ret <8 x i64> %z
+}
+
+define <16 x i16> @sextload_v16i8_v16i16(<16 x i8>* %x) {
+; LMULMAX1-LABEL: sextload_v16i8_v16i16:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT: vle8.v v25, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 8, e16,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf2 v8, v25
+; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v25, 8
+; LMULMAX1-NEXT: vsetivli a0, 8, e16,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf2 v9, v25
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX4-LABEL: sextload_v16i8_v16i16:
+; LMULMAX4: # %bb.0:
+; LMULMAX4-NEXT: vsetivli a1, 16, e8,m1,ta,mu
+; LMULMAX4-NEXT: vle8.v v25, (a0)
+; LMULMAX4-NEXT: vsetivli a0, 16, e16,m2,ta,mu
+; LMULMAX4-NEXT: vsext.vf2 v8, v25
+; LMULMAX4-NEXT: ret
+ %y = load <16 x i8>, <16 x i8>* %x
+ %z = sext <16 x i8> %y to <16 x i16>
+ ret <16 x i16> %z
+}
+
+define <16 x i16> @zextload_v16i8_v16i16(<16 x i8>* %x) {
+; LMULMAX1-LABEL: zextload_v16i8_v16i16:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT: vle8.v v25, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 8, e16,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf2 v8, v25
+; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v25, 8
+; LMULMAX1-NEXT: vsetivli a0, 8, e16,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf2 v9, v25
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX4-LABEL: zextload_v16i8_v16i16:
+; LMULMAX4: # %bb.0:
+; LMULMAX4-NEXT: vsetivli a1, 16, e8,m1,ta,mu
+; LMULMAX4-NEXT: vle8.v v25, (a0)
+; LMULMAX4-NEXT: vsetivli a0, 16, e16,m2,ta,mu
+; LMULMAX4-NEXT: vzext.vf2 v8, v25
+; LMULMAX4-NEXT: ret
+ %y = load <16 x i8>, <16 x i8>* %x
+ %z = zext <16 x i8> %y to <16 x i16>
+ ret <16 x i16> %z
+}
+
+define <16 x i32> @sextload_v16i8_v16i32(<16 x i8>* %x) {
+; LMULMAX1-LABEL: sextload_v16i8_v16i32:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT: vle8.v v25, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf4 v8, v25
+; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v26, v25, 8
+; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf4 v10, v26
+; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v26, v26, 4
+; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf4 v11, v26
+; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4
+; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf4 v9, v25
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX4-LABEL: sextload_v16i8_v16i32:
+; LMULMAX4: # %bb.0:
+; LMULMAX4-NEXT: vsetivli a1, 16, e8,m1,ta,mu
+; LMULMAX4-NEXT: vle8.v v25, (a0)
+; LMULMAX4-NEXT: vsetivli a0, 16, e32,m4,ta,mu
+; LMULMAX4-NEXT: vsext.vf4 v8, v25
+; LMULMAX4-NEXT: ret
+ %y = load <16 x i8>, <16 x i8>* %x
+ %z = sext <16 x i8> %y to <16 x i32>
+ ret <16 x i32> %z
+}
+
+define <16 x i32> @zextload_v16i8_v16i32(<16 x i8>* %x) {
+; LMULMAX1-LABEL: zextload_v16i8_v16i32:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT: vle8.v v25, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf4 v8, v25
+; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v26, v25, 8
+; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf4 v10, v26
+; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v26, v26, 4
+; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf4 v11, v26
+; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4
+; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf4 v9, v25
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX4-LABEL: zextload_v16i8_v16i32:
+; LMULMAX4: # %bb.0:
+; LMULMAX4-NEXT: vsetivli a1, 16, e8,m1,ta,mu
+; LMULMAX4-NEXT: vle8.v v25, (a0)
+; LMULMAX4-NEXT: vsetivli a0, 16, e32,m4,ta,mu
+; LMULMAX4-NEXT: vzext.vf4 v8, v25
+; LMULMAX4-NEXT: ret
+ %y = load <16 x i8>, <16 x i8>* %x
+ %z = zext <16 x i8> %y to <16 x i32>
+ ret <16 x i32> %z
+}
+
+define <16 x i64> @sextload_v16i8_v16i64(<16 x i8>* %x) {
+; LMULMAX1-LABEL: sextload_v16i8_v16i64:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT: vle8.v v25, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf8 v8, v25
+; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v26, v25, 8
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf8 v12, v26
+; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v27, v26, 4
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf8 v14, v27
+; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v27, v27, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf8 v15, v27
+; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v27, v25, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf8 v9, v27
+; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf8 v10, v25
+; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf8 v11, v25
+; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v26, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf8 v13, v25
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX4-LABEL: sextload_v16i8_v16i64:
+; LMULMAX4: # %bb.0:
+; LMULMAX4-NEXT: vsetivli a1, 16, e8,m1,ta,mu
+; LMULMAX4-NEXT: vle8.v v25, (a0)
+; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu
+; LMULMAX4-NEXT: vsext.vf8 v8, v25
+; LMULMAX4-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; LMULMAX4-NEXT: vslidedown.vi v25, v25, 8
+; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu
+; LMULMAX4-NEXT: vsext.vf8 v12, v25
+; LMULMAX4-NEXT: ret
+ %y = load <16 x i8>, <16 x i8>* %x
+ %z = sext <16 x i8> %y to <16 x i64>
+ ret <16 x i64> %z
+}
+
+define <16 x i64> @zextload_v16i8_v16i64(<16 x i8>* %x) {
+; LMULMAX1-LABEL: zextload_v16i8_v16i64:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT: vle8.v v25, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf8 v8, v25
+; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v26, v25, 8
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf8 v12, v26
+; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v27, v26, 4
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf8 v14, v27
+; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v27, v27, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf8 v15, v27
+; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v27, v25, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf8 v9, v27
+; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf8 v10, v25
+; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf8 v11, v25
+; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v26, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf8 v13, v25
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX4-LABEL: zextload_v16i8_v16i64:
+; LMULMAX4: # %bb.0:
+; LMULMAX4-NEXT: vsetivli a1, 16, e8,m1,ta,mu
+; LMULMAX4-NEXT: vle8.v v25, (a0)
+; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu
+; LMULMAX4-NEXT: vzext.vf8 v8, v25
+; LMULMAX4-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; LMULMAX4-NEXT: vslidedown.vi v25, v25, 8
+; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu
+; LMULMAX4-NEXT: vzext.vf8 v12, v25
+; LMULMAX4-NEXT: ret
+ %y = load <16 x i8>, <16 x i8>* %x
+ %z = zext <16 x i8> %y to <16 x i64>
+ ret <16 x i64> %z
+}
+
+define void @truncstore_v2i8_v2i1(<2 x i8> %x, <2 x i1>* %z) {
+; CHECK-LABEL: truncstore_v2i8_v2i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu
+; CHECK-NEXT: vand.vi v25, v8, 1
+; CHECK-NEXT: vmsne.vi v26, v25, 0
+; CHECK-NEXT: vse1.v v26, (a0)
+; CHECK-NEXT: ret
+ %y = trunc <2 x i8> %x to <2 x i1>
+ store <2 x i1> %y, <2 x i1>* %z
+ ret void
+}
+
+define void @truncstore_v2i16_v2i8(<2 x i16> %x, <2 x i8>* %z) {
+; CHECK-LABEL: truncstore_v2i16_v2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 2, e8,mf2,ta,mu
+; CHECK-NEXT: vnsrl.wi v25, v8, 0
+; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %y = trunc <2 x i16> %x to <2 x i8>
+ store <2 x i8> %y, <2 x i8>* %z
+ ret void
+}
+
+define <2 x i32> @sextload_v2i16_v2i32(<2 x i16>* %x) {
+; CHECK-LABEL: sextload_v2i16_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 2, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vsetivli a0, 2, e32,m1,ta,mu
+; CHECK-NEXT: vsext.vf2 v8, v25
+; CHECK-NEXT: ret
+ %y = load <2 x i16>, <2 x i16>* %x
+ %z = sext <2 x i16> %y to <2 x i32>
+ ret <2 x i32> %z
+}
+
+define <2 x i32> @zextload_v2i16_v2i32(<2 x i16>* %x) {
+; CHECK-LABEL: zextload_v2i16_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 2, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vsetivli a0, 2, e32,m1,ta,mu
+; CHECK-NEXT: vzext.vf2 v8, v25
+; CHECK-NEXT: ret
+ %y = load <2 x i16>, <2 x i16>* %x
+ %z = zext <2 x i16> %y to <2 x i32>
+ ret <2 x i32> %z
+}
+
+define <2 x i64> @sextload_v2i16_v2i64(<2 x i16>* %x) {
+; CHECK-LABEL: sextload_v2i16_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 2, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; CHECK-NEXT: vsext.vf4 v8, v25
+; CHECK-NEXT: ret
+ %y = load <2 x i16>, <2 x i16>* %x
+ %z = sext <2 x i16> %y to <2 x i64>
+ ret <2 x i64> %z
+}
+
+define <2 x i64> @zextload_v2i16_v2i64(<2 x i16>* %x) {
+; CHECK-LABEL: zextload_v2i16_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 2, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; CHECK-NEXT: vzext.vf4 v8, v25
+; CHECK-NEXT: ret
+ %y = load <2 x i16>, <2 x i16>* %x
+ %z = zext <2 x i16> %y to <2 x i64>
+ ret <2 x i64> %z
+}
+
+define void @truncstore_v4i16_v4i8(<4 x i16> %x, <4 x i8>* %z) {
+; CHECK-LABEL: truncstore_v4i16_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 4, e8,mf2,ta,mu
+; CHECK-NEXT: vnsrl.wi v25, v8, 0
+; CHECK-NEXT: vsetivli a1, 4, e8,m1,ta,mu
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %y = trunc <4 x i16> %x to <4 x i8>
+ store <4 x i8> %y, <4 x i8>* %z
+ ret void
+}
+
+define <4 x i32> @sextload_v4i16_v4i32(<4 x i16>* %x) {
+; CHECK-LABEL: sextload_v4i16_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 4, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vsetivli a0, 4, e32,m1,ta,mu
+; CHECK-NEXT: vsext.vf2 v8, v25
+; CHECK-NEXT: ret
+ %y = load <4 x i16>, <4 x i16>* %x
+ %z = sext <4 x i16> %y to <4 x i32>
+ ret <4 x i32> %z
+}
+
+define <4 x i32> @zextload_v4i16_v4i32(<4 x i16>* %x) {
+; CHECK-LABEL: zextload_v4i16_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 4, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vsetivli a0, 4, e32,m1,ta,mu
+; CHECK-NEXT: vzext.vf2 v8, v25
+; CHECK-NEXT: ret
+ %y = load <4 x i16>, <4 x i16>* %x
+ %z = zext <4 x i16> %y to <4 x i32>
+ ret <4 x i32> %z
+}
+
+define <4 x i64> @sextload_v4i16_v4i64(<4 x i16>* %x) {
+; LMULMAX1-LABEL: sextload_v4i16_v4i64:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 4, e16,m1,ta,mu
+; LMULMAX1-NEXT: vle16.v v25, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf4 v8, v25
+; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf4 v9, v25
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX4-LABEL: sextload_v4i16_v4i64:
+; LMULMAX4: # %bb.0:
+; LMULMAX4-NEXT: vsetivli a1, 4, e16,m1,ta,mu
+; LMULMAX4-NEXT: vle16.v v25, (a0)
+; LMULMAX4-NEXT: vsetivli a0, 4, e64,m2,ta,mu
+; LMULMAX4-NEXT: vsext.vf4 v8, v25
+; LMULMAX4-NEXT: ret
+ %y = load <4 x i16>, <4 x i16>* %x
+ %z = sext <4 x i16> %y to <4 x i64>
+ ret <4 x i64> %z
+}
+
+define <4 x i64> @zextload_v4i16_v4i64(<4 x i16>* %x) {
+; LMULMAX1-LABEL: zextload_v4i16_v4i64:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 4, e16,m1,ta,mu
+; LMULMAX1-NEXT: vle16.v v25, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf4 v8, v25
+; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf4 v9, v25
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX4-LABEL: zextload_v4i16_v4i64:
+; LMULMAX4: # %bb.0:
+; LMULMAX4-NEXT: vsetivli a1, 4, e16,m1,ta,mu
+; LMULMAX4-NEXT: vle16.v v25, (a0)
+; LMULMAX4-NEXT: vsetivli a0, 4, e64,m2,ta,mu
+; LMULMAX4-NEXT: vzext.vf4 v8, v25
+; LMULMAX4-NEXT: ret
+ %y = load <4 x i16>, <4 x i16>* %x
+ %z = zext <4 x i16> %y to <4 x i64>
+ ret <4 x i64> %z
+}
+
+define void @truncstore_v8i16_v8i8(<8 x i16> %x, <8 x i8>* %z) {
+; CHECK-LABEL: truncstore_v8i16_v8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 8, e8,mf2,ta,mu
+; CHECK-NEXT: vnsrl.wi v25, v8, 0
+; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %y = trunc <8 x i16> %x to <8 x i8>
+ store <8 x i8> %y, <8 x i8>* %z
+ ret void
+}
+
+define <8 x i32> @sextload_v8i16_v8i32(<8 x i16>* %x) {
+; LMULMAX1-LABEL: sextload_v8i16_v8i32:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu
+; LMULMAX1-NEXT: vle16.v v25, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf2 v8, v25
+; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4
+; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf2 v9, v25
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX4-LABEL: sextload_v8i16_v8i32:
+; LMULMAX4: # %bb.0:
+; LMULMAX4-NEXT: vsetivli a1, 8, e16,m1,ta,mu
+; LMULMAX4-NEXT: vle16.v v25, (a0)
+; LMULMAX4-NEXT: vsetivli a0, 8, e32,m2,ta,mu
+; LMULMAX4-NEXT: vsext.vf2 v8, v25
+; LMULMAX4-NEXT: ret
+ %y = load <8 x i16>, <8 x i16>* %x
+ %z = sext <8 x i16> %y to <8 x i32>
+ ret <8 x i32> %z
+}
+
+define <8 x i32> @zextload_v8i16_v8i32(<8 x i16>* %x) {
+; LMULMAX1-LABEL: zextload_v8i16_v8i32:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu
+; LMULMAX1-NEXT: vle16.v v25, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf2 v8, v25
+; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4
+; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf2 v9, v25
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX4-LABEL: zextload_v8i16_v8i32:
+; LMULMAX4: # %bb.0:
+; LMULMAX4-NEXT: vsetivli a1, 8, e16,m1,ta,mu
+; LMULMAX4-NEXT: vle16.v v25, (a0)
+; LMULMAX4-NEXT: vsetivli a0, 8, e32,m2,ta,mu
+; LMULMAX4-NEXT: vzext.vf2 v8, v25
+; LMULMAX4-NEXT: ret
+ %y = load <8 x i16>, <8 x i16>* %x
+ %z = zext <8 x i16> %y to <8 x i32>
+ ret <8 x i32> %z
+}
+
+define <8 x i64> @sextload_v8i16_v8i64(<8 x i16>* %x) {
+; LMULMAX1-LABEL: sextload_v8i16_v8i64:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu
+; LMULMAX1-NEXT: vle16.v v25, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf4 v8, v25
+; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v26, v25, 4
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf4 v10, v26
+; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v26, v26, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf4 v11, v26
+; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf4 v9, v25
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX4-LABEL: sextload_v8i16_v8i64:
+; LMULMAX4: # %bb.0:
+; LMULMAX4-NEXT: vsetivli a1, 8, e16,m1,ta,mu
+; LMULMAX4-NEXT: vle16.v v25, (a0)
+; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu
+; LMULMAX4-NEXT: vsext.vf4 v8, v25
+; LMULMAX4-NEXT: ret
+ %y = load <8 x i16>, <8 x i16>* %x
+ %z = sext <8 x i16> %y to <8 x i64>
+ ret <8 x i64> %z
+}
+
+define <8 x i64> @zextload_v8i16_v8i64(<8 x i16>* %x) {
+; LMULMAX1-LABEL: zextload_v8i16_v8i64:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu
+; LMULMAX1-NEXT: vle16.v v25, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf4 v8, v25
+; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v26, v25, 4
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf4 v10, v26
+; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v26, v26, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf4 v11, v26
+; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf4 v9, v25
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX4-LABEL: zextload_v8i16_v8i64:
+; LMULMAX4: # %bb.0:
+; LMULMAX4-NEXT: vsetivli a1, 8, e16,m1,ta,mu
+; LMULMAX4-NEXT: vle16.v v25, (a0)
+; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu
+; LMULMAX4-NEXT: vzext.vf4 v8, v25
+; LMULMAX4-NEXT: ret
+ %y = load <8 x i16>, <8 x i16>* %x
+ %z = zext <8 x i16> %y to <8 x i64>
+ ret <8 x i64> %z
+}
+
+define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %z) {
+; LMULMAX1-LABEL: truncstore_v16i16_v16i8:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 8, e8,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v25, v8, 0
+; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT: vmv.v.i v26, 0
+; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v26, v25, 0
+; LMULMAX1-NEXT: vsetivli a1, 8, e8,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v25, v9, 0
+; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v26, v25, 8
+; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT: vse8.v v26, (a0)
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX4-LABEL: truncstore_v16i16_v16i8:
+; LMULMAX4: # %bb.0:
+; LMULMAX4-NEXT: vsetivli a1, 16, e8,m1,ta,mu
+; LMULMAX4-NEXT: vnsrl.wi v25, v8, 0
+; LMULMAX4-NEXT: vse8.v v25, (a0)
+; LMULMAX4-NEXT: ret
+ %y = trunc <16 x i16> %x to <16 x i8>
+ store <16 x i8> %y, <16 x i8>* %z
+ ret void
+}
+
+define <16 x i32> @sextload_v16i16_v16i32(<16 x i16>* %x) {
+; LMULMAX1-LABEL: sextload_v16i16_v16i32:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu
+; LMULMAX1-NEXT: vle16.v v25, (a0)
+; LMULMAX1-NEXT: addi a0, a0, 16
+; LMULMAX1-NEXT: vle16.v v26, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf2 v8, v25
+; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4
+; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf2 v9, v25
+; LMULMAX1-NEXT: vsext.vf2 v10, v26
+; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v26, 4
+; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf2 v11, v25
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX4-LABEL: sextload_v16i16_v16i32:
+; LMULMAX4: # %bb.0:
+; LMULMAX4-NEXT: vsetivli a1, 16, e16,m2,ta,mu
+; LMULMAX4-NEXT: vle16.v v26, (a0)
+; LMULMAX4-NEXT: vsetivli a0, 16, e32,m4,ta,mu
+; LMULMAX4-NEXT: vsext.vf2 v8, v26
+; LMULMAX4-NEXT: ret
+ %y = load <16 x i16>, <16 x i16>* %x
+ %z = sext <16 x i16> %y to <16 x i32>
+ ret <16 x i32> %z
+}
+
+define <16 x i32> @zextload_v16i16_v16i32(<16 x i16>* %x) {
+; LMULMAX1-LABEL: zextload_v16i16_v16i32:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu
+; LMULMAX1-NEXT: vle16.v v25, (a0)
+; LMULMAX1-NEXT: addi a0, a0, 16
+; LMULMAX1-NEXT: vle16.v v26, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf2 v8, v25
+; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4
+; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf2 v9, v25
+; LMULMAX1-NEXT: vzext.vf2 v10, v26
+; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v26, 4
+; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf2 v11, v25
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX4-LABEL: zextload_v16i16_v16i32:
+; LMULMAX4: # %bb.0:
+; LMULMAX4-NEXT: vsetivli a1, 16, e16,m2,ta,mu
+; LMULMAX4-NEXT: vle16.v v26, (a0)
+; LMULMAX4-NEXT: vsetivli a0, 16, e32,m4,ta,mu
+; LMULMAX4-NEXT: vzext.vf2 v8, v26
+; LMULMAX4-NEXT: ret
+ %y = load <16 x i16>, <16 x i16>* %x
+ %z = zext <16 x i16> %y to <16 x i32>
+ ret <16 x i32> %z
+}
+
+define <16 x i64> @sextload_v16i16_v16i64(<16 x i16>* %x) {
+; LMULMAX1-LABEL: sextload_v16i16_v16i64:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu
+; LMULMAX1-NEXT: vle16.v v25, (a0)
+; LMULMAX1-NEXT: addi a0, a0, 16
+; LMULMAX1-NEXT: vle16.v v26, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf4 v8, v25
+; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v27, v25, 4
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf4 v10, v27
+; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v27, v27, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf4 v11, v27
+; LMULMAX1-NEXT: vsext.vf4 v12, v26
+; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v27, v26, 4
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf4 v14, v27
+; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v27, v27, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf4 v15, v27
+; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf4 v9, v25
+; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v26, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf4 v13, v25
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX4-LABEL: sextload_v16i16_v16i64:
+; LMULMAX4: # %bb.0:
+; LMULMAX4-NEXT: vsetivli a1, 16, e16,m2,ta,mu
+; LMULMAX4-NEXT: vle16.v v26, (a0)
+; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu
+; LMULMAX4-NEXT: vsext.vf4 v8, v26
+; LMULMAX4-NEXT: vsetivli a0, 8, e16,m2,ta,mu
+; LMULMAX4-NEXT: vslidedown.vi v26, v26, 8
+; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu
+; LMULMAX4-NEXT: vsext.vf4 v12, v26
+; LMULMAX4-NEXT: ret
+ %y = load <16 x i16>, <16 x i16>* %x
+ %z = sext <16 x i16> %y to <16 x i64>
+ ret <16 x i64> %z
+}
+
+define <16 x i64> @zextload_v16i16_v16i64(<16 x i16>* %x) {
+; LMULMAX1-LABEL: zextload_v16i16_v16i64:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu
+; LMULMAX1-NEXT: vle16.v v25, (a0)
+; LMULMAX1-NEXT: addi a0, a0, 16
+; LMULMAX1-NEXT: vle16.v v26, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf4 v8, v25
+; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v27, v25, 4
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf4 v10, v27
+; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v27, v27, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf4 v11, v27
+; LMULMAX1-NEXT: vzext.vf4 v12, v26
+; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v27, v26, 4
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf4 v14, v27
+; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v27, v27, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf4 v15, v27
+; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf4 v9, v25
+; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v26, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf4 v13, v25
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX4-LABEL: zextload_v16i16_v16i64:
+; LMULMAX4: # %bb.0:
+; LMULMAX4-NEXT: vsetivli a1, 16, e16,m2,ta,mu
+; LMULMAX4-NEXT: vle16.v v26, (a0)
+; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu
+; LMULMAX4-NEXT: vzext.vf4 v8, v26
+; LMULMAX4-NEXT: vsetivli a0, 8, e16,m2,ta,mu
+; LMULMAX4-NEXT: vslidedown.vi v26, v26, 8
+; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu
+; LMULMAX4-NEXT: vzext.vf4 v12, v26
+; LMULMAX4-NEXT: ret
+ %y = load <16 x i16>, <16 x i16>* %x
+ %z = zext <16 x i16> %y to <16 x i64>
+ ret <16 x i64> %z
+}
+
+define void @truncstore_v2i32_v2i8(<2 x i32> %x, <2 x i8>* %z) {
+; CHECK-LABEL: truncstore_v2i32_v2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 2, e16,mf2,ta,mu
+; CHECK-NEXT: vnsrl.wi v25, v8, 0
+; CHECK-NEXT: vsetivli a1, 2, e8,mf4,ta,mu
+; CHECK-NEXT: vnsrl.wi v26, v25, 0
+; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu
+; CHECK-NEXT: vse8.v v26, (a0)
+; CHECK-NEXT: ret
+ %y = trunc <2 x i32> %x to <2 x i8>
+ store <2 x i8> %y, <2 x i8>* %z
+ ret void
+}
+
+define void @truncstore_v2i32_v2i16(<2 x i32> %x, <2 x i16>* %z) {
+; CHECK-LABEL: truncstore_v2i32_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 2, e16,mf2,ta,mu
+; CHECK-NEXT: vnsrl.wi v25, v8, 0
+; CHECK-NEXT: vsetivli a1, 2, e16,m1,ta,mu
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %y = trunc <2 x i32> %x to <2 x i16>
+ store <2 x i16> %y, <2 x i16>* %z
+ ret void
+}
+
+define <2 x i64> @sextload_v2i32_v2i64(<2 x i32>* %x) {
+; CHECK-LABEL: sextload_v2i32_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 2, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; CHECK-NEXT: vsext.vf2 v8, v25
+; CHECK-NEXT: ret
+ %y = load <2 x i32>, <2 x i32>* %x
+ %z = sext <2 x i32> %y to <2 x i64>
+ ret <2 x i64> %z
+}
+
+define <2 x i64> @zextload_v2i32_v2i64(<2 x i32>* %x) {
+; CHECK-LABEL: zextload_v2i32_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 2, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; CHECK-NEXT: vzext.vf2 v8, v25
+; CHECK-NEXT: ret
+ %y = load <2 x i32>, <2 x i32>* %x
+ %z = zext <2 x i32> %y to <2 x i64>
+ ret <2 x i64> %z
+}
+
+define void @truncstore_v4i32_v4i8(<4 x i32> %x, <4 x i8>* %z) {
+; CHECK-LABEL: truncstore_v4i32_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 4, e16,mf2,ta,mu
+; CHECK-NEXT: vnsrl.wi v25, v8, 0
+; CHECK-NEXT: vsetivli a1, 4, e8,mf4,ta,mu
+; CHECK-NEXT: vnsrl.wi v26, v25, 0
+; CHECK-NEXT: vsetivli a1, 4, e8,m1,ta,mu
+; CHECK-NEXT: vse8.v v26, (a0)
+; CHECK-NEXT: ret
+ %y = trunc <4 x i32> %x to <4 x i8>
+ store <4 x i8> %y, <4 x i8>* %z
+ ret void
+}
+
+define void @truncstore_v4i32_v4i16(<4 x i32> %x, <4 x i16>* %z) {
+; CHECK-LABEL: truncstore_v4i32_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 4, e16,mf2,ta,mu
+; CHECK-NEXT: vnsrl.wi v25, v8, 0
+; CHECK-NEXT: vsetivli a1, 4, e16,m1,ta,mu
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %y = trunc <4 x i32> %x to <4 x i16>
+ store <4 x i16> %y, <4 x i16>* %z
+ ret void
+}
+
+define <4 x i64> @sextload_v4i32_v4i64(<4 x i32>* %x) {
+; LMULMAX1-LABEL: sextload_v4i32_v4i64:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; LMULMAX1-NEXT: vle32.v v25, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf2 v8, v25
+; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf2 v9, v25
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX4-LABEL: sextload_v4i32_v4i64:
+; LMULMAX4: # %bb.0:
+; LMULMAX4-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; LMULMAX4-NEXT: vle32.v v25, (a0)
+; LMULMAX4-NEXT: vsetivli a0, 4, e64,m2,ta,mu
+; LMULMAX4-NEXT: vsext.vf2 v8, v25
+; LMULMAX4-NEXT: ret
+ %y = load <4 x i32>, <4 x i32>* %x
+ %z = sext <4 x i32> %y to <4 x i64>
+ ret <4 x i64> %z
+}
+
+define <4 x i64> @zextload_v4i32_v4i64(<4 x i32>* %x) {
+; LMULMAX1-LABEL: zextload_v4i32_v4i64:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; LMULMAX1-NEXT: vle32.v v25, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf2 v8, v25
+; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf2 v9, v25
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX4-LABEL: zextload_v4i32_v4i64:
+; LMULMAX4: # %bb.0:
+; LMULMAX4-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; LMULMAX4-NEXT: vle32.v v25, (a0)
+; LMULMAX4-NEXT: vsetivli a0, 4, e64,m2,ta,mu
+; LMULMAX4-NEXT: vzext.vf2 v8, v25
+; LMULMAX4-NEXT: ret
+ %y = load <4 x i32>, <4 x i32>* %x
+ %z = zext <4 x i32> %y to <4 x i64>
+ ret <4 x i64> %z
+}
+
+define void @truncstore_v8i32_v8i8(<8 x i32> %x, <8 x i8>* %z) {
+; LMULMAX1-LABEL: truncstore_v8i32_v8i8:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 4, e16,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v25, v8, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e8,mf4,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v26, v25, 0
+; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; LMULMAX1-NEXT: vmv.v.i v25, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e8,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v25, v26, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e16,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v26, v9, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e8,mf4,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v27, v26, 0
+; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v25, v27, 4
+; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; LMULMAX1-NEXT: vse8.v v25, (a0)
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX4-LABEL: truncstore_v8i32_v8i8:
+; LMULMAX4: # %bb.0:
+; LMULMAX4-NEXT: vsetivli a1, 8, e16,m1,ta,mu
+; LMULMAX4-NEXT: vnsrl.wi v25, v8, 0
+; LMULMAX4-NEXT: vsetivli a1, 8, e8,mf2,ta,mu
+; LMULMAX4-NEXT: vnsrl.wi v26, v25, 0
+; LMULMAX4-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; LMULMAX4-NEXT: vse8.v v26, (a0)
+; LMULMAX4-NEXT: ret
+ %y = trunc <8 x i32> %x to <8 x i8>
+ store <8 x i8> %y, <8 x i8>* %z
+ ret void
+}
+
+define void @truncstore_v8i32_v8i16(<8 x i32> %x, <8 x i16>* %z) {
+; LMULMAX1-LABEL: truncstore_v8i32_v8i16:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 4, e16,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v25, v8, 0
+; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu
+; LMULMAX1-NEXT: vmv.v.i v26, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e16,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v26, v25, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e16,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v25, v9, 0
+; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v26, v25, 4
+; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu
+; LMULMAX1-NEXT: vse16.v v26, (a0)
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX4-LABEL: truncstore_v8i32_v8i16:
+; LMULMAX4: # %bb.0:
+; LMULMAX4-NEXT: vsetivli a1, 8, e16,m1,ta,mu
+; LMULMAX4-NEXT: vnsrl.wi v25, v8, 0
+; LMULMAX4-NEXT: vse16.v v25, (a0)
+; LMULMAX4-NEXT: ret
+ %y = trunc <8 x i32> %x to <8 x i16>
+ store <8 x i16> %y, <8 x i16>* %z
+ ret void
+}
+
+define <8 x i64> @sextload_v8i32_v8i64(<8 x i32>* %x) {
+; LMULMAX1-LABEL: sextload_v8i32_v8i64:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; LMULMAX1-NEXT: vle32.v v25, (a0)
+; LMULMAX1-NEXT: addi a0, a0, 16
+; LMULMAX1-NEXT: vle32.v v26, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf2 v8, v25
+; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf2 v9, v25
+; LMULMAX1-NEXT: vsext.vf2 v10, v26
+; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v26, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf2 v11, v25
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX4-LABEL: sextload_v8i32_v8i64:
+; LMULMAX4: # %bb.0:
+; LMULMAX4-NEXT: vsetivli a1, 8, e32,m2,ta,mu
+; LMULMAX4-NEXT: vle32.v v26, (a0)
+; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu
+; LMULMAX4-NEXT: vsext.vf2 v8, v26
+; LMULMAX4-NEXT: ret
+ %y = load <8 x i32>, <8 x i32>* %x
+ %z = sext <8 x i32> %y to <8 x i64>
+ ret <8 x i64> %z
+}
+
+define <8 x i64> @zextload_v8i32_v8i64(<8 x i32>* %x) {
+; LMULMAX1-LABEL: zextload_v8i32_v8i64:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; LMULMAX1-NEXT: vle32.v v25, (a0)
+; LMULMAX1-NEXT: addi a0, a0, 16
+; LMULMAX1-NEXT: vle32.v v26, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf2 v8, v25
+; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf2 v9, v25
+; LMULMAX1-NEXT: vzext.vf2 v10, v26
+; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v26, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf2 v11, v25
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX4-LABEL: zextload_v8i32_v8i64:
+; LMULMAX4: # %bb.0:
+; LMULMAX4-NEXT: vsetivli a1, 8, e32,m2,ta,mu
+; LMULMAX4-NEXT: vle32.v v26, (a0)
+; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu
+; LMULMAX4-NEXT: vzext.vf2 v8, v26
+; LMULMAX4-NEXT: ret
+ %y = load <8 x i32>, <8 x i32>* %x
+ %z = zext <8 x i32> %y to <8 x i64>
+ ret <8 x i64> %z
+}
+
+define void @truncstore_v16i32_v16i8(<16 x i32> %x, <16 x i8>* %z) {
+; LMULMAX1-LABEL: truncstore_v16i32_v16i8:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 4, e16,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v25, v10, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e8,mf4,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v26, v25, 0
+; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; LMULMAX1-NEXT: vmv.v.i v25, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e8,m1,tu,mu
+; LMULMAX1-NEXT: vmv1r.v v27, v25
+; LMULMAX1-NEXT: vslideup.vi v27, v26, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e16,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v26, v11, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e8,mf4,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v28, v26, 0
+; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v27, v28, 4
+; LMULMAX1-NEXT: vsetivli a1, 4, e16,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v26, v8, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e8,mf4,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v28, v26, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e8,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v25, v28, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e16,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v26, v9, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e8,mf4,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v28, v26, 0
+; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v25, v28, 4
+; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT: vmv.v.i v26, 0
+; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v26, v25, 0
+; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v26, v27, 8
+; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT: vse8.v v26, (a0)
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX4-LABEL: truncstore_v16i32_v16i8:
+; LMULMAX4: # %bb.0:
+; LMULMAX4-NEXT: vsetivli a1, 16, e16,m2,ta,mu
+; LMULMAX4-NEXT: vnsrl.wi v26, v8, 0
+; LMULMAX4-NEXT: vsetivli a1, 16, e8,m1,ta,mu
+; LMULMAX4-NEXT: vnsrl.wi v25, v26, 0
+; LMULMAX4-NEXT: vse8.v v25, (a0)
+; LMULMAX4-NEXT: ret
+ %y = trunc <16 x i32> %x to <16 x i8>
+ store <16 x i8> %y, <16 x i8>* %z
+ ret void
+}
+
+define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %z) {
+; LMULMAX1-LABEL: truncstore_v16i32_v16i16:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 4, e16,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v25, v8, 0
+; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu
+; LMULMAX1-NEXT: vmv.v.i v26, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e16,m1,tu,mu
+; LMULMAX1-NEXT: vmv1r.v v27, v26
+; LMULMAX1-NEXT: vslideup.vi v27, v25, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e16,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v25, v9, 0
+; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v27, v25, 4
+; LMULMAX1-NEXT: vsetivli a1, 4, e16,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v25, v10, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e16,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v26, v25, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e16,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v25, v11, 0
+; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v26, v25, 4
+; LMULMAX1-NEXT: addi a1, a0, 16
+; LMULMAX1-NEXT: vsetivli a2, 8, e16,m1,ta,mu
+; LMULMAX1-NEXT: vse16.v v26, (a1)
+; LMULMAX1-NEXT: vse16.v v27, (a0)
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX4-LABEL: truncstore_v16i32_v16i16:
+; LMULMAX4: # %bb.0:
+; LMULMAX4-NEXT: vsetivli a1, 16, e16,m2,ta,mu
+; LMULMAX4-NEXT: vnsrl.wi v26, v8, 0
+; LMULMAX4-NEXT: vse16.v v26, (a0)
+; LMULMAX4-NEXT: ret
+ %y = trunc <16 x i32> %x to <16 x i16>
+ store <16 x i16> %y, <16 x i16>* %z
+ ret void
+}
+
+define <16 x i64> @sextload_v16i32_v16i64(<16 x i32>* %x) {
+; LMULMAX1-LABEL: sextload_v16i32_v16i64:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: addi a1, a0, 48
+; LMULMAX1-NEXT: vsetivli a2, 4, e32,m1,ta,mu
+; LMULMAX1-NEXT: vle32.v v25, (a1)
+; LMULMAX1-NEXT: addi a1, a0, 32
+; LMULMAX1-NEXT: vle32.v v26, (a1)
+; LMULMAX1-NEXT: vle32.v v27, (a0)
+; LMULMAX1-NEXT: addi a0, a0, 16
+; LMULMAX1-NEXT: vle32.v v28, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf2 v8, v27
+; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v27, v27, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf2 v9, v27
+; LMULMAX1-NEXT: vsext.vf2 v10, v28
+; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v27, v28, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf2 v11, v27
+; LMULMAX1-NEXT: vsext.vf2 v12, v26
+; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v26, v26, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf2 v13, v26
+; LMULMAX1-NEXT: vsext.vf2 v14, v25
+; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vsext.vf2 v15, v25
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX4-LABEL: sextload_v16i32_v16i64:
+; LMULMAX4: # %bb.0:
+; LMULMAX4-NEXT: vsetivli a1, 16, e32,m4,ta,mu
+; LMULMAX4-NEXT: vle32.v v28, (a0)
+; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu
+; LMULMAX4-NEXT: vsext.vf2 v8, v28
+; LMULMAX4-NEXT: vsetivli a0, 8, e32,m4,ta,mu
+; LMULMAX4-NEXT: vslidedown.vi v28, v28, 8
+; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu
+; LMULMAX4-NEXT: vsext.vf2 v12, v28
+; LMULMAX4-NEXT: ret
+ %y = load <16 x i32>, <16 x i32>* %x
+ %z = sext <16 x i32> %y to <16 x i64>
+ ret <16 x i64> %z
+}
+
+define <16 x i64> @zextload_v16i32_v16i64(<16 x i32>* %x) {
+; LMULMAX1-LABEL: zextload_v16i32_v16i64:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: addi a1, a0, 48
+; LMULMAX1-NEXT: vsetivli a2, 4, e32,m1,ta,mu
+; LMULMAX1-NEXT: vle32.v v25, (a1)
+; LMULMAX1-NEXT: addi a1, a0, 32
+; LMULMAX1-NEXT: vle32.v v26, (a1)
+; LMULMAX1-NEXT: vle32.v v27, (a0)
+; LMULMAX1-NEXT: addi a0, a0, 16
+; LMULMAX1-NEXT: vle32.v v28, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf2 v8, v27
+; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v27, v27, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf2 v9, v27
+; LMULMAX1-NEXT: vzext.vf2 v10, v28
+; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v27, v28, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf2 v11, v27
+; LMULMAX1-NEXT: vzext.vf2 v12, v26
+; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v26, v26, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf2 v13, v26
+; LMULMAX1-NEXT: vzext.vf2 v14, v25
+; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vzext.vf2 v15, v25
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX4-LABEL: zextload_v16i32_v16i64:
+; LMULMAX4: # %bb.0:
+; LMULMAX4-NEXT: vsetivli a1, 16, e32,m4,ta,mu
+; LMULMAX4-NEXT: vle32.v v28, (a0)
+; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu
+; LMULMAX4-NEXT: vzext.vf2 v8, v28
+; LMULMAX4-NEXT: vsetivli a0, 8, e32,m4,ta,mu
+; LMULMAX4-NEXT: vslidedown.vi v28, v28, 8
+; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu
+; LMULMAX4-NEXT: vzext.vf2 v12, v28
+; LMULMAX4-NEXT: ret
+ %y = load <16 x i32>, <16 x i32>* %x
+ %z = zext <16 x i32> %y to <16 x i64>
+ ret <16 x i64> %z
+}
+
+define void @truncstore_v2i64_v2i8(<2 x i64> %x, <2 x i8>* %z) {
+; CHECK-LABEL: truncstore_v2i64_v2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; CHECK-NEXT: vnsrl.wi v25, v8, 0
+; CHECK-NEXT: vsetivli a1, 2, e16,mf4,ta,mu
+; CHECK-NEXT: vnsrl.wi v26, v25, 0
+; CHECK-NEXT: vsetivli a1, 2, e8,mf8,ta,mu
+; CHECK-NEXT: vnsrl.wi v25, v26, 0
+; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %y = trunc <2 x i64> %x to <2 x i8>
+ store <2 x i8> %y, <2 x i8>* %z
+ ret void
+}
+
+define void @truncstore_v2i64_v2i16(<2 x i64> %x, <2 x i16>* %z) {
+; CHECK-LABEL: truncstore_v2i64_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; CHECK-NEXT: vnsrl.wi v25, v8, 0
+; CHECK-NEXT: vsetivli a1, 2, e16,mf4,ta,mu
+; CHECK-NEXT: vnsrl.wi v26, v25, 0
+; CHECK-NEXT: vsetivli a1, 2, e16,m1,ta,mu
+; CHECK-NEXT: vse16.v v26, (a0)
+; CHECK-NEXT: ret
+ %y = trunc <2 x i64> %x to <2 x i16>
+ store <2 x i16> %y, <2 x i16>* %z
+ ret void
+}
+
+define void @truncstore_v2i64_v2i32(<2 x i64> %x, <2 x i32>* %z) {
+; CHECK-LABEL: truncstore_v2i64_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; CHECK-NEXT: vnsrl.wi v25, v8, 0
+; CHECK-NEXT: vsetivli a1, 2, e32,m1,ta,mu
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: ret
+ %y = trunc <2 x i64> %x to <2 x i32>
+ store <2 x i32> %y, <2 x i32>* %z
+ ret void
+}
+
+define void @truncstore_v4i64_v4i8(<4 x i64> %x, <4 x i8>* %z) {
+; LMULMAX1-LABEL: truncstore_v4i64_v4i8:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v25, v8, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v26, v25, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e8,mf8,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v25, v26, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e8,m1,ta,mu
+; LMULMAX1-NEXT: vmv.v.i v26, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e8,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v26, v25, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v25, v9, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v27, v25, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e8,mf8,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v25, v27, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e8,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v26, v25, 2
+; LMULMAX1-NEXT: vsetivli a1, 4, e8,m1,ta,mu
+; LMULMAX1-NEXT: vse8.v v26, (a0)
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX4-LABEL: truncstore_v4i64_v4i8:
+; LMULMAX4: # %bb.0:
+; LMULMAX4-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; LMULMAX4-NEXT: vnsrl.wi v25, v8, 0
+; LMULMAX4-NEXT: vsetivli a1, 4, e16,mf2,ta,mu
+; LMULMAX4-NEXT: vnsrl.wi v26, v25, 0
+; LMULMAX4-NEXT: vsetivli a1, 4, e8,mf4,ta,mu
+; LMULMAX4-NEXT: vnsrl.wi v25, v26, 0
+; LMULMAX4-NEXT: vsetivli a1, 4, e8,m1,ta,mu
+; LMULMAX4-NEXT: vse8.v v25, (a0)
+; LMULMAX4-NEXT: ret
+ %y = trunc <4 x i64> %x to <4 x i8>
+ store <4 x i8> %y, <4 x i8>* %z
+ ret void
+}
+
+define void @truncstore_v4i64_v4i16(<4 x i64> %x, <4 x i16>* %z) {
+; LMULMAX1-LABEL: truncstore_v4i64_v4i16:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v25, v8, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v26, v25, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e16,m1,ta,mu
+; LMULMAX1-NEXT: vmv.v.i v25, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e16,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v25, v26, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v26, v9, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v27, v26, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e16,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v25, v27, 2
+; LMULMAX1-NEXT: vsetivli a1, 4, e16,m1,ta,mu
+; LMULMAX1-NEXT: vse16.v v25, (a0)
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX4-LABEL: truncstore_v4i64_v4i16:
+; LMULMAX4: # %bb.0:
+; LMULMAX4-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; LMULMAX4-NEXT: vnsrl.wi v25, v8, 0
+; LMULMAX4-NEXT: vsetivli a1, 4, e16,mf2,ta,mu
+; LMULMAX4-NEXT: vnsrl.wi v26, v25, 0
+; LMULMAX4-NEXT: vsetivli a1, 4, e16,m1,ta,mu
+; LMULMAX4-NEXT: vse16.v v26, (a0)
+; LMULMAX4-NEXT: ret
+ %y = trunc <4 x i64> %x to <4 x i16>
+ store <4 x i16> %y, <4 x i16>* %z
+ ret void
+}
+
+define void @truncstore_v4i64_v4i32(<4 x i64> %x, <4 x i32>* %z) {
+; LMULMAX1-LABEL: truncstore_v4i64_v4i32:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v25, v8, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; LMULMAX1-NEXT: vmv.v.i v26, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v26, v25, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v25, v9, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e32,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v26, v25, 2
+; LMULMAX1-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; LMULMAX1-NEXT: vse32.v v26, (a0)
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX4-LABEL: truncstore_v4i64_v4i32:
+; LMULMAX4: # %bb.0:
+; LMULMAX4-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; LMULMAX4-NEXT: vnsrl.wi v25, v8, 0
+; LMULMAX4-NEXT: vse32.v v25, (a0)
+; LMULMAX4-NEXT: ret
+ %y = trunc <4 x i64> %x to <4 x i32>
+ store <4 x i32> %y, <4 x i32>* %z
+ ret void
+}
+
+define void @truncstore_v8i64_v8i8(<8 x i64> %x, <8 x i8>* %z) {
+; LMULMAX1-LABEL: truncstore_v8i64_v8i8:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v25, v10, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v26, v25, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e8,mf8,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v25, v26, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e8,m1,ta,mu
+; LMULMAX1-NEXT: vmv.v.i v26, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e8,m1,tu,mu
+; LMULMAX1-NEXT: vmv1r.v v27, v26
+; LMULMAX1-NEXT: vslideup.vi v27, v25, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v25, v11, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v28, v25, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e8,mf8,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v25, v28, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e8,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v27, v25, 2
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v25, v8, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v28, v25, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e8,mf8,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v25, v28, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e8,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v26, v25, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v25, v9, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v28, v25, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e8,mf8,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v25, v28, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e8,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v26, v25, 2
+; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; LMULMAX1-NEXT: vmv.v.i v25, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e8,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v25, v26, 0
+; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v25, v27, 4
+; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; LMULMAX1-NEXT: vse8.v v25, (a0)
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX4-LABEL: truncstore_v8i64_v8i8:
+; LMULMAX4: # %bb.0:
+; LMULMAX4-NEXT: vsetivli a1, 8, e32,m2,ta,mu
+; LMULMAX4-NEXT: vnsrl.wi v26, v8, 0
+; LMULMAX4-NEXT: vsetivli a1, 8, e16,m1,ta,mu
+; LMULMAX4-NEXT: vnsrl.wi v25, v26, 0
+; LMULMAX4-NEXT: vsetivli a1, 8, e8,mf2,ta,mu
+; LMULMAX4-NEXT: vnsrl.wi v26, v25, 0
+; LMULMAX4-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; LMULMAX4-NEXT: vse8.v v26, (a0)
+; LMULMAX4-NEXT: ret
+ %y = trunc <8 x i64> %x to <8 x i8>
+ store <8 x i8> %y, <8 x i8>* %z
+ ret void
+}
+
+define void @truncstore_v8i64_v8i16(<8 x i64> %x, <8 x i16>* %z) {
+; LMULMAX1-LABEL: truncstore_v8i64_v8i16:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v25, v10, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v26, v25, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e16,m1,ta,mu
+; LMULMAX1-NEXT: vmv.v.i v25, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e16,m1,tu,mu
+; LMULMAX1-NEXT: vmv1r.v v27, v25
+; LMULMAX1-NEXT: vslideup.vi v27, v26, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v26, v11, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v28, v26, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e16,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v27, v28, 2
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v26, v8, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v28, v26, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e16,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v25, v28, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v26, v9, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v28, v26, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e16,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v25, v28, 2
+; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu
+; LMULMAX1-NEXT: vmv.v.i v26, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e16,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v26, v25, 0
+; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v26, v27, 4
+; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu
+; LMULMAX1-NEXT: vse16.v v26, (a0)
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX4-LABEL: truncstore_v8i64_v8i16:
+; LMULMAX4: # %bb.0:
+; LMULMAX4-NEXT: vsetivli a1, 8, e32,m2,ta,mu
+; LMULMAX4-NEXT: vnsrl.wi v26, v8, 0
+; LMULMAX4-NEXT: vsetivli a1, 8, e16,m1,ta,mu
+; LMULMAX4-NEXT: vnsrl.wi v25, v26, 0
+; LMULMAX4-NEXT: vse16.v v25, (a0)
+; LMULMAX4-NEXT: ret
+ %y = trunc <8 x i64> %x to <8 x i16>
+ store <8 x i16> %y, <8 x i16>* %z
+ ret void
+}
+
+define void @truncstore_v8i64_v8i32(<8 x i64> %x, <8 x i32>* %z) {
+; LMULMAX1-LABEL: truncstore_v8i64_v8i32:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v25, v8, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; LMULMAX1-NEXT: vmv.v.i v26, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,m1,tu,mu
+; LMULMAX1-NEXT: vmv1r.v v27, v26
+; LMULMAX1-NEXT: vslideup.vi v27, v25, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v25, v9, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e32,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v27, v25, 2
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v25, v10, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v26, v25, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v25, v11, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e32,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v26, v25, 2
+; LMULMAX1-NEXT: addi a1, a0, 16
+; LMULMAX1-NEXT: vsetivli a2, 4, e32,m1,ta,mu
+; LMULMAX1-NEXT: vse32.v v26, (a1)
+; LMULMAX1-NEXT: vse32.v v27, (a0)
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX4-LABEL: truncstore_v8i64_v8i32:
+; LMULMAX4: # %bb.0:
+; LMULMAX4-NEXT: vsetivli a1, 8, e32,m2,ta,mu
+; LMULMAX4-NEXT: vnsrl.wi v26, v8, 0
+; LMULMAX4-NEXT: vse32.v v26, (a0)
+; LMULMAX4-NEXT: ret
+ %y = trunc <8 x i64> %x to <8 x i32>
+ store <8 x i32> %y, <8 x i32>* %z
+ ret void
+}
+
+define void @truncstore_v16i64_v16i8(<16 x i64> %x, <16 x i8>* %z) {
+; LMULMAX1-LABEL: truncstore_v16i64_v16i8:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v25, v14, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v26, v25, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e8,mf8,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v27, v26, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e8,m1,ta,mu
+; LMULMAX1-NEXT: vmv.v.i v25, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e8,m1,tu,mu
+; LMULMAX1-NEXT: vmv1r.v v26, v25
+; LMULMAX1-NEXT: vslideup.vi v26, v27, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v27, v15, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v28, v27, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e8,mf8,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v27, v28, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e8,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v26, v27, 2
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v27, v12, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v28, v27, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e8,mf8,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v27, v28, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e8,m1,tu,mu
+; LMULMAX1-NEXT: vmv1r.v v28, v25
+; LMULMAX1-NEXT: vslideup.vi v28, v27, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v27, v13, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v29, v27, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e8,mf8,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v27, v29, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e8,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v28, v27, 2
+; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; LMULMAX1-NEXT: vmv.v.i v27, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e8,m1,tu,mu
+; LMULMAX1-NEXT: vmv1r.v v29, v27
+; LMULMAX1-NEXT: vslideup.vi v29, v28, 0
+; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v29, v26, 4
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v26, v10, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v28, v26, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e8,mf8,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v26, v28, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e8,m1,tu,mu
+; LMULMAX1-NEXT: vmv1r.v v28, v25
+; LMULMAX1-NEXT: vslideup.vi v28, v26, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v26, v11, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v30, v26, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e8,mf8,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v26, v30, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e8,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v28, v26, 2
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v26, v8, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v30, v26, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e8,mf8,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v26, v30, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e8,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v25, v26, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v26, v9, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v30, v26, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e8,mf8,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v26, v30, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e8,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v25, v26, 2
+; LMULMAX1-NEXT: vslideup.vi v27, v25, 0
+; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v27, v28, 4
+; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT: vmv.v.i v25, 0
+; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v25, v27, 0
+; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v25, v29, 8
+; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT: vse8.v v25, (a0)
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX4-LABEL: truncstore_v16i64_v16i8:
+; LMULMAX4: # %bb.0:
+; LMULMAX4-NEXT: vsetivli a1, 8, e32,m2,ta,mu
+; LMULMAX4-NEXT: vnsrl.wi v26, v8, 0
+; LMULMAX4-NEXT: vsetivli a1, 8, e16,m1,ta,mu
+; LMULMAX4-NEXT: vnsrl.wi v25, v26, 0
+; LMULMAX4-NEXT: vsetivli a1, 8, e8,mf2,ta,mu
+; LMULMAX4-NEXT: vnsrl.wi v26, v25, 0
+; LMULMAX4-NEXT: vsetivli a1, 16, e8,m1,ta,mu
+; LMULMAX4-NEXT: vmv.v.i v25, 0
+; LMULMAX4-NEXT: vsetivli a1, 8, e8,m1,tu,mu
+; LMULMAX4-NEXT: vslideup.vi v25, v26, 0
+; LMULMAX4-NEXT: vsetivli a1, 8, e32,m2,ta,mu
+; LMULMAX4-NEXT: vnsrl.wi v26, v12, 0
+; LMULMAX4-NEXT: vsetivli a1, 8, e16,m1,ta,mu
+; LMULMAX4-NEXT: vnsrl.wi v28, v26, 0
+; LMULMAX4-NEXT: vsetivli a1, 8, e8,mf2,ta,mu
+; LMULMAX4-NEXT: vnsrl.wi v26, v28, 0
+; LMULMAX4-NEXT: vsetivli a1, 16, e8,m1,tu,mu
+; LMULMAX4-NEXT: vslideup.vi v25, v26, 8
+; LMULMAX4-NEXT: vsetivli a1, 16, e8,m1,ta,mu
+; LMULMAX4-NEXT: vse8.v v25, (a0)
+; LMULMAX4-NEXT: ret
+ %y = trunc <16 x i64> %x to <16 x i8>
+ store <16 x i8> %y, <16 x i8>* %z
+ ret void
+}
+
+define void @truncstore_v16i64_v16i16(<16 x i64> %x, <16 x i16>* %z) {
+; LMULMAX1-LABEL: truncstore_v16i64_v16i16:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v25, v10, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v26, v25, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e16,m1,ta,mu
+; LMULMAX1-NEXT: vmv.v.i v25, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e16,m1,tu,mu
+; LMULMAX1-NEXT: vmv1r.v v27, v25
+; LMULMAX1-NEXT: vslideup.vi v27, v26, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v26, v11, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v28, v26, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e16,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v27, v28, 2
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v26, v8, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v28, v26, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e16,m1,tu,mu
+; LMULMAX1-NEXT: vmv1r.v v26, v25
+; LMULMAX1-NEXT: vslideup.vi v26, v28, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v28, v9, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v29, v28, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e16,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v26, v29, 2
+; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu
+; LMULMAX1-NEXT: vmv.v.i v28, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e16,m1,tu,mu
+; LMULMAX1-NEXT: vmv1r.v v29, v28
+; LMULMAX1-NEXT: vslideup.vi v29, v26, 0
+; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v29, v27, 4
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v26, v14, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v27, v26, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e16,m1,tu,mu
+; LMULMAX1-NEXT: vmv1r.v v26, v25
+; LMULMAX1-NEXT: vslideup.vi v26, v27, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v27, v15, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v30, v27, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e16,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v26, v30, 2
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v27, v12, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v30, v27, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e16,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v25, v30, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v27, v13, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v30, v27, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e16,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v25, v30, 2
+; LMULMAX1-NEXT: vslideup.vi v28, v25, 0
+; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v28, v26, 4
+; LMULMAX1-NEXT: addi a1, a0, 16
+; LMULMAX1-NEXT: vsetivli a2, 8, e16,m1,ta,mu
+; LMULMAX1-NEXT: vse16.v v28, (a1)
+; LMULMAX1-NEXT: vse16.v v29, (a0)
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX4-LABEL: truncstore_v16i64_v16i16:
+; LMULMAX4: # %bb.0:
+; LMULMAX4-NEXT: vsetivli a1, 8, e32,m2,ta,mu
+; LMULMAX4-NEXT: vnsrl.wi v26, v8, 0
+; LMULMAX4-NEXT: vsetivli a1, 8, e16,m1,ta,mu
+; LMULMAX4-NEXT: vnsrl.wi v28, v26, 0
+; LMULMAX4-NEXT: vsetivli a1, 16, e16,m2,ta,mu
+; LMULMAX4-NEXT: vmv.v.i v26, 0
+; LMULMAX4-NEXT: vsetivli a1, 8, e16,m2,tu,mu
+; LMULMAX4-NEXT: vslideup.vi v26, v28, 0
+; LMULMAX4-NEXT: vsetivli a1, 8, e32,m2,ta,mu
+; LMULMAX4-NEXT: vnsrl.wi v28, v12, 0
+; LMULMAX4-NEXT: vsetivli a1, 8, e16,m1,ta,mu
+; LMULMAX4-NEXT: vnsrl.wi v30, v28, 0
+; LMULMAX4-NEXT: vsetivli a1, 16, e16,m2,tu,mu
+; LMULMAX4-NEXT: vslideup.vi v26, v30, 8
+; LMULMAX4-NEXT: vsetivli a1, 16, e16,m2,ta,mu
+; LMULMAX4-NEXT: vse16.v v26, (a0)
+; LMULMAX4-NEXT: ret
+ %y = trunc <16 x i64> %x to <16 x i16>
+ store <16 x i16> %y, <16 x i16>* %z
+ ret void
+}
+
+define void @truncstore_v16i64_v16i32(<16 x i64> %x, <16 x i32>* %z) {
+; LMULMAX1-LABEL: truncstore_v16i64_v16i32:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v25, v8, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; LMULMAX1-NEXT: vmv.v.i v26, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,m1,tu,mu
+; LMULMAX1-NEXT: vmv1r.v v27, v26
+; LMULMAX1-NEXT: vslideup.vi v27, v25, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v25, v9, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e32,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v27, v25, 2
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v25, v10, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,m1,tu,mu
+; LMULMAX1-NEXT: vmv1r.v v28, v26
+; LMULMAX1-NEXT: vslideup.vi v28, v25, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v25, v11, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e32,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v28, v25, 2
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v25, v12, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,m1,tu,mu
+; LMULMAX1-NEXT: vmv1r.v v29, v26
+; LMULMAX1-NEXT: vslideup.vi v29, v25, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v25, v13, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e32,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v29, v25, 2
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v25, v14, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v26, v25, 0
+; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; LMULMAX1-NEXT: vnsrl.wi v25, v15, 0
+; LMULMAX1-NEXT: vsetivli a1, 4, e32,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v26, v25, 2
+; LMULMAX1-NEXT: addi a1, a0, 48
+; LMULMAX1-NEXT: vsetivli a2, 4, e32,m1,ta,mu
+; LMULMAX1-NEXT: vse32.v v26, (a1)
+; LMULMAX1-NEXT: addi a1, a0, 32
+; LMULMAX1-NEXT: vse32.v v29, (a1)
+; LMULMAX1-NEXT: addi a1, a0, 16
+; LMULMAX1-NEXT: vse32.v v28, (a1)
+; LMULMAX1-NEXT: vse32.v v27, (a0)
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX4-LABEL: truncstore_v16i64_v16i32:
+; LMULMAX4: # %bb.0:
+; LMULMAX4-NEXT: vsetivli a1, 8, e32,m2,ta,mu
+; LMULMAX4-NEXT: vnsrl.wi v28, v8, 0
+; LMULMAX4-NEXT: vsetivli a1, 16, e32,m4,ta,mu
+; LMULMAX4-NEXT: vmv.v.i v8, 0
+; LMULMAX4-NEXT: vsetivli a1, 8, e32,m4,tu,mu
+; LMULMAX4-NEXT: vslideup.vi v8, v28, 0
+; LMULMAX4-NEXT: vsetivli a1, 8, e32,m2,ta,mu
+; LMULMAX4-NEXT: vnsrl.wi v28, v12, 0
+; LMULMAX4-NEXT: vsetivli a1, 16, e32,m4,tu,mu
+; LMULMAX4-NEXT: vslideup.vi v8, v28, 8
+; LMULMAX4-NEXT: vsetivli a1, 16, e32,m4,ta,mu
+; LMULMAX4-NEXT: vse32.v v8, (a0)
+; LMULMAX4-NEXT: ret
+ %y = trunc <16 x i64> %x to <16 x i32>
+ store <16 x i32> %y, <16 x i32>* %z
+ ret void
+}
+
+define <vscale x 2 x float> @extload_nxv2f16_nxv2f32(<vscale x 2 x half>* %x) {
+; CHECK-LABEL: extload_nxv2f16_nxv2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vfwcvt.f.f.v v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 2 x half>, <vscale x 2 x half>* %x
+ %z = fpext <vscale x 2 x half> %y to <vscale x 2 x float>
+ ret <vscale x 2 x float> %z
+}
+
+define <vscale x 2 x double> @extload_nxv2f16_nxv2f64(<vscale x 2 x half>* %x) {
+; CHECK-LABEL: extload_nxv2f16_nxv2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vfwcvt.f.f.v v26, v25
+; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT: vfwcvt.f.f.v v8, v26
+; CHECK-NEXT: ret
+ %y = load <vscale x 2 x half>, <vscale x 2 x half>* %x
+ %z = fpext <vscale x 2 x half> %y to <vscale x 2 x double>
+ ret <vscale x 2 x double> %z
+}
+
+define <vscale x 4 x float> @extload_nxv4f16_nxv4f32(<vscale x 4 x half>* %x) {
+; CHECK-LABEL: extload_nxv4f16_nxv4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl1re16.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT: vfwcvt.f.f.v v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 4 x half>, <vscale x 4 x half>* %x
+ %z = fpext <vscale x 4 x half> %y to <vscale x 4 x float>
+ ret <vscale x 4 x float> %z
+}
+
+define <vscale x 4 x double> @extload_nxv4f16_nxv4f64(<vscale x 4 x half>* %x) {
+; CHECK-LABEL: extload_nxv4f16_nxv4f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl1re16.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT: vfwcvt.f.f.v v26, v25
+; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu
+; CHECK-NEXT: vfwcvt.f.f.v v8, v26
+; CHECK-NEXT: ret
+ %y = load <vscale x 4 x half>, <vscale x 4 x half>* %x
+ %z = fpext <vscale x 4 x half> %y to <vscale x 4 x double>
+ ret <vscale x 4 x double> %z
+}
+
+define <vscale x 8 x float> @extload_nxv8f16_nxv8f32(<vscale x 8 x half>* %x) {
+; CHECK-LABEL: extload_nxv8f16_nxv8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl2re16.v v26, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu
+; CHECK-NEXT: vfwcvt.f.f.v v8, v26
+; CHECK-NEXT: ret
+ %y = load <vscale x 8 x half>, <vscale x 8 x half>* %x
+ %z = fpext <vscale x 8 x half> %y to <vscale x 8 x float>
+ ret <vscale x 8 x float> %z
+}
+
+define <vscale x 8 x double> @extload_nxv8f16_nxv8f64(<vscale x 8 x half>* %x) {
+; CHECK-LABEL: extload_nxv8f16_nxv8f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl2re16.v v26, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu
+; CHECK-NEXT: vfwcvt.f.f.v v28, v26
+; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu
+; CHECK-NEXT: vfwcvt.f.f.v v8, v28
+; CHECK-NEXT: ret
+ %y = load <vscale x 8 x half>, <vscale x 8 x half>* %x
+ %z = fpext <vscale x 8 x half> %y to <vscale x 8 x double>
+ ret <vscale x 8 x double> %z
+}
+
+define <vscale x 16 x float> @extload_nxv16f16_nxv16f32(<vscale x 16 x half>* %x) {
+; CHECK-LABEL: extload_nxv16f16_nxv16f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl4re16.v v28, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e16,m4,ta,mu
+; CHECK-NEXT: vfwcvt.f.f.v v8, v28
+; CHECK-NEXT: ret
+ %y = load <vscale x 16 x half>, <vscale x 16 x half>* %x
+ %z = fpext <vscale x 16 x half> %y to <vscale x 16 x float>
+ ret <vscale x 16 x float> %z
+}
+
+define <vscale x 16 x double> @extload_nxv16f16_nxv16f64(<vscale x 16 x half>* %x) {
+; CHECK-LABEL: extload_nxv16f16_nxv16f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl4re16.v v28, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu
+; CHECK-NEXT: vfwcvt.f.f.v v16, v28
+; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu
+; CHECK-NEXT: vfwcvt.f.f.v v8, v16
+; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu
+; CHECK-NEXT: vfwcvt.f.f.v v24, v30
+; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu
+; CHECK-NEXT: vfwcvt.f.f.v v16, v24
+; CHECK-NEXT: ret
+ %y = load <vscale x 16 x half>, <vscale x 16 x half>* %x
+ %z = fpext <vscale x 16 x half> %y to <vscale x 16 x double>
+ ret <vscale x 16 x double> %z
+}
+
+define void @truncstore_nxv2f32_nxv2f16(<vscale x 2 x float> %x, <vscale x 2 x half>* %z) {
+; CHECK-LABEL: truncstore_nxv2f32_nxv2f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu
+; CHECK-NEXT: vfncvt.f.f.w v25, v8
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %y = fptrunc <vscale x 2 x float> %x to <vscale x 2 x half>
+ store <vscale x 2 x half> %y, <vscale x 2 x half>* %z
+ ret void
+}
+
+define <vscale x 2 x double> @extload_nxv2f32_nxv2f64(<vscale x 2 x float>* %x) {
+; CHECK-LABEL: extload_nxv2f32_nxv2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl1re32.v v25, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT: vfwcvt.f.f.v v8, v25
+; CHECK-NEXT: ret
+ %y = load <vscale x 2 x float>, <vscale x 2 x float>* %x
+ %z = fpext <vscale x 2 x float> %y to <vscale x 2 x double>
+ ret <vscale x 2 x double> %z
+}
+
+define void @truncstore_nxv4f32_nxv4f16(<vscale x 4 x float> %x, <vscale x 4 x half>* %z) {
+; CHECK-LABEL: truncstore_nxv4f32_nxv4f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu
+; CHECK-NEXT: vfncvt.f.f.w v25, v8
+; CHECK-NEXT: vs1r.v v25, (a0)
+; CHECK-NEXT: ret
+ %y = fptrunc <vscale x 4 x float> %x to <vscale x 4 x half>
+ store <vscale x 4 x half> %y, <vscale x 4 x half>* %z
+ ret void
+}
+
+define <vscale x 4 x double> @extload_nxv4f32_nxv4f64(<vscale x 4 x float>* %x) {
+; CHECK-LABEL: extload_nxv4f32_nxv4f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl2re32.v v26, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu
+; CHECK-NEXT: vfwcvt.f.f.v v8, v26
+; CHECK-NEXT: ret
+ %y = load <vscale x 4 x float>, <vscale x 4 x float>* %x
+ %z = fpext <vscale x 4 x float> %y to <vscale x 4 x double>
+ ret <vscale x 4 x double> %z
+}
+
+define void @truncstore_nxv8f32_nxv8f16(<vscale x 8 x float> %x, <vscale x 8 x half>* %z) {
+; CHECK-LABEL: truncstore_nxv8f32_nxv8f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu
+; CHECK-NEXT: vfncvt.f.f.w v26, v8
+; CHECK-NEXT: vs2r.v v26, (a0)
+; CHECK-NEXT: ret
+ %y = fptrunc <vscale x 8 x float> %x to <vscale x 8 x half>
+ store <vscale x 8 x half> %y, <vscale x 8 x half>* %z
+ ret void
+}
+
+define <vscale x 8 x double> @extload_nxv8f32_nxv8f64(<vscale x 8 x float>* %x) {
+; CHECK-LABEL: extload_nxv8f32_nxv8f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl4re32.v v28, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu
+; CHECK-NEXT: vfwcvt.f.f.v v8, v28
+; CHECK-NEXT: ret
+ %y = load <vscale x 8 x float>, <vscale x 8 x float>* %x
+ %z = fpext <vscale x 8 x float> %y to <vscale x 8 x double>
+ ret <vscale x 8 x double> %z
+}
+
+define void @truncstore_nxv16f32_nxv16f16(<vscale x 16 x float> %x, <vscale x 16 x half>* %z) {
+; CHECK-LABEL: truncstore_nxv16f32_nxv16f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16,m4,ta,mu
+; CHECK-NEXT: vfncvt.f.f.w v28, v8
+; CHECK-NEXT: vs4r.v v28, (a0)
+; CHECK-NEXT: ret
+ %y = fptrunc <vscale x 16 x float> %x to <vscale x 16 x half>
+ store <vscale x 16 x half> %y, <vscale x 16 x half>* %z
+ ret void
+}
+
+define <vscale x 16 x double> @extload_nxv16f32_nxv16f64(<vscale x 16 x float>* %x) {
+; CHECK-LABEL: extload_nxv16f32_nxv16f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl8re32.v v24, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu
+; CHECK-NEXT: vfwcvt.f.f.v v8, v24
+; CHECK-NEXT: vfwcvt.f.f.v v16, v28
+; CHECK-NEXT: ret
+ %y = load <vscale x 16 x float>, <vscale x 16 x float>* %x
+ %z = fpext <vscale x 16 x float> %y to <vscale x 16 x double>
+ ret <vscale x 16 x double> %z
+}
+
+define void @truncstore_nxv2f64_nxv2f16(<vscale x 2 x double> %x, <vscale x 2 x half>* %z) {
+; CHECK-LABEL: truncstore_nxv2f64_nxv2f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu
+; CHECK-NEXT: vfncvt.rod.f.f.w v25, v8
+; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu
+; CHECK-NEXT: vfncvt.f.f.w v26, v25
+; CHECK-NEXT: vse16.v v26, (a0)
+; CHECK-NEXT: ret
+ %y = fptrunc <vscale x 2 x double> %x to <vscale x 2 x half>
+ store <vscale x 2 x half> %y, <vscale x 2 x half>* %z
+ ret void
+}
+
+define void @truncstore_nxv2f64_nxv2f32(<vscale x 2 x double> %x, <vscale x 2 x float>* %z) {
+; CHECK-LABEL: truncstore_nxv2f64_nxv2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu
+; CHECK-NEXT: vfncvt.f.f.w v25, v8
+; CHECK-NEXT: vs1r.v v25, (a0)
+; CHECK-NEXT: ret
+ %y = fptrunc <vscale x 2 x double> %x to <vscale x 2 x float>
+ store <vscale x 2 x float> %y, <vscale x 2 x float>* %z
+ ret void
+}
+
+define void @truncstore_nxv4f64_nxv4f16(<vscale x 4 x double> %x, <vscale x 4 x half>* %z) {
+; CHECK-LABEL: truncstore_nxv4f64_nxv4f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu
+; CHECK-NEXT: vfncvt.rod.f.f.w v26, v8
+; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu
+; CHECK-NEXT: vfncvt.f.f.w v25, v26
+; CHECK-NEXT: vs1r.v v25, (a0)
+; CHECK-NEXT: ret
+ %y = fptrunc <vscale x 4 x double> %x to <vscale x 4 x half>
+ store <vscale x 4 x half> %y, <vscale x 4 x half>* %z
+ ret void
+}
+
+define void @truncstore_nxv4f64_nxv4f32(<vscale x 4 x double> %x, <vscale x 4 x float>* %z) {
+; CHECK-LABEL: truncstore_nxv4f64_nxv4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu
+; CHECK-NEXT: vfncvt.f.f.w v26, v8
+; CHECK-NEXT: vs2r.v v26, (a0)
+; CHECK-NEXT: ret
+ %y = fptrunc <vscale x 4 x double> %x to <vscale x 4 x float>
+ store <vscale x 4 x float> %y, <vscale x 4 x float>* %z
+ ret void
+}
+
+define void @truncstore_nxv8f64_nxv8f16(<vscale x 8 x double> %x, <vscale x 8 x half>* %z) {
+; CHECK-LABEL: truncstore_nxv8f64_nxv8f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu
+; CHECK-NEXT: vfncvt.rod.f.f.w v28, v8
+; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu
+; CHECK-NEXT: vfncvt.f.f.w v26, v28
+; CHECK-NEXT: vs2r.v v26, (a0)
+; CHECK-NEXT: ret
+ %y = fptrunc <vscale x 8 x double> %x to <vscale x 8 x half>
+ store <vscale x 8 x half> %y, <vscale x 8 x half>* %z
+ ret void
+}
+
+define void @truncstore_nxv8f64_nxv8f32(<vscale x 8 x double> %x, <vscale x 8 x float>* %z) {
+; CHECK-LABEL: truncstore_nxv8f64_nxv8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu
+; CHECK-NEXT: vfncvt.f.f.w v28, v8
+; CHECK-NEXT: vs4r.v v28, (a0)
+; CHECK-NEXT: ret
+ %y = fptrunc <vscale x 8 x double> %x to <vscale x 8 x float>
+ store <vscale x 8 x float> %y, <vscale x 8 x float>* %z
+ ret void
+}
+
+define void @truncstore_nxv16f64_nxv16f16(<vscale x 16 x double> %x, <vscale x 16 x half>* %z) {
+; CHECK-LABEL: truncstore_nxv16f64_nxv16f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu
+; CHECK-NEXT: vfncvt.rod.f.f.w v28, v8
+; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu
+; CHECK-NEXT: vfncvt.f.f.w v8, v28
+; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu
+; CHECK-NEXT: vfncvt.rod.f.f.w v28, v16
+; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu
+; CHECK-NEXT: vfncvt.f.f.w v10, v28
+; CHECK-NEXT: vs4r.v v8, (a0)
+; CHECK-NEXT: ret
+ %y = fptrunc <vscale x 16 x double> %x to <vscale x 16 x half>
+ store <vscale x 16 x half> %y, <vscale x 16 x half>* %z
+ ret void
+}
+
+define void @truncstore_nxv16f64_nxv16f32(<vscale x 16 x double> %x, <vscale x 16 x float>* %z) {
+; CHECK-LABEL: truncstore_nxv16f64_nxv16f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu
+; CHECK-NEXT: vfncvt.f.f.w v24, v8
+; CHECK-NEXT: vfncvt.f.f.w v28, v16
+; CHECK-NEXT: vs8r.v v24, (a0)
+; CHECK-NEXT: ret
+ %y = fptrunc <vscale x 16 x double> %x to <vscale x 16 x float>
+ store <vscale x 16 x float> %y, <vscale x 16 x float>* %z
+ ret void
+}