[llvm] [CodeGen][RISCV] Add support for RISC-V nontemporal hints in vector predication instructions. (PR #153033)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 11 08:23:46 PDT 2025
https://github.com/daniel-trujillo-bsc created https://github.com/llvm/llvm-project/pull/153033
This PR makes VP intrinsics aware of nontemporal metadata, so that it is carried through to instruction selection.
First-time contributor here. I hope these changes are simple enough not to be much of a pain to review, and I'm looking forward to hearing your feedback!
I'm not a GitHub user, so I had to create a throwaway account for this, but you can write to my BSC email (in the commit and on the web: https://www.bsc.es/trujillo-daniel) to verify my identity.
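To illustrate what the change enables, here is a minimal sketch of the kind of IR the new tests exercise (the function name is purely illustrative; !0 is the standard !nontemporal node, and the tests additionally attach !riscv-nontemporal-domain metadata to select the ntl.p1/ntl.pall/ntl.s1/ntl.all hint):

define <vscale x 1 x i8> @example_nt_vp_load(ptr %p, i32 zeroext %vl) {
  ; With this patch the non-temporal flag survives into the MachineMemOperand,
  ; so the RISC-V backend emits an ntl.* hint before the vle8.v.
  %x = call <vscale x 1 x i8> @llvm.vp.load.nxv1i8.p0(ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0
  ret <vscale x 1 x i8> %x
}

!0 = !{i32 1}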
>From 756345df373c2cc86620628de48b5a35d4e5b697 Mon Sep 17 00:00:00 2001
From: Daniel Trujillo Viedma <daniel.trujillo at bsc.es>
Date: Sat, 12 Jul 2025 21:00:08 +0000
Subject: [PATCH] [CodeGen][RISCV] Add support for RISC-V nontemporal hints in
 vector predication instructions.
Make RISC-V vector-predicated intrinsics (specifically: load, store,
gather, scatter, strided.load, and strided.store) aware of
non-temporal metadata.
---
llvm/include/llvm/CodeGen/SelectionDAG.h | 6 +
.../SelectionDAG/LegalizeVectorTypes.cpp | 18 +-
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 14 +
.../SelectionDAG/SelectionDAGBuilder.cpp | 42 +-
.../CodeGen/RISCV/nontemporal-vp-scalable.ll | 40677 ++++++++++++++++
llvm/test/CodeGen/RISCV/nontemporal-vp.ll | 4009 ++
6 files changed, 44750 insertions(+), 16 deletions(-)
create mode 100644 llvm/test/CodeGen/RISCV/nontemporal-vp-scalable.ll
create mode 100644 llvm/test/CodeGen/RISCV/nontemporal-vp.ll
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index dc00db9daa3b6..3dab1b1e8712d 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -91,6 +91,7 @@ class TargetLowering;
class TargetMachine;
class TargetSubtargetInfo;
class Value;
+class VPIntrinsic;
template <typename T> class GenericSSAContext;
using SSAContext = GenericSSAContext<Function>;
@@ -1007,6 +1008,11 @@ class SelectionDAG {
llvm_unreachable("Unknown opcode");
}
+ static MachineMemOperand::Flags
+ getNonTemporalMemFlag(const VPIntrinsic &VPIntrin);
+
+ static MachineMemOperand::Flags getNonTemporalMemFlag(const MemSDNode &N);
+
/// Convert Op, which must be of integer type, to the
/// integer type VT, by either any-extending or truncating it.
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index bc2dbfb4cbaae..a21a9b518fcde 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -2476,10 +2476,13 @@ void DAGTypeLegalizer::SplitVecRes_Gather(MemSDNode *N, SDValue &Lo,
else
std::tie(IndexLo, IndexHi) = DAG.SplitVector(Ops.Index, dl);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MachineMemOperand::Flags MMOFlags = MachineMemOperand::MOLoad |
+ TLI.getTargetMMOFlags(*N) |
+ SelectionDAG::getNonTemporalMemFlag(*N);
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- N->getPointerInfo(), MachineMemOperand::MOLoad,
- LocationSize::beforeOrAfterPointer(), Alignment, N->getAAInfo(),
- N->getRanges());
+ N->getPointerInfo(), MMOFlags, LocationSize::beforeOrAfterPointer(),
+ Alignment, N->getAAInfo(), N->getRanges());
if (auto *MGT = dyn_cast<MaskedGatherSDNode>(N)) {
SDValue PassThru = MGT->getPassThru();
@@ -4248,10 +4251,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_Scatter(MemSDNode *N, unsigned OpNo) {
std::tie(IndexLo, IndexHi) = DAG.SplitVector(Ops.Index, DL);
SDValue Lo;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MachineMemOperand::Flags MMOFlags = MachineMemOperand::MOStore |
+ TLI.getTargetMMOFlags(*N) |
+ SelectionDAG::getNonTemporalMemFlag(*N);
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- N->getPointerInfo(), MachineMemOperand::MOStore,
- LocationSize::beforeOrAfterPointer(), Alignment, N->getAAInfo(),
- N->getRanges());
+ N->getPointerInfo(), MMOFlags, LocationSize::beforeOrAfterPointer(),
+ Alignment, N->getAAInfo(), N->getRanges());
if (auto *MSC = dyn_cast<MaskedScatterSDNode>(N)) {
SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Ops.Scale};
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 5ef1746333040..4e6d52846ae44 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -56,6 +56,7 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Casting.h"
@@ -14055,6 +14056,19 @@ void SelectionDAG::copyExtraInfo(SDNode *From, SDNode *To) {
SDEI[To] = std::move(NEI);
}
+MachineMemOperand::Flags
+SelectionDAG::getNonTemporalMemFlag(const VPIntrinsic &VPIntrin) {
+ return VPIntrin.hasMetadata(LLVMContext::MD_nontemporal)
+ ? MachineMemOperand::MONonTemporal
+ : MachineMemOperand::MONone;
+}
+
+MachineMemOperand::Flags
+SelectionDAG::getNonTemporalMemFlag(const MemSDNode &N) {
+ return N.isNonTemporal() ? MachineMemOperand::MONonTemporal
+ : MachineMemOperand::MONone;
+}
+
#ifndef NDEBUG
static void checkForCyclesHelper(const SDNode *N,
SmallPtrSetImpl<const SDNode*> &Visited,
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 7aa1fadd10dfc..a21992af3ce42 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -8438,8 +8438,12 @@ void SelectionDAGBuilder::visitVPLoad(
MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo);
bool AddToChain = !BatchAA || !BatchAA->pointsToConstantMemory(ML);
SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MachineMemOperand::Flags MMOFlags =
+ MachineMemOperand::MOLoad | TLI.getTargetMMOFlags(VPIntrin) |
+ SelectionDAG::getNonTemporalMemFlag(VPIntrin);
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
+ MachinePointerInfo(PtrOperand), MMOFlags,
LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo, Ranges);
LD = DAG.getLoadVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2],
MMO, false /*IsExpanding */);
@@ -8490,9 +8494,12 @@ void SelectionDAGBuilder::visitVPGather(
Alignment = DAG.getEVTAlign(VT.getScalarType());
unsigned AS =
PtrOperand->getType()->getScalarType()->getPointerAddressSpace();
+ MachineMemOperand::Flags MMOFlags =
+ MachineMemOperand::MOLoad | TLI.getTargetMMOFlags(VPIntrin) |
+ SelectionDAG::getNonTemporalMemFlag(VPIntrin);
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- MachinePointerInfo(AS), MachineMemOperand::MOLoad,
- LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo, Ranges);
+ MachinePointerInfo(AS), MMOFlags, LocationSize::beforeOrAfterPointer(),
+ *Alignment, AAInfo, Ranges);
SDValue Base, Index, Scale;
ISD::MemIndexType IndexType;
bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale,
@@ -8530,8 +8537,12 @@ void SelectionDAGBuilder::visitVPStore(
Alignment = DAG.getEVTAlign(VT);
SDValue Ptr = OpValues[1];
SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MachineMemOperand::Flags MMOFlags =
+ MachineMemOperand::MOStore | TLI.getTargetMMOFlags(VPIntrin) |
+ SelectionDAG::getNonTemporalMemFlag(VPIntrin);
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
+ MachinePointerInfo(PtrOperand), MMOFlags,
LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo);
ST = DAG.getStoreVP(getMemoryRoot(), DL, OpValues[0], Ptr, Offset,
OpValues[2], OpValues[3], VT, MMO, ISD::UNINDEXED,
@@ -8553,9 +8564,12 @@ void SelectionDAGBuilder::visitVPScatter(
Alignment = DAG.getEVTAlign(VT.getScalarType());
unsigned AS =
PtrOperand->getType()->getScalarType()->getPointerAddressSpace();
+ MachineMemOperand::Flags MMOFlags =
+ MachineMemOperand::MOStore | TLI.getTargetMMOFlags(VPIntrin) |
+ SelectionDAG::getNonTemporalMemFlag(VPIntrin);
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- MachinePointerInfo(AS), MachineMemOperand::MOStore,
- LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo);
+ MachinePointerInfo(AS), MMOFlags, LocationSize::beforeOrAfterPointer(),
+ *Alignment, AAInfo);
SDValue Base, Index, Scale;
ISD::MemIndexType IndexType;
bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale,
@@ -8596,9 +8610,13 @@ void SelectionDAGBuilder::visitVPStridedLoad(
bool AddToChain = !BatchAA || !BatchAA->pointsToConstantMemory(ML);
SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
unsigned AS = PtrOperand->getType()->getPointerAddressSpace();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MachineMemOperand::Flags MMOFlags =
+ MachineMemOperand::MOLoad | TLI.getTargetMMOFlags(VPIntrin) |
+ SelectionDAG::getNonTemporalMemFlag(VPIntrin);
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- MachinePointerInfo(AS), MachineMemOperand::MOLoad,
- LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo, Ranges);
+ MachinePointerInfo(AS), MMOFlags, LocationSize::beforeOrAfterPointer(),
+ *Alignment, AAInfo, Ranges);
SDValue LD = DAG.getStridedLoadVP(VT, DL, InChain, OpValues[0], OpValues[1],
OpValues[2], OpValues[3], MMO,
@@ -8619,9 +8637,13 @@ void SelectionDAGBuilder::visitVPStridedStore(
Alignment = DAG.getEVTAlign(VT.getScalarType());
AAMDNodes AAInfo = VPIntrin.getAAMetadata();
unsigned AS = PtrOperand->getType()->getPointerAddressSpace();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MachineMemOperand::Flags MMOFlags =
+ MachineMemOperand::MOStore | TLI.getTargetMMOFlags(VPIntrin) |
+ SelectionDAG::getNonTemporalMemFlag(VPIntrin);
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- MachinePointerInfo(AS), MachineMemOperand::MOStore,
- LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo);
+ MachinePointerInfo(AS), MMOFlags, LocationSize::beforeOrAfterPointer(),
+ *Alignment, AAInfo);
SDValue ST = DAG.getStridedStoreVP(
getMemoryRoot(), DL, OpValues[0], OpValues[1],
diff --git a/llvm/test/CodeGen/RISCV/nontemporal-vp-scalable.ll b/llvm/test/CodeGen/RISCV/nontemporal-vp-scalable.ll
new file mode 100644
index 0000000000000..4bc6313494d41
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/nontemporal-vp-scalable.ll
@@ -0,0 +1,40677 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv64 -mattr=+zihintntl,+f,+d,+zfh,+v < %s | FileCheck %s -check-prefix=CHECK-RV64V
+; RUN: llc -mtriple=riscv32 -mattr=+zihintntl,+f,+d,+zfh,+v < %s | FileCheck %s -check-prefix=CHECK-RV32V
+; RUN: llc -mtriple=riscv64 -mattr=+zihintntl,+f,+d,+zfh,+v,+c < %s | FileCheck %s -check-prefix=CHECK-RV64VC
+; RUN: llc -mtriple=riscv32 -mattr=+zihintntl,+f,+d,+zfh,+v,+c < %s | FileCheck %s -check-prefix=CHECK-RV32VC
+
+
+define <vscale x 1 x i8> @test_nontemporal_vp_load_nxv1i8_P1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv1i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv1i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv1i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv1i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i8> @llvm.vp.load.nxv1i8.p0(ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 1 x i8> %x
+}
+
+
+define <vscale x 1 x i8> @test_nontemporal_vp_load_nxv1i8_PALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv1i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv1i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv1i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv1i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i8> @llvm.vp.load.nxv1i8.p0(ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 1 x i8> %x
+}
+
+
+define <vscale x 1 x i8> @test_nontemporal_vp_load_nxv1i8_S1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv1i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv1i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv1i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv1i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i8> @llvm.vp.load.nxv1i8.p0(ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 1 x i8> %x
+}
+
+
+define <vscale x 1 x i8> @test_nontemporal_vp_load_nxv1i8_ALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv1i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv1i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv1i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv1i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i8> @llvm.vp.load.nxv1i8.p0(ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 1 x i8> %x
+}
+
+define <vscale x 1 x i8> @test_nontemporal_vp_load_nxv1i8_DEFAULT(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv1i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv1i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv1i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv1i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i8> @llvm.vp.load.nxv1i8.p0(ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 1 x i8> %x
+}
+
+
+define void @test_nontemporal_vp_store_nxv1i8_P1(<vscale x 1 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv1i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vse8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv1i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vse8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv1i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv1i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv1i8.p0(<vscale x 1 x i8> %val, ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv1i8_PALL(<vscale x 1 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv1i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vse8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv1i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vse8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv1i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv1i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv1i8.p0(<vscale x 1 x i8> %val, ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv1i8_S1(<vscale x 1 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv1i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vse8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv1i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vse8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv1i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv1i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv1i8.p0(<vscale x 1 x i8> %val, ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv1i8_ALL(<vscale x 1 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv1i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv1i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv1i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv1i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv1i8.p0(<vscale x 1 x i8> %val, ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_store_nxv1i8_DEFAULT(<vscale x 1 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv1i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv1i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv1i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv1i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv1i8.p0(<vscale x 1 x i8> %val, ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 1 x i8> @test_nontemporal_vp_gather_nxv1i8_P1(<vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv1i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v9, (zero), v8
+; CHECK-RV64V-NEXT: vmv1r.v v8, v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv1i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32V-NEXT: vmv1r.v v8, v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv1i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v9, (zero), v8
+; CHECK-RV64VC-NEXT: vmv1r.v v8, v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv1i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32VC-NEXT: vmv1r.v v8, v9
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i8> @llvm.vp.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 1 x i8> %x
+}
+
+
+define <vscale x 1 x i8> @test_nontemporal_vp_gather_nxv1i8_PALL(<vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv1i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v9, (zero), v8
+; CHECK-RV64V-NEXT: vmv1r.v v8, v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv1i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32V-NEXT: vmv1r.v v8, v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv1i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v9, (zero), v8
+; CHECK-RV64VC-NEXT: vmv1r.v v8, v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv1i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32VC-NEXT: vmv1r.v v8, v9
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i8> @llvm.vp.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 1 x i8> %x
+}
+
+
+define <vscale x 1 x i8> @test_nontemporal_vp_gather_nxv1i8_S1(<vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv1i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v9, (zero), v8
+; CHECK-RV64V-NEXT: vmv1r.v v8, v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv1i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32V-NEXT: vmv1r.v v8, v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv1i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v9, (zero), v8
+; CHECK-RV64VC-NEXT: vmv1r.v v8, v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv1i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32VC-NEXT: vmv1r.v v8, v9
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i8> @llvm.vp.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 1 x i8> %x
+}
+
+
+define <vscale x 1 x i8> @test_nontemporal_vp_gather_nxv1i8_ALL(<vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv1i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v9, (zero), v8
+; CHECK-RV64V-NEXT: vmv1r.v v8, v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv1i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32V-NEXT: vmv1r.v v8, v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv1i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v9, (zero), v8
+; CHECK-RV64VC-NEXT: vmv1r.v v8, v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv1i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32VC-NEXT: vmv1r.v v8, v9
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i8> @llvm.vp.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 1 x i8> %x
+}
+
+define <vscale x 1 x i8> @test_nontemporal_vp_gather_nxv1i8_DEFAULT(<vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv1i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v9, (zero), v8
+; CHECK-RV64V-NEXT: vmv1r.v v8, v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv1i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32V-NEXT: vmv1r.v v8, v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv1i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v9, (zero), v8
+; CHECK-RV64VC-NEXT: vmv1r.v v8, v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv1i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32VC-NEXT: vmv1r.v v8, v9
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i8> @llvm.vp.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 1 x i8> %x
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv1i8_P1(<vscale x 1 x i8> %val, <vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv1i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv1i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv1i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv1i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv1i8_PALL(<vscale x 1 x i8> %val, <vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv1i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv1i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv1i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv1i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv1i8_S1(<vscale x 1 x i8> %val, <vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv1i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv1i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv1i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv1i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv1i8_ALL(<vscale x 1 x i8> %val, <vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv1i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv1i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv1i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv1i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_scatter_nxv1i8_DEFAULT(<vscale x 1 x i8> %val, <vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv1i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv1i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv1i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv1i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 1 x i8> @test_nontemporal_vp_strided.load_nxv1i8_P1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv1i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv1i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv1i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv1i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, mf8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i8> @llvm.experimental.vp.strided.load.nxv1i8.i64(ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 1 x i8> %x
+}
+
+
+define <vscale x 1 x i8> @test_nontemporal_vp_strided.load_nxv1i8_PALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv1i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv1i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv1i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv1i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, mf8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i8> @llvm.experimental.vp.strided.load.nxv1i8.i64(ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 1 x i8> %x
+}
+
+
+define <vscale x 1 x i8> @test_nontemporal_vp_strided.load_nxv1i8_S1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv1i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv1i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv1i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv1i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, mf8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i8> @llvm.experimental.vp.strided.load.nxv1i8.i64(ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 1 x i8> %x
+}
+
+
+define <vscale x 1 x i8> @test_nontemporal_vp_strided.load_nxv1i8_ALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv1i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv1i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv1i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv1i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, mf8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i8> @llvm.experimental.vp.strided.load.nxv1i8.i64(ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 1 x i8> %x
+}
+
+define <vscale x 1 x i8> @test_nontemporal_vp_strided.load_nxv1i8_DEFAULT(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv1i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv1i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv1i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv1i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, mf8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i8> @llvm.experimental.vp.strided.load.nxv1i8.i64(ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 1 x i8> %x
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv1i8_P1(<vscale x 1 x i8> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv1i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv1i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv1i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv1i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, mf8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv1i8.i64(<vscale x 1 x i8> %val, ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv1i8_PALL(<vscale x 1 x i8> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv1i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv1i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv1i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv1i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, mf8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv1i8.i64(<vscale x 1 x i8> %val, ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv1i8_S1(<vscale x 1 x i8> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv1i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv1i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv1i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv1i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, mf8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv1i8.i64(<vscale x 1 x i8> %val, ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv1i8_ALL(<vscale x 1 x i8> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv1i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv1i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv1i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv1i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, mf8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv1i8.i64(<vscale x 1 x i8> %val, ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_strided.store_nxv1i8_DEFAULT(<vscale x 1 x i8> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv1i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv1i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv1i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv1i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, mf8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv1i8.i64(<vscale x 1 x i8> %val, ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 1 x i16> @test_nontemporal_vp_load_nxv1i16_P1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv1i16_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vle16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv1i16_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vle16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv1i16_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv1i16_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i16> @llvm.vp.load.nxv1i16.p0(ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 1 x i16> %x
+}
+
+
+define <vscale x 1 x i16> @test_nontemporal_vp_load_nxv1i16_PALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv1i16_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vle16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv1i16_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vle16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv1i16_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv1i16_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i16> @llvm.vp.load.nxv1i16.p0(ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 1 x i16> %x
+}
+
+
+define <vscale x 1 x i16> @test_nontemporal_vp_load_nxv1i16_S1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv1i16_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vle16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv1i16_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vle16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv1i16_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv1i16_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i16> @llvm.vp.load.nxv1i16.p0(ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 1 x i16> %x
+}
+
+
+define <vscale x 1 x i16> @test_nontemporal_vp_load_nxv1i16_ALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv1i16_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv1i16_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv1i16_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv1i16_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i16> @llvm.vp.load.nxv1i16.p0(ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 1 x i16> %x
+}
+
+define <vscale x 1 x i16> @test_nontemporal_vp_load_nxv1i16_DEFAULT(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv1i16_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv1i16_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv1i16_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv1i16_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i16> @llvm.vp.load.nxv1i16.p0(ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 1 x i16> %x
+}
+
+
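+; vp.store tests for <vscale x 1 x i16>: each riscv-nontemporal-domain hint (P1, PALL, S1, ALL) should emit the matching ntl.* (c.ntl.* on the VC runs) before vse16.v; DEFAULT (no domain metadata) falls back to ntl.all.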
+define void @test_nontemporal_vp_store_nxv1i16_P1(<vscale x 1 x i16> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv1i16_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vse16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv1i16_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vse16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv1i16_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv1i16_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv1i16.p0(<vscale x 1 x i16> %val, ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv1i16_PALL(<vscale x 1 x i16> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv1i16_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vse16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv1i16_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vse16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv1i16_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv1i16_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv1i16.p0(<vscale x 1 x i16> %val, ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv1i16_S1(<vscale x 1 x i16> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv1i16_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vse16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv1i16_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vse16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv1i16_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv1i16_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv1i16.p0(<vscale x 1 x i16> %val, ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv1i16_ALL(<vscale x 1 x i16> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv1i16_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv1i16_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv1i16_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv1i16_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv1i16.p0(<vscale x 1 x i16> %val, ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_store_nxv1i16_DEFAULT(<vscale x 1 x i16> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv1i16_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv1i16_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv1i16_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv1i16_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv1i16.p0(<vscale x 1 x i16> %val, ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
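+; vp.gather tests for <vscale x 1 x i16>: the nontemporal hint is expected ahead of vluxei64.v on RV64 and vluxei32.v on RV32.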
+define <vscale x 1 x i16> @test_nontemporal_vp_gather_nxv1i16_P1(<vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv1i16_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v9, (zero), v8
+; CHECK-RV64V-NEXT: vmv1r.v v8, v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv1i16_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32V-NEXT: vmv1r.v v8, v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv1i16_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v9, (zero), v8
+; CHECK-RV64VC-NEXT: vmv1r.v v8, v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv1i16_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32VC-NEXT: vmv1r.v v8, v9
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i16> @llvm.vp.gather.nxv1i16.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 1 x i16> %x
+}
+
+
+define <vscale x 1 x i16> @test_nontemporal_vp_gather_nxv1i16_PALL(<vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv1i16_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v9, (zero), v8
+; CHECK-RV64V-NEXT: vmv1r.v v8, v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv1i16_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32V-NEXT: vmv1r.v v8, v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv1i16_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v9, (zero), v8
+; CHECK-RV64VC-NEXT: vmv1r.v v8, v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv1i16_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32VC-NEXT: vmv1r.v v8, v9
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i16> @llvm.vp.gather.nxv1i16.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 1 x i16> %x
+}
+
+
+define <vscale x 1 x i16> @test_nontemporal_vp_gather_nxv1i16_S1(<vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv1i16_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v9, (zero), v8
+; CHECK-RV64V-NEXT: vmv1r.v v8, v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv1i16_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32V-NEXT: vmv1r.v v8, v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv1i16_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v9, (zero), v8
+; CHECK-RV64VC-NEXT: vmv1r.v v8, v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv1i16_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32VC-NEXT: vmv1r.v v8, v9
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i16> @llvm.vp.gather.nxv1i16.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 1 x i16> %x
+}
+
+
+define <vscale x 1 x i16> @test_nontemporal_vp_gather_nxv1i16_ALL(<vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv1i16_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v9, (zero), v8
+; CHECK-RV64V-NEXT: vmv1r.v v8, v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv1i16_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32V-NEXT: vmv1r.v v8, v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv1i16_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v9, (zero), v8
+; CHECK-RV64VC-NEXT: vmv1r.v v8, v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv1i16_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32VC-NEXT: vmv1r.v v8, v9
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i16> @llvm.vp.gather.nxv1i16.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 1 x i16> %x
+}
+
+define <vscale x 1 x i16> @test_nontemporal_vp_gather_nxv1i16_DEFAULT(<vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv1i16_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v9, (zero), v8
+; CHECK-RV64V-NEXT: vmv1r.v v8, v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv1i16_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32V-NEXT: vmv1r.v v8, v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv1i16_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v9, (zero), v8
+; CHECK-RV64VC-NEXT: vmv1r.v v8, v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv1i16_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32VC-NEXT: vmv1r.v v8, v9
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i16> @llvm.vp.gather.nxv1i16.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 1 x i16> %x
+}
+
+
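+; vp.scatter tests for <vscale x 1 x i16>: the nontemporal hint is expected ahead of vsoxei64.v on RV64 and vsoxei32.v on RV32.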
+define void @test_nontemporal_vp_scatter_nxv1i16_P1(<vscale x 1 x i16> %val, <vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv1i16_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv1i16_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv1i16_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv1i16_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv1i16_PALL(<vscale x 1 x i16> %val, <vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv1i16_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv1i16_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv1i16_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv1i16_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv1i16_S1(<vscale x 1 x i16> %val, <vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv1i16_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv1i16_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv1i16_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv1i16_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv1i16_ALL(<vscale x 1 x i16> %val, <vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv1i16_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv1i16_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv1i16_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv1i16_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_scatter_nxv1i16_DEFAULT(<vscale x 1 x i16> %val, <vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv1i16_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv1i16_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv1i16_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv1i16_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
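+; experimental.vp.strided.load tests for <vscale x 1 x i16>: the nontemporal hint is expected ahead of vlse16.v.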
+define <vscale x 1 x i16> @test_nontemporal_vp_strided.load_nxv1i16_P1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv1i16_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv1i16_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv1i16_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv1i16_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i16> @llvm.experimental.vp.strided.load.nxv1i16.i64(ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 1 x i16> %x
+}
+
+
+define <vscale x 1 x i16> @test_nontemporal_vp_strided.load_nxv1i16_PALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv1i16_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv1i16_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv1i16_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv1i16_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i16> @llvm.experimental.vp.strided.load.nxv1i16.i64(ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 1 x i16> %x
+}
+
+
+define <vscale x 1 x i16> @test_nontemporal_vp_strided.load_nxv1i16_S1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv1i16_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv1i16_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv1i16_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv1i16_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i16> @llvm.experimental.vp.strided.load.nxv1i16.i64(ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 1 x i16> %x
+}
+
+
+define <vscale x 1 x i16> @test_nontemporal_vp_strided.load_nxv1i16_ALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv1i16_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv1i16_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv1i16_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv1i16_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i16> @llvm.experimental.vp.strided.load.nxv1i16.i64(ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 1 x i16> %x
+}
+
+define <vscale x 1 x i16> @test_nontemporal_vp_strided.load_nxv1i16_DEFAULT(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv1i16_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv1i16_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv1i16_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv1i16_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i16> @llvm.experimental.vp.strided.load.nxv1i16.i64(ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 1 x i16> %x
+}
+
+
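+; experimental.vp.strided.store tests for <vscale x 1 x i16>: the nontemporal hint is expected ahead of vsse16.v.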
+define void @test_nontemporal_vp_strided.store_nxv1i16_P1(<vscale x 1 x i16> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv1i16_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv1i16_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv1i16_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv1i16_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv1i16.i64(<vscale x 1 x i16> %val, ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv1i16_PALL(<vscale x 1 x i16> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv1i16_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv1i16_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv1i16_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv1i16_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv1i16.i64(<vscale x 1 x i16> %val, ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv1i16_S1(<vscale x 1 x i16> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv1i16_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv1i16_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv1i16_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv1i16_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv1i16.i64(<vscale x 1 x i16> %val, ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv1i16_ALL(<vscale x 1 x i16> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv1i16_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv1i16_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv1i16_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv1i16_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv1i16.i64(<vscale x 1 x i16> %val, ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_strided.store_nxv1i16_DEFAULT(<vscale x 1 x i16> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv1i16_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv1i16_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv1i16_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv1i16_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv1i16.i64(<vscale x 1 x i16> %val, ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
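+; Same coverage repeated for <vscale x 1 x i32>, starting with vp.load: ntl.*/c.ntl.* expected before vle32.v for each domain hint.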
+define <vscale x 1 x i32> @test_nontemporal_vp_load_nxv1i32_P1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv1i32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv1i32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv1i32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv1i32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i32> @llvm.vp.load.nxv1i32.p0(ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 1 x i32> %x
+}
+
+
+define <vscale x 1 x i32> @test_nontemporal_vp_load_nxv1i32_PALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv1i32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv1i32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv1i32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv1i32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i32> @llvm.vp.load.nxv1i32.p0(ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 1 x i32> %x
+}
+
+
+define <vscale x 1 x i32> @test_nontemporal_vp_load_nxv1i32_S1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv1i32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv1i32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv1i32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv1i32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i32> @llvm.vp.load.nxv1i32.p0(ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 1 x i32> %x
+}
+
+
+define <vscale x 1 x i32> @test_nontemporal_vp_load_nxv1i32_ALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv1i32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv1i32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv1i32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv1i32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i32> @llvm.vp.load.nxv1i32.p0(ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 1 x i32> %x
+}
+
+define <vscale x 1 x i32> @test_nontemporal_vp_load_nxv1i32_DEFAULT(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv1i32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv1i32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv1i32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv1i32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i32> @llvm.vp.load.nxv1i32.p0(ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 1 x i32> %x
+}
+
+
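+; vp.store tests for <vscale x 1 x i32>: the nontemporal hint is expected ahead of vse32.v.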
+define void @test_nontemporal_vp_store_nxv1i32_P1(<vscale x 1 x i32> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv1i32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv1i32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv1i32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv1i32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv1i32.p0(<vscale x 1 x i32> %val, ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv1i32_PALL(<vscale x 1 x i32> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv1i32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv1i32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv1i32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv1i32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv1i32.p0(<vscale x 1 x i32> %val, ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv1i32_S1(<vscale x 1 x i32> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv1i32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv1i32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv1i32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv1i32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv1i32.p0(<vscale x 1 x i32> %val, ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv1i32_ALL(<vscale x 1 x i32> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv1i32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv1i32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv1i32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv1i32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv1i32.p0(<vscale x 1 x i32> %val, ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_store_nxv1i32_DEFAULT(<vscale x 1 x i32> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv1i32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv1i32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv1i32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv1i32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv1i32.p0(<vscale x 1 x i32> %val, ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
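+; vp.gather tests for <vscale x 1 x i32>: the nontemporal hint is expected ahead of vluxei64.v (RV64) / vluxei32.v (RV32).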
+define <vscale x 1 x i32> @test_nontemporal_vp_gather_nxv1i32_P1(<vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv1i32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v9, (zero), v8
+; CHECK-RV64V-NEXT: vmv1r.v v8, v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv1i32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv1i32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v9, (zero), v8
+; CHECK-RV64VC-NEXT: vmv1r.v v8, v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv1i32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i32> @llvm.vp.gather.nxv1i32.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 1 x i32> %x
+}
+
+
+define <vscale x 1 x i32> @test_nontemporal_vp_gather_nxv1i32_PALL(<vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv1i32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v9, (zero), v8
+; CHECK-RV64V-NEXT: vmv1r.v v8, v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv1i32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv1i32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v9, (zero), v8
+; CHECK-RV64VC-NEXT: vmv1r.v v8, v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv1i32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i32> @llvm.vp.gather.nxv1i32.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 1 x i32> %x
+}
+
+
+define <vscale x 1 x i32> @test_nontemporal_vp_gather_nxv1i32_S1(<vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv1i32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v9, (zero), v8
+; CHECK-RV64V-NEXT: vmv1r.v v8, v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv1i32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv1i32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v9, (zero), v8
+; CHECK-RV64VC-NEXT: vmv1r.v v8, v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv1i32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i32> @llvm.vp.gather.nxv1i32.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 1 x i32> %x
+}
+
+
+define <vscale x 1 x i32> @test_nontemporal_vp_gather_nxv1i32_ALL(<vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv1i32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v9, (zero), v8
+; CHECK-RV64V-NEXT: vmv1r.v v8, v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv1i32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv1i32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v9, (zero), v8
+; CHECK-RV64VC-NEXT: vmv1r.v v8, v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv1i32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i32> @llvm.vp.gather.nxv1i32.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 1 x i32> %x
+}
+
+define <vscale x 1 x i32> @test_nontemporal_vp_gather_nxv1i32_DEFAULT(<vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv1i32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v9, (zero), v8
+; CHECK-RV64V-NEXT: vmv1r.v v8, v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv1i32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv1i32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v9, (zero), v8
+; CHECK-RV64VC-NEXT: vmv1r.v v8, v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv1i32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i32> @llvm.vp.gather.nxv1i32.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 1 x i32> %x
+}
+
+
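+; vp.scatter tests for <vscale x 1 x i32>: the nontemporal hint is expected ahead of vsoxei64.v (RV64) / vsoxei32.v (RV32).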
+define void @test_nontemporal_vp_scatter_nxv1i32_P1(<vscale x 1 x i32> %val, <vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv1i32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv1i32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv1i32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv1i32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv1i32_PALL(<vscale x 1 x i32> %val, <vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv1i32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv1i32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv1i32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv1i32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv1i32_S1(<vscale x 1 x i32> %val, <vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv1i32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv1i32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv1i32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv1i32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv1i32_ALL(<vscale x 1 x i32> %val, <vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv1i32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv1i32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv1i32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv1i32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_scatter_nxv1i32_DEFAULT(<vscale x 1 x i32> %val, <vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv1i32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv1i32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv1i32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv1i32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 1 x i32> @test_nontemporal_vp_strided.load_nxv1i32_P1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv1i32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv1i32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv1i32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv1i32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i32> @llvm.experimental.vp.strided.load.nxv1i32.i64(ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 1 x i32> %x
+}
+
+
+define <vscale x 1 x i32> @test_nontemporal_vp_strided.load_nxv1i32_PALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv1i32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv1i32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv1i32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv1i32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i32> @llvm.experimental.vp.strided.load.nxv1i32.i64(ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 1 x i32> %x
+}
+
+
+define <vscale x 1 x i32> @test_nontemporal_vp_strided.load_nxv1i32_S1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv1i32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv1i32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv1i32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv1i32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i32> @llvm.experimental.vp.strided.load.nxv1i32.i64(ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 1 x i32> %x
+}
+
+
+define <vscale x 1 x i32> @test_nontemporal_vp_strided.load_nxv1i32_ALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv1i32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv1i32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv1i32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv1i32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i32> @llvm.experimental.vp.strided.load.nxv1i32.i64(ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 1 x i32> %x
+}
+
+define <vscale x 1 x i32> @test_nontemporal_vp_strided.load_nxv1i32_DEFAULT(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv1i32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv1i32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv1i32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv1i32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i32> @llvm.experimental.vp.strided.load.nxv1i32.i64(ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 1 x i32> %x
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv1i32_P1(<vscale x 1 x i32> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv1i32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv1i32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv1i32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv1i32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv1i32.i64(<vscale x 1 x i32> %val, ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv1i32_PALL(<vscale x 1 x i32> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv1i32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv1i32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv1i32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv1i32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv1i32.i64(<vscale x 1 x i32> %val, ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv1i32_S1(<vscale x 1 x i32> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv1i32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv1i32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv1i32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv1i32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv1i32.i64(<vscale x 1 x i32> %val, ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv1i32_ALL(<vscale x 1 x i32> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv1i32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv1i32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv1i32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv1i32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv1i32.i64(<vscale x 1 x i32> %val, ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_strided.store_nxv1i32_DEFAULT(<vscale x 1 x i32> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv1i32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv1i32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv1i32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv1i32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv1i32.i64(<vscale x 1 x i32> %val, ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 1 x i64> @test_nontemporal_vp_load_nxv1i64_P1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv1i64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv1i64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv1i64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv1i64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i64> @llvm.vp.load.nxv1i64.p0(ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 1 x i64> %x
+}
+
+
+define <vscale x 1 x i64> @test_nontemporal_vp_load_nxv1i64_PALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv1i64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv1i64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv1i64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv1i64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i64> @llvm.vp.load.nxv1i64.p0(ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 1 x i64> %x
+}
+
+
+define <vscale x 1 x i64> @test_nontemporal_vp_load_nxv1i64_S1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv1i64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv1i64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv1i64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv1i64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i64> @llvm.vp.load.nxv1i64.p0(ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 1 x i64> %x
+}
+
+
+define <vscale x 1 x i64> @test_nontemporal_vp_load_nxv1i64_ALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv1i64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv1i64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv1i64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv1i64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i64> @llvm.vp.load.nxv1i64.p0(ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 1 x i64> %x
+}
+
+define <vscale x 1 x i64> @test_nontemporal_vp_load_nxv1i64_DEFAULT(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv1i64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv1i64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv1i64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv1i64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i64> @llvm.vp.load.nxv1i64.p0(ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 1 x i64> %x
+}
+
+
+define void @test_nontemporal_vp_store_nxv1i64_P1(<vscale x 1 x i64> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv1i64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv1i64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv1i64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv1i64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv1i64.p0(<vscale x 1 x i64> %val, ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv1i64_PALL(<vscale x 1 x i64> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv1i64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv1i64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv1i64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv1i64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv1i64.p0(<vscale x 1 x i64> %val, ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv1i64_S1(<vscale x 1 x i64> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv1i64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv1i64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv1i64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv1i64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv1i64.p0(<vscale x 1 x i64> %val, ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv1i64_ALL(<vscale x 1 x i64> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv1i64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv1i64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv1i64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv1i64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv1i64.p0(<vscale x 1 x i64> %val, ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_store_nxv1i64_DEFAULT(<vscale x 1 x i64> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv1i64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv1i64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv1i64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv1i64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv1i64.p0(<vscale x 1 x i64> %val, ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 1 x i64> @test_nontemporal_vp_gather_nxv1i64_P1(<vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv1i64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv1i64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv1i64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv1i64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v9
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i64> @llvm.vp.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 1 x i64> %x
+}
+
+
+define <vscale x 1 x i64> @test_nontemporal_vp_gather_nxv1i64_PALL(<vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv1i64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv1i64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv1i64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv1i64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v9
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i64> @llvm.vp.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 1 x i64> %x
+}
+
+
+define <vscale x 1 x i64> @test_nontemporal_vp_gather_nxv1i64_S1(<vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv1i64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv1i64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv1i64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv1i64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v9
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i64> @llvm.vp.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 1 x i64> %x
+}
+
+
+define <vscale x 1 x i64> @test_nontemporal_vp_gather_nxv1i64_ALL(<vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv1i64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv1i64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv1i64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv1i64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v9
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i64> @llvm.vp.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 1 x i64> %x
+}
+
+define <vscale x 1 x i64> @test_nontemporal_vp_gather_nxv1i64_DEFAULT(<vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv1i64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv1i64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv1i64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv1i64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v9
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i64> @llvm.vp.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 1 x i64> %x
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv1i64_P1(<vscale x 1 x i64> %val, <vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv1i64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv1i64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv1i64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv1i64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv1i64_PALL(<vscale x 1 x i64> %val, <vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv1i64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv1i64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv1i64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv1i64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv1i64_S1(<vscale x 1 x i64> %val, <vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv1i64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv1i64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv1i64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv1i64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv1i64_ALL(<vscale x 1 x i64> %val, <vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv1i64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv1i64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv1i64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv1i64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_scatter_nxv1i64_DEFAULT(<vscale x 1 x i64> %val, <vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv1i64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv1i64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv1i64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv1i64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 1 x i64> @test_nontemporal_vp_strided.load_nxv1i64_P1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv1i64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv1i64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv1i64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv1i64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.i64(ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 1 x i64> %x
+}
+
+
+define <vscale x 1 x i64> @test_nontemporal_vp_strided.load_nxv1i64_PALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv1i64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv1i64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv1i64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv1i64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.i64(ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 1 x i64> %x
+}
+
+
+define <vscale x 1 x i64> @test_nontemporal_vp_strided.load_nxv1i64_S1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv1i64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv1i64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv1i64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv1i64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.i64(ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 1 x i64> %x
+}
+
+
+define <vscale x 1 x i64> @test_nontemporal_vp_strided.load_nxv1i64_ALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv1i64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv1i64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv1i64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv1i64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.i64(ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 1 x i64> %x
+}
+
+define <vscale x 1 x i64> @test_nontemporal_vp_strided.load_nxv1i64_DEFAULT(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv1i64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv1i64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv1i64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv1i64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.i64(ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 1 x i64> %x
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv1i64_P1(<vscale x 1 x i64> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv1i64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv1i64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv1i64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv1i64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv1i64.i64(<vscale x 1 x i64> %val, ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv1i64_PALL(<vscale x 1 x i64> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv1i64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv1i64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv1i64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv1i64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv1i64.i64(<vscale x 1 x i64> %val, ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv1i64_S1(<vscale x 1 x i64> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv1i64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv1i64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv1i64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv1i64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv1i64.i64(<vscale x 1 x i64> %val, ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv1i64_ALL(<vscale x 1 x i64> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv1i64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv1i64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv1i64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv1i64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv1i64.i64(<vscale x 1 x i64> %val, ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_strided.store_nxv1i64_DEFAULT(<vscale x 1 x i64> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv1i64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv1i64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv1i64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv1i64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv1i64.i64(<vscale x 1 x i64> %val, ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 1 x float> @test_nontemporal_vp_load_nxv1f32_P1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv1f32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv1f32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv1f32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv1f32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x float> @llvm.vp.load.nxv1f32.p0(ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 1 x float> %x
+}
+
+
+define <vscale x 1 x float> @test_nontemporal_vp_load_nxv1f32_PALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv1f32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv1f32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv1f32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv1f32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x float> @llvm.vp.load.nxv1f32.p0(ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 1 x float> %x
+}
+
+
+define <vscale x 1 x float> @test_nontemporal_vp_load_nxv1f32_S1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv1f32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv1f32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv1f32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv1f32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x float> @llvm.vp.load.nxv1f32.p0(ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 1 x float> %x
+}
+
+
+define <vscale x 1 x float> @test_nontemporal_vp_load_nxv1f32_ALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv1f32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv1f32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv1f32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv1f32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x float> @llvm.vp.load.nxv1f32.p0(ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 1 x float> %x
+}
+
+define <vscale x 1 x float> @test_nontemporal_vp_load_nxv1f32_DEFAULT(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv1f32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv1f32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv1f32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv1f32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x float> @llvm.vp.load.nxv1f32.p0(ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 1 x float> %x
+}
+
+
+define void @test_nontemporal_vp_store_nxv1f32_P1(<vscale x 1 x float> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv1f32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv1f32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv1f32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv1f32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv1f32.p0(<vscale x 1 x float> %val, ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv1f32_PALL(<vscale x 1 x float> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv1f32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv1f32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv1f32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv1f32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv1f32.p0(<vscale x 1 x float> %val, ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv1f32_S1(<vscale x 1 x float> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv1f32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv1f32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv1f32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv1f32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv1f32.p0(<vscale x 1 x float> %val, ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv1f32_ALL(<vscale x 1 x float> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv1f32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv1f32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv1f32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv1f32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv1f32.p0(<vscale x 1 x float> %val, ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_store_nxv1f32_DEFAULT(<vscale x 1 x float> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv1f32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv1f32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv1f32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv1f32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv1f32.p0(<vscale x 1 x float> %val, ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 1 x float> @test_nontemporal_vp_gather_nxv1f32_P1(<vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv1f32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v9, (zero), v8
+; CHECK-RV64V-NEXT: vmv1r.v v8, v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv1f32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv1f32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v9, (zero), v8
+; CHECK-RV64VC-NEXT: vmv1r.v v8, v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv1f32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x float> @llvm.vp.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 1 x float> %x
+}
+
+
+define <vscale x 1 x float> @test_nontemporal_vp_gather_nxv1f32_PALL(<vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv1f32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v9, (zero), v8
+; CHECK-RV64V-NEXT: vmv1r.v v8, v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv1f32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv1f32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v9, (zero), v8
+; CHECK-RV64VC-NEXT: vmv1r.v v8, v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv1f32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x float> @llvm.vp.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 1 x float> %x
+}
+
+
+define <vscale x 1 x float> @test_nontemporal_vp_gather_nxv1f32_S1(<vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv1f32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v9, (zero), v8
+; CHECK-RV64V-NEXT: vmv1r.v v8, v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv1f32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv1f32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v9, (zero), v8
+; CHECK-RV64VC-NEXT: vmv1r.v v8, v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv1f32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x float> @llvm.vp.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 1 x float> %x
+}
+
+
+define <vscale x 1 x float> @test_nontemporal_vp_gather_nxv1f32_ALL(<vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv1f32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v9, (zero), v8
+; CHECK-RV64V-NEXT: vmv1r.v v8, v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv1f32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv1f32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v9, (zero), v8
+; CHECK-RV64VC-NEXT: vmv1r.v v8, v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv1f32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x float> @llvm.vp.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 1 x float> %x
+}
+
+define <vscale x 1 x float> @test_nontemporal_vp_gather_nxv1f32_DEFAULT(<vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv1f32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v9, (zero), v8
+; CHECK-RV64V-NEXT: vmv1r.v v8, v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv1f32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv1f32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v9, (zero), v8
+; CHECK-RV64VC-NEXT: vmv1r.v v8, v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv1f32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x float> @llvm.vp.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 1 x float> %x
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv1f32_P1(<vscale x 1 x float> %val, <vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv1f32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv1f32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv1f32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv1f32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv1f32_PALL(<vscale x 1 x float> %val, <vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv1f32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv1f32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv1f32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv1f32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv1f32_S1(<vscale x 1 x float> %val, <vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv1f32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv1f32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv1f32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv1f32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv1f32_ALL(<vscale x 1 x float> %val, <vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv1f32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv1f32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv1f32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv1f32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_scatter_nxv1f32_DEFAULT(<vscale x 1 x float> %val, <vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv1f32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv1f32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv1f32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv1f32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 1 x float> @test_nontemporal_vp_strided.load_nxv1f32_P1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv1f32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv1f32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv1f32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv1f32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x float> @llvm.experimental.vp.strided.load.nxv1f32.i64(ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 1 x float> %x
+}
+
+
+define <vscale x 1 x float> @test_nontemporal_vp_strided.load_nxv1f32_PALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv1f32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv1f32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv1f32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv1f32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x float> @llvm.experimental.vp.strided.load.nxv1f32.i64(ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 1 x float> %x
+}
+
+
+define <vscale x 1 x float> @test_nontemporal_vp_strided.load_nxv1f32_S1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv1f32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv1f32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv1f32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv1f32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x float> @llvm.experimental.vp.strided.load.nxv1f32.i64(ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 1 x float> %x
+}
+
+
+define <vscale x 1 x float> @test_nontemporal_vp_strided.load_nxv1f32_ALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv1f32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv1f32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv1f32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv1f32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x float> @llvm.experimental.vp.strided.load.nxv1f32.i64(ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 1 x float> %x
+}
+
+define <vscale x 1 x float> @test_nontemporal_vp_strided.load_nxv1f32_DEFAULT(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv1f32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv1f32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv1f32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv1f32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x float> @llvm.experimental.vp.strided.load.nxv1f32.i64(ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 1 x float> %x
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv1f32_P1(<vscale x 1 x float> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv1f32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv1f32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv1f32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv1f32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv1f32.i64(<vscale x 1 x float> %val, ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv1f32_PALL(<vscale x 1 x float> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv1f32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv1f32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv1f32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv1f32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv1f32.i64(<vscale x 1 x float> %val, ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv1f32_S1(<vscale x 1 x float> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv1f32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv1f32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv1f32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv1f32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv1f32.i64(<vscale x 1 x float> %val, ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv1f32_ALL(<vscale x 1 x float> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv1f32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv1f32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv1f32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv1f32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv1f32.i64(<vscale x 1 x float> %val, ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_strided.store_nxv1f32_DEFAULT(<vscale x 1 x float> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv1f32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv1f32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv1f32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv1f32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv1f32.i64(<vscale x 1 x float> %val, ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 1 x double> @test_nontemporal_vp_load_nxv1f64_P1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv1f64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv1f64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv1f64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv1f64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x double> @llvm.vp.load.nxv1f64.p0(ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 1 x double> %x
+}
+
+
+define <vscale x 1 x double> @test_nontemporal_vp_load_nxv1f64_PALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv1f64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv1f64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv1f64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv1f64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x double> @llvm.vp.load.nxv1f64.p0(ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 1 x double> %x
+}
+
+
+define <vscale x 1 x double> @test_nontemporal_vp_load_nxv1f64_S1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv1f64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv1f64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv1f64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv1f64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x double> @llvm.vp.load.nxv1f64.p0(ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 1 x double> %x
+}
+
+
+define <vscale x 1 x double> @test_nontemporal_vp_load_nxv1f64_ALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv1f64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv1f64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv1f64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv1f64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x double> @llvm.vp.load.nxv1f64.p0(ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 1 x double> %x
+}
+
+define <vscale x 1 x double> @test_nontemporal_vp_load_nxv1f64_DEFAULT(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv1f64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv1f64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv1f64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv1f64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x double> @llvm.vp.load.nxv1f64.p0(ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 1 x double> %x
+}
+
+
+define void @test_nontemporal_vp_store_nxv1f64_P1(<vscale x 1 x double> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv1f64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv1f64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv1f64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv1f64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv1f64.p0(<vscale x 1 x double> %val, ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv1f64_PALL(<vscale x 1 x double> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv1f64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv1f64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv1f64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv1f64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv1f64.p0(<vscale x 1 x double> %val, ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv1f64_S1(<vscale x 1 x double> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv1f64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv1f64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv1f64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv1f64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv1f64.p0(<vscale x 1 x double> %val, ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv1f64_ALL(<vscale x 1 x double> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv1f64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv1f64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv1f64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv1f64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv1f64.p0(<vscale x 1 x double> %val, ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_store_nxv1f64_DEFAULT(<vscale x 1 x double> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv1f64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv1f64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv1f64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv1f64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv1f64.p0(<vscale x 1 x double> %val, ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 1 x double> @test_nontemporal_vp_gather_nxv1f64_P1(<vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv1f64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv1f64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv1f64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv1f64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v9
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x double> @llvm.vp.gather.nxv1f64.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 1 x double> %x
+}
+
+
+define <vscale x 1 x double> @test_nontemporal_vp_gather_nxv1f64_PALL(<vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv1f64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv1f64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv1f64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv1f64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v9
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x double> @llvm.vp.gather.nxv1f64.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 1 x double> %x
+}
+
+
+define <vscale x 1 x double> @test_nontemporal_vp_gather_nxv1f64_S1(<vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv1f64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv1f64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv1f64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv1f64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v9
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x double> @llvm.vp.gather.nxv1f64.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 1 x double> %x
+}
+
+
+define <vscale x 1 x double> @test_nontemporal_vp_gather_nxv1f64_ALL(<vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv1f64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv1f64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv1f64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv1f64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v9
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x double> @llvm.vp.gather.nxv1f64.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 1 x double> %x
+}
+
+define <vscale x 1 x double> @test_nontemporal_vp_gather_nxv1f64_DEFAULT(<vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv1f64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv1f64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv1f64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv1f64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v9
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x double> @llvm.vp.gather.nxv1f64.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 1 x double> %x
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv1f64_P1(<vscale x 1 x double> %val, <vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv1f64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv1f64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv1f64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv1f64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv1f64.nxv1p0(<vscale x 1 x double> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv1f64_PALL(<vscale x 1 x double> %val, <vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv1f64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv1f64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv1f64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv1f64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv1f64.nxv1p0(<vscale x 1 x double> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv1f64_S1(<vscale x 1 x double> %val, <vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv1f64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv1f64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv1f64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv1f64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv1f64.nxv1p0(<vscale x 1 x double> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv1f64_ALL(<vscale x 1 x double> %val, <vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv1f64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv1f64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv1f64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv1f64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv1f64.nxv1p0(<vscale x 1 x double> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_scatter_nxv1f64_DEFAULT(<vscale x 1 x double> %val, <vscale x 1 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv1f64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv1f64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv1f64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv1f64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv1f64.nxv1p0(<vscale x 1 x double> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 1 x double> @test_nontemporal_vp_strided.load_nxv1f64_P1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv1f64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv1f64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv1f64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv1f64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x double> @llvm.experimental.vp.strided.load.nxv1f64.i64(ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 1 x double> %x
+}
+
+
+define <vscale x 1 x double> @test_nontemporal_vp_strided.load_nxv1f64_PALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv1f64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv1f64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv1f64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv1f64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x double> @llvm.experimental.vp.strided.load.nxv1f64.i64(ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 1 x double> %x
+}
+
+
+define <vscale x 1 x double> @test_nontemporal_vp_strided.load_nxv1f64_S1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv1f64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv1f64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv1f64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv1f64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x double> @llvm.experimental.vp.strided.load.nxv1f64.i64(ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 1 x double> %x
+}
+
+
+define <vscale x 1 x double> @test_nontemporal_vp_strided.load_nxv1f64_ALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv1f64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv1f64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv1f64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv1f64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x double> @llvm.experimental.vp.strided.load.nxv1f64.i64(ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 1 x double> %x
+}
+
+define <vscale x 1 x double> @test_nontemporal_vp_strided.load_nxv1f64_DEFAULT(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv1f64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv1f64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv1f64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv1f64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x double> @llvm.experimental.vp.strided.load.nxv1f64.i64(ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 1 x double> %x
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv1f64_P1(<vscale x 1 x double> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv1f64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv1f64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv1f64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv1f64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv1f64.i64(<vscale x 1 x double> %val, ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv1f64_PALL(<vscale x 1 x double> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv1f64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv1f64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv1f64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv1f64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv1f64.i64(<vscale x 1 x double> %val, ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv1f64_S1(<vscale x 1 x double> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv1f64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv1f64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv1f64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv1f64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv1f64.i64(<vscale x 1 x double> %val, ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv1f64_ALL(<vscale x 1 x double> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv1f64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv1f64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv1f64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv1f64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv1f64.i64(<vscale x 1 x double> %val, ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_strided.store_nxv1f64_DEFAULT(<vscale x 1 x double> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv1f64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv1f64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv1f64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv1f64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv1f64.i64(<vscale x 1 x double> %val, ptr %p, i64 %stride, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 2 x i8> @test_nontemporal_vp_load_nxv2i8_P1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv2i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv2i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv2i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv2i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8.p0(ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 2 x i8> %x
+}
+
+
+define <vscale x 2 x i8> @test_nontemporal_vp_load_nxv2i8_PALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv2i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv2i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv2i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv2i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8.p0(ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 2 x i8> %x
+}
+
+
+define <vscale x 2 x i8> @test_nontemporal_vp_load_nxv2i8_S1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv2i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv2i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv2i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv2i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8.p0(ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 2 x i8> %x
+}
+
+
+define <vscale x 2 x i8> @test_nontemporal_vp_load_nxv2i8_ALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv2i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv2i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv2i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv2i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8.p0(ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 2 x i8> %x
+}
+
+define <vscale x 2 x i8> @test_nontemporal_vp_load_nxv2i8_DEFAULT(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv2i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv2i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv2i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv2i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8.p0(ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 2 x i8> %x
+}
+
+
+define void @test_nontemporal_vp_store_nxv2i8_P1(<vscale x 2 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv2i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vse8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv2i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vse8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv2i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv2i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv2i8.p0(<vscale x 2 x i8> %val, ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv2i8_PALL(<vscale x 2 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv2i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vse8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv2i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vse8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv2i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv2i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv2i8.p0(<vscale x 2 x i8> %val, ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv2i8_S1(<vscale x 2 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv2i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vse8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv2i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vse8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv2i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv2i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv2i8.p0(<vscale x 2 x i8> %val, ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv2i8_ALL(<vscale x 2 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv2i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv2i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv2i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv2i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv2i8.p0(<vscale x 2 x i8> %val, ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_store_nxv2i8_DEFAULT(<vscale x 2 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv2i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv2i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv2i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv2i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv2i8.p0(<vscale x 2 x i8> %val, ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 2 x i8> @test_nontemporal_vp_gather_nxv2i8_P1(<vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv2i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64V-NEXT: vmv1r.v v8, v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv2i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32V-NEXT: vmv1r.v v8, v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv2i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64VC-NEXT: vmv1r.v v8, v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv2i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32VC-NEXT: vmv1r.v v8, v9
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 2 x i8> %x
+}
+
+
+define <vscale x 2 x i8> @test_nontemporal_vp_gather_nxv2i8_PALL(<vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv2i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64V-NEXT: vmv1r.v v8, v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv2i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32V-NEXT: vmv1r.v v8, v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv2i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64VC-NEXT: vmv1r.v v8, v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv2i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32VC-NEXT: vmv1r.v v8, v9
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 2 x i8> %x
+}
+
+
+define <vscale x 2 x i8> @test_nontemporal_vp_gather_nxv2i8_S1(<vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv2i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64V-NEXT: vmv1r.v v8, v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv2i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32V-NEXT: vmv1r.v v8, v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv2i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64VC-NEXT: vmv1r.v v8, v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv2i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32VC-NEXT: vmv1r.v v8, v9
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 2 x i8> %x
+}
+
+
+define <vscale x 2 x i8> @test_nontemporal_vp_gather_nxv2i8_ALL(<vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv2i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64V-NEXT: vmv1r.v v8, v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv2i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32V-NEXT: vmv1r.v v8, v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv2i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64VC-NEXT: vmv1r.v v8, v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv2i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32VC-NEXT: vmv1r.v v8, v9
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 2 x i8> %x
+}
+
+define <vscale x 2 x i8> @test_nontemporal_vp_gather_nxv2i8_DEFAULT(<vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv2i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64V-NEXT: vmv1r.v v8, v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv2i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32V-NEXT: vmv1r.v v8, v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv2i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64VC-NEXT: vmv1r.v v8, v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv2i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32VC-NEXT: vmv1r.v v8, v9
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 2 x i8> %x
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv2i8_P1(<vscale x 2 x i8> %val, <vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv2i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv2i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv2i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv2i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv2i8_PALL(<vscale x 2 x i8> %val, <vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv2i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv2i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv2i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv2i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv2i8_S1(<vscale x 2 x i8> %val, <vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv2i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv2i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv2i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv2i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv2i8_ALL(<vscale x 2 x i8> %val, <vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv2i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv2i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv2i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv2i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_scatter_nxv2i8_DEFAULT(<vscale x 2 x i8> %val, <vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv2i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv2i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv2i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv2i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 2 x i8> @test_nontemporal_vp_strided.load_nxv2i8_P1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv2i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv2i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv2i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv2i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i8> @llvm.experimental.vp.strided.load.nxv2i8.i64(ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 2 x i8> %x
+}
+
+
+define <vscale x 2 x i8> @test_nontemporal_vp_strided.load_nxv2i8_PALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv2i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv2i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv2i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv2i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i8> @llvm.experimental.vp.strided.load.nxv2i8.i64(ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 2 x i8> %x
+}
+
+
+define <vscale x 2 x i8> @test_nontemporal_vp_strided.load_nxv2i8_S1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv2i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv2i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv2i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv2i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i8> @llvm.experimental.vp.strided.load.nxv2i8.i64(ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 2 x i8> %x
+}
+
+
+define <vscale x 2 x i8> @test_nontemporal_vp_strided.load_nxv2i8_ALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv2i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv2i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv2i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv2i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i8> @llvm.experimental.vp.strided.load.nxv2i8.i64(ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 2 x i8> %x
+}
+
+define <vscale x 2 x i8> @test_nontemporal_vp_strided.load_nxv2i8_DEFAULT(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv2i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv2i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv2i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv2i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i8> @llvm.experimental.vp.strided.load.nxv2i8.i64(ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 2 x i8> %x
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv2i8_P1(<vscale x 2 x i8> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv2i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv2i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv2i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv2i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv2i8.i64(<vscale x 2 x i8> %val, ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv2i8_PALL(<vscale x 2 x i8> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv2i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv2i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv2i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv2i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv2i8.i64(<vscale x 2 x i8> %val, ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv2i8_S1(<vscale x 2 x i8> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv2i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv2i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv2i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv2i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv2i8.i64(<vscale x 2 x i8> %val, ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv2i8_ALL(<vscale x 2 x i8> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv2i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv2i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv2i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv2i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv2i8.i64(<vscale x 2 x i8> %val, ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_strided.store_nxv2i8_DEFAULT(<vscale x 2 x i8> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv2i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, mf4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv2i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, mf4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv2i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, mf4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv2i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, mf4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv2i8.i64(<vscale x 2 x i8> %val, ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
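+; The tests below repeat the vp.load / vp.store / vp.gather / vp.scatter /
+; vp.strided checks for the nxv2i16 element type. As in the rest of this file,
+; !riscv-nontemporal-domain !1, !2, !3 and !4 select the ntl.p1, ntl.pall,
+; ntl.s1 and ntl.all hints respectively, and !nontemporal without a domain
+; falls back to ntl.all (the DEFAULT cases).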
+define <vscale x 2 x i16> @test_nontemporal_vp_load_nxv2i16_P1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv2i16_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vle16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv2i16_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vle16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv2i16_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv2i16_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i16> @llvm.vp.load.nxv2i16.p0(ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 2 x i16> %x
+}
+
+
+define <vscale x 2 x i16> @test_nontemporal_vp_load_nxv2i16_PALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv2i16_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vle16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv2i16_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vle16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv2i16_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv2i16_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i16> @llvm.vp.load.nxv2i16.p0(ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 2 x i16> %x
+}
+
+
+define <vscale x 2 x i16> @test_nontemporal_vp_load_nxv2i16_S1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv2i16_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vle16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv2i16_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vle16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv2i16_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv2i16_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i16> @llvm.vp.load.nxv2i16.p0(ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 2 x i16> %x
+}
+
+
+define <vscale x 2 x i16> @test_nontemporal_vp_load_nxv2i16_ALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv2i16_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv2i16_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv2i16_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv2i16_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i16> @llvm.vp.load.nxv2i16.p0(ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 2 x i16> %x
+}
+
+define <vscale x 2 x i16> @test_nontemporal_vp_load_nxv2i16_DEFAULT(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv2i16_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv2i16_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv2i16_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv2i16_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i16> @llvm.vp.load.nxv2i16.p0(ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 2 x i16> %x
+}
+
+
+define void @test_nontemporal_vp_store_nxv2i16_P1(<vscale x 2 x i16> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv2i16_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vse16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv2i16_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vse16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv2i16_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv2i16_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv2i16.p0(<vscale x 2 x i16> %val, ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv2i16_PALL(<vscale x 2 x i16> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv2i16_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vse16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv2i16_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vse16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv2i16_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv2i16_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv2i16.p0(<vscale x 2 x i16> %val, ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv2i16_S1(<vscale x 2 x i16> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv2i16_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vse16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv2i16_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vse16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv2i16_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv2i16_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv2i16.p0(<vscale x 2 x i16> %val, ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv2i16_ALL(<vscale x 2 x i16> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv2i16_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv2i16_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv2i16_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv2i16_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv2i16.p0(<vscale x 2 x i16> %val, ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_store_nxv2i16_DEFAULT(<vscale x 2 x i16> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv2i16_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv2i16_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv2i16_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv2i16_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv2i16.p0(<vscale x 2 x i16> %val, ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 2 x i16> @test_nontemporal_vp_gather_nxv2i16_P1(<vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv2i16_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64V-NEXT: vmv1r.v v8, v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv2i16_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32V-NEXT: vmv1r.v v8, v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv2i16_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64VC-NEXT: vmv1r.v v8, v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv2i16_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32VC-NEXT: vmv1r.v v8, v9
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i16> @llvm.vp.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 2 x i16> %x
+}
+
+
+define <vscale x 2 x i16> @test_nontemporal_vp_gather_nxv2i16_PALL(<vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv2i16_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64V-NEXT: vmv1r.v v8, v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv2i16_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32V-NEXT: vmv1r.v v8, v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv2i16_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64VC-NEXT: vmv1r.v v8, v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv2i16_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32VC-NEXT: vmv1r.v v8, v9
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i16> @llvm.vp.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 2 x i16> %x
+}
+
+
+define <vscale x 2 x i16> @test_nontemporal_vp_gather_nxv2i16_S1(<vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv2i16_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64V-NEXT: vmv1r.v v8, v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv2i16_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32V-NEXT: vmv1r.v v8, v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv2i16_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64VC-NEXT: vmv1r.v v8, v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv2i16_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32VC-NEXT: vmv1r.v v8, v9
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i16> @llvm.vp.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 2 x i16> %x
+}
+
+
+define <vscale x 2 x i16> @test_nontemporal_vp_gather_nxv2i16_ALL(<vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv2i16_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64V-NEXT: vmv1r.v v8, v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv2i16_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32V-NEXT: vmv1r.v v8, v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv2i16_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64VC-NEXT: vmv1r.v v8, v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv2i16_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32VC-NEXT: vmv1r.v v8, v9
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i16> @llvm.vp.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 2 x i16> %x
+}
+
+define <vscale x 2 x i16> @test_nontemporal_vp_gather_nxv2i16_DEFAULT(<vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv2i16_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64V-NEXT: vmv1r.v v8, v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv2i16_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32V-NEXT: vmv1r.v v8, v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv2i16_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64VC-NEXT: vmv1r.v v8, v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv2i16_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32VC-NEXT: vmv1r.v v8, v9
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i16> @llvm.vp.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 2 x i16> %x
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv2i16_P1(<vscale x 2 x i16> %val, <vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv2i16_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv2i16_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv2i16_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv2i16_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv2i16_PALL(<vscale x 2 x i16> %val, <vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv2i16_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv2i16_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv2i16_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv2i16_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv2i16_S1(<vscale x 2 x i16> %val, <vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv2i16_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv2i16_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv2i16_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv2i16_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv2i16_ALL(<vscale x 2 x i16> %val, <vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv2i16_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv2i16_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv2i16_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv2i16_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_scatter_nxv2i16_DEFAULT(<vscale x 2 x i16> %val, <vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv2i16_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv2i16_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv2i16_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv2i16_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 2 x i16> @test_nontemporal_vp_strided.load_nxv2i16_P1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv2i16_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv2i16_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv2i16_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv2i16_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i16> @llvm.experimental.vp.strided.load.nxv2i16.i64(ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 2 x i16> %x
+}
+
+
+define <vscale x 2 x i16> @test_nontemporal_vp_strided.load_nxv2i16_PALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv2i16_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv2i16_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv2i16_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv2i16_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i16> @llvm.experimental.vp.strided.load.nxv2i16.i64(ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 2 x i16> %x
+}
+
+
+define <vscale x 2 x i16> @test_nontemporal_vp_strided.load_nxv2i16_S1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv2i16_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv2i16_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv2i16_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv2i16_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i16> @llvm.experimental.vp.strided.load.nxv2i16.i64(ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 2 x i16> %x
+}
+
+
+define <vscale x 2 x i16> @test_nontemporal_vp_strided.load_nxv2i16_ALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv2i16_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv2i16_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv2i16_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv2i16_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i16> @llvm.experimental.vp.strided.load.nxv2i16.i64(ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 2 x i16> %x
+}
+
+define <vscale x 2 x i16> @test_nontemporal_vp_strided.load_nxv2i16_DEFAULT(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv2i16_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv2i16_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv2i16_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv2i16_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i16> @llvm.experimental.vp.strided.load.nxv2i16.i64(ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 2 x i16> %x
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv2i16_P1(<vscale x 2 x i16> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv2i16_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv2i16_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv2i16_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv2i16_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv2i16.i64(<vscale x 2 x i16> %val, ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv2i16_PALL(<vscale x 2 x i16> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv2i16_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv2i16_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv2i16_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv2i16_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv2i16.i64(<vscale x 2 x i16> %val, ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv2i16_S1(<vscale x 2 x i16> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv2i16_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv2i16_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv2i16_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv2i16_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv2i16.i64(<vscale x 2 x i16> %val, ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv2i16_ALL(<vscale x 2 x i16> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv2i16_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv2i16_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv2i16_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv2i16_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv2i16.i64(<vscale x 2 x i16> %val, ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_strided.store_nxv2i16_DEFAULT(<vscale x 2 x i16> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv2i16_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv2i16_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv2i16_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv2i16_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv2i16.i64(<vscale x 2 x i16> %val, ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
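+; Same coverage for the nxv2i32 element type (e32, m1 in the vsetvli checks).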
+define <vscale x 2 x i32> @test_nontemporal_vp_load_nxv2i32_P1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv2i32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv2i32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv2i32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv2i32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 2 x i32> %x
+}
+
+
+define <vscale x 2 x i32> @test_nontemporal_vp_load_nxv2i32_PALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv2i32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv2i32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv2i32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv2i32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 2 x i32> %x
+}
+
+
+define <vscale x 2 x i32> @test_nontemporal_vp_load_nxv2i32_S1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv2i32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv2i32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv2i32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv2i32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 2 x i32> %x
+}
+
+
+define <vscale x 2 x i32> @test_nontemporal_vp_load_nxv2i32_ALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv2i32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv2i32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv2i32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv2i32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 2 x i32> %x
+}
+
+define <vscale x 2 x i32> @test_nontemporal_vp_load_nxv2i32_DEFAULT(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv2i32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv2i32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv2i32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv2i32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 2 x i32> %x
+}
+
+
+define void @test_nontemporal_vp_store_nxv2i32_P1(<vscale x 2 x i32> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv2i32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv2i32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv2i32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv2i32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv2i32.p0(<vscale x 2 x i32> %val, ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv2i32_PALL(<vscale x 2 x i32> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv2i32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv2i32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv2i32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv2i32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv2i32.p0(<vscale x 2 x i32> %val, ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv2i32_S1(<vscale x 2 x i32> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv2i32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv2i32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv2i32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv2i32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv2i32.p0(<vscale x 2 x i32> %val, ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv2i32_ALL(<vscale x 2 x i32> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv2i32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv2i32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv2i32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv2i32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv2i32.p0(<vscale x 2 x i32> %val, ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_store_nxv2i32_DEFAULT(<vscale x 2 x i32> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv2i32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv2i32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv2i32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv2i32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv2i32.p0(<vscale x 2 x i32> %val, ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 2 x i32> @test_nontemporal_vp_gather_nxv2i32_P1(<vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv2i32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv2i32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv2i32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv2i32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i32> @llvm.vp.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 2 x i32> %x
+}
+
+
+define <vscale x 2 x i32> @test_nontemporal_vp_gather_nxv2i32_PALL(<vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv2i32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv2i32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv2i32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv2i32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i32> @llvm.vp.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 2 x i32> %x
+}
+
+
+define <vscale x 2 x i32> @test_nontemporal_vp_gather_nxv2i32_S1(<vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv2i32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv2i32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv2i32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv2i32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i32> @llvm.vp.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 2 x i32> %x
+}
+
+
+define <vscale x 2 x i32> @test_nontemporal_vp_gather_nxv2i32_ALL(<vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv2i32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv2i32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv2i32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv2i32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i32> @llvm.vp.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 2 x i32> %x
+}
+
+define <vscale x 2 x i32> @test_nontemporal_vp_gather_nxv2i32_DEFAULT(<vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv2i32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv2i32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv2i32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv2i32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i32> @llvm.vp.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 2 x i32> %x
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv2i32_P1(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv2i32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv2i32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv2i32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv2i32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv2i32_PALL(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv2i32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv2i32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv2i32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv2i32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv2i32_S1(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv2i32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv2i32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv2i32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv2i32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv2i32_ALL(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv2i32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv2i32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv2i32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv2i32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_scatter_nxv2i32_DEFAULT(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv2i32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv2i32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv2i32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv2i32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 2 x i32> @test_nontemporal_vp_strided.load_nxv2i32_P1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv2i32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv2i32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv2i32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv2i32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i32> @llvm.experimental.vp.strided.load.nxv2i32.i64(ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 2 x i32> %x
+}
+
+
+define <vscale x 2 x i32> @test_nontemporal_vp_strided.load_nxv2i32_PALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv2i32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv2i32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv2i32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv2i32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i32> @llvm.experimental.vp.strided.load.nxv2i32.i64(ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 2 x i32> %x
+}
+
+
+define <vscale x 2 x i32> @test_nontemporal_vp_strided.load_nxv2i32_S1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv2i32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv2i32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv2i32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv2i32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i32> @llvm.experimental.vp.strided.load.nxv2i32.i64(ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 2 x i32> %x
+}
+
+
+define <vscale x 2 x i32> @test_nontemporal_vp_strided.load_nxv2i32_ALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv2i32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv2i32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv2i32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv2i32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i32> @llvm.experimental.vp.strided.load.nxv2i32.i64(ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 2 x i32> %x
+}
+
+define <vscale x 2 x i32> @test_nontemporal_vp_strided.load_nxv2i32_DEFAULT(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv2i32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv2i32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv2i32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv2i32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i32> @llvm.experimental.vp.strided.load.nxv2i32.i64(ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 2 x i32> %x
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv2i32_P1(<vscale x 2 x i32> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv2i32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv2i32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv2i32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv2i32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv2i32.i64(<vscale x 2 x i32> %val, ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv2i32_PALL(<vscale x 2 x i32> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv2i32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv2i32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv2i32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv2i32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv2i32.i64(<vscale x 2 x i32> %val, ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv2i32_S1(<vscale x 2 x i32> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv2i32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv2i32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv2i32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv2i32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv2i32.i64(<vscale x 2 x i32> %val, ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv2i32_ALL(<vscale x 2 x i32> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv2i32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv2i32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv2i32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv2i32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv2i32.i64(<vscale x 2 x i32> %val, ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_strided.store_nxv2i32_DEFAULT(<vscale x 2 x i32> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv2i32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv2i32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv2i32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv2i32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv2i32.i64(<vscale x 2 x i32> %val, ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 2 x i64> @test_nontemporal_vp_load_nxv2i64_P1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv2i64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv2i64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv2i64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv2i64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 2 x i64> %x
+}
+
+
+define <vscale x 2 x i64> @test_nontemporal_vp_load_nxv2i64_PALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv2i64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv2i64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv2i64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv2i64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 2 x i64> %x
+}
+
+
+define <vscale x 2 x i64> @test_nontemporal_vp_load_nxv2i64_S1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv2i64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv2i64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv2i64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv2i64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 2 x i64> %x
+}
+
+
+define <vscale x 2 x i64> @test_nontemporal_vp_load_nxv2i64_ALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv2i64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv2i64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv2i64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv2i64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 2 x i64> %x
+}
+
+define <vscale x 2 x i64> @test_nontemporal_vp_load_nxv2i64_DEFAULT(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv2i64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv2i64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv2i64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv2i64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 2 x i64> %x
+}
+
+
+define void @test_nontemporal_vp_store_nxv2i64_P1(<vscale x 2 x i64> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv2i64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv2i64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv2i64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv2i64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> %val, ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv2i64_PALL(<vscale x 2 x i64> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv2i64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv2i64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv2i64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv2i64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> %val, ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv2i64_S1(<vscale x 2 x i64> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv2i64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv2i64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv2i64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv2i64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> %val, ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv2i64_ALL(<vscale x 2 x i64> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv2i64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv2i64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv2i64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv2i64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> %val, ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_store_nxv2i64_DEFAULT(<vscale x 2 x i64> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv2i64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv2i64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv2i64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv2i64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> %val, ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 2 x i64> @test_nontemporal_vp_gather_nxv2i64_P1(<vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv2i64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv2i64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: vmv1r.v v10, v8
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv2i64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv2i64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: vmv1r.v v10, v8
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i64> @llvm.vp.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 2 x i64> %x
+}
+
+
+define <vscale x 2 x i64> @test_nontemporal_vp_gather_nxv2i64_PALL(<vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv2i64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv2i64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: vmv1r.v v10, v8
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv2i64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv2i64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: vmv1r.v v10, v8
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i64> @llvm.vp.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 2 x i64> %x
+}
+
+
+define <vscale x 2 x i64> @test_nontemporal_vp_gather_nxv2i64_S1(<vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv2i64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv2i64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: vmv1r.v v10, v8
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv2i64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv2i64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: vmv1r.v v10, v8
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i64> @llvm.vp.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 2 x i64> %x
+}
+
+
+define <vscale x 2 x i64> @test_nontemporal_vp_gather_nxv2i64_ALL(<vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv2i64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv2i64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: vmv1r.v v10, v8
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv2i64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv2i64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: vmv1r.v v10, v8
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i64> @llvm.vp.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 2 x i64> %x
+}
+
+define <vscale x 2 x i64> @test_nontemporal_vp_gather_nxv2i64_DEFAULT(<vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv2i64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv2i64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: vmv1r.v v10, v8
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv2i64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv2i64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: vmv1r.v v10, v8
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i64> @llvm.vp.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 2 x i64> %x
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv2i64_P1(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv2i64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv2i64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv2i64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv2i64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv2i64_PALL(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv2i64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv2i64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv2i64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv2i64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv2i64_S1(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv2i64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv2i64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv2i64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv2i64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv2i64_ALL(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv2i64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv2i64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv2i64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv2i64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_scatter_nxv2i64_DEFAULT(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv2i64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv2i64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv2i64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv2i64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 2 x i64> @test_nontemporal_vp_strided.load_nxv2i64_P1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv2i64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv2i64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv2i64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv2i64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i64> @llvm.experimental.vp.strided.load.nxv2i64.i64(ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 2 x i64> %x
+}
+
+
+define <vscale x 2 x i64> @test_nontemporal_vp_strided.load_nxv2i64_PALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv2i64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv2i64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv2i64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv2i64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i64> @llvm.experimental.vp.strided.load.nxv2i64.i64(ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 2 x i64> %x
+}
+
+
+define <vscale x 2 x i64> @test_nontemporal_vp_strided.load_nxv2i64_S1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv2i64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv2i64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv2i64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv2i64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i64> @llvm.experimental.vp.strided.load.nxv2i64.i64(ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 2 x i64> %x
+}
+
+
+define <vscale x 2 x i64> @test_nontemporal_vp_strided.load_nxv2i64_ALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv2i64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv2i64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv2i64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv2i64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i64> @llvm.experimental.vp.strided.load.nxv2i64.i64(ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 2 x i64> %x
+}
+
+define <vscale x 2 x i64> @test_nontemporal_vp_strided.load_nxv2i64_DEFAULT(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv2i64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv2i64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv2i64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv2i64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x i64> @llvm.experimental.vp.strided.load.nxv2i64.i64(ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 2 x i64> %x
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv2i64_P1(<vscale x 2 x i64> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv2i64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv2i64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv2i64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv2i64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv2i64.i64(<vscale x 2 x i64> %val, ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv2i64_PALL(<vscale x 2 x i64> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv2i64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv2i64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv2i64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv2i64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv2i64.i64(<vscale x 2 x i64> %val, ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv2i64_S1(<vscale x 2 x i64> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv2i64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv2i64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv2i64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv2i64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv2i64.i64(<vscale x 2 x i64> %val, ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv2i64_ALL(<vscale x 2 x i64> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv2i64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv2i64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv2i64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv2i64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv2i64.i64(<vscale x 2 x i64> %val, ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_strided.store_nxv2i64_DEFAULT(<vscale x 2 x i64> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv2i64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv2i64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv2i64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv2i64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv2i64.i64(<vscale x 2 x i64> %val, ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 2 x float> @test_nontemporal_vp_load_nxv2f32_P1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv2f32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv2f32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv2f32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv2f32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x float> @llvm.vp.load.nxv2f32.p0(ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 2 x float> %x
+}
+
+
+define <vscale x 2 x float> @test_nontemporal_vp_load_nxv2f32_PALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv2f32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv2f32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv2f32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv2f32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x float> @llvm.vp.load.nxv2f32.p0(ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 2 x float> %x
+}
+
+
+define <vscale x 2 x float> @test_nontemporal_vp_load_nxv2f32_S1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv2f32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv2f32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv2f32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv2f32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x float> @llvm.vp.load.nxv2f32.p0(ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 2 x float> %x
+}
+
+
+define <vscale x 2 x float> @test_nontemporal_vp_load_nxv2f32_ALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv2f32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv2f32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv2f32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv2f32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x float> @llvm.vp.load.nxv2f32.p0(ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 2 x float> %x
+}
+
+define <vscale x 2 x float> @test_nontemporal_vp_load_nxv2f32_DEFAULT(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv2f32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv2f32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv2f32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv2f32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x float> @llvm.vp.load.nxv2f32.p0(ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 2 x float> %x
+}
+
+
+define void @test_nontemporal_vp_store_nxv2f32_P1(<vscale x 2 x float> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv2f32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv2f32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv2f32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv2f32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv2f32.p0(<vscale x 2 x float> %val, ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv2f32_PALL(<vscale x 2 x float> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv2f32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv2f32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv2f32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv2f32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv2f32.p0(<vscale x 2 x float> %val, ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv2f32_S1(<vscale x 2 x float> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv2f32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv2f32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv2f32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv2f32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv2f32.p0(<vscale x 2 x float> %val, ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv2f32_ALL(<vscale x 2 x float> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv2f32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv2f32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv2f32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv2f32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv2f32.p0(<vscale x 2 x float> %val, ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_store_nxv2f32_DEFAULT(<vscale x 2 x float> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv2f32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv2f32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv2f32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv2f32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv2f32.p0(<vscale x 2 x float> %val, ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 2 x float> @test_nontemporal_vp_gather_nxv2f32_P1(<vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv2f32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv2f32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv2f32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv2f32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x float> @llvm.vp.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 2 x float> %x
+}
+
+
+define <vscale x 2 x float> @test_nontemporal_vp_gather_nxv2f32_PALL(<vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv2f32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv2f32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv2f32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv2f32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x float> @llvm.vp.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 2 x float> %x
+}
+
+
+define <vscale x 2 x float> @test_nontemporal_vp_gather_nxv2f32_S1(<vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv2f32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv2f32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv2f32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv2f32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x float> @llvm.vp.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 2 x float> %x
+}
+
+
+define <vscale x 2 x float> @test_nontemporal_vp_gather_nxv2f32_ALL(<vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv2f32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv2f32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv2f32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv2f32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x float> @llvm.vp.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 2 x float> %x
+}
+
+define <vscale x 2 x float> @test_nontemporal_vp_gather_nxv2f32_DEFAULT(<vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv2f32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv2f32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv2f32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv2f32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x float> @llvm.vp.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 2 x float> %x
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv2f32_P1(<vscale x 2 x float> %val, <vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv2f32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv2f32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv2f32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv2f32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv2f32_PALL(<vscale x 2 x float> %val, <vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv2f32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv2f32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv2f32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv2f32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv2f32_S1(<vscale x 2 x float> %val, <vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv2f32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv2f32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv2f32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv2f32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv2f32_ALL(<vscale x 2 x float> %val, <vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv2f32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv2f32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv2f32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv2f32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_scatter_nxv2f32_DEFAULT(<vscale x 2 x float> %val, <vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv2f32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv2f32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv2f32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv2f32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 2 x float> @test_nontemporal_vp_strided.load_nxv2f32_P1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv2f32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv2f32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv2f32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv2f32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x float> @llvm.experimental.vp.strided.load.nxv2f32.i64(ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 2 x float> %x
+}
+
+
+define <vscale x 2 x float> @test_nontemporal_vp_strided.load_nxv2f32_PALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv2f32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv2f32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv2f32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv2f32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x float> @llvm.experimental.vp.strided.load.nxv2f32.i64(ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 2 x float> %x
+}
+
+
+define <vscale x 2 x float> @test_nontemporal_vp_strided.load_nxv2f32_S1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv2f32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv2f32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv2f32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv2f32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x float> @llvm.experimental.vp.strided.load.nxv2f32.i64(ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 2 x float> %x
+}
+
+
+define <vscale x 2 x float> @test_nontemporal_vp_strided.load_nxv2f32_ALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv2f32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv2f32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv2f32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv2f32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x float> @llvm.experimental.vp.strided.load.nxv2f32.i64(ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 2 x float> %x
+}
+
+define <vscale x 2 x float> @test_nontemporal_vp_strided.load_nxv2f32_DEFAULT(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv2f32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv2f32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv2f32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv2f32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x float> @llvm.experimental.vp.strided.load.nxv2f32.i64(ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 2 x float> %x
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv2f32_P1(<vscale x 2 x float> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv2f32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv2f32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv2f32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv2f32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv2f32.i64(<vscale x 2 x float> %val, ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv2f32_PALL(<vscale x 2 x float> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv2f32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv2f32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv2f32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv2f32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv2f32.i64(<vscale x 2 x float> %val, ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv2f32_S1(<vscale x 2 x float> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv2f32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv2f32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv2f32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv2f32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv2f32.i64(<vscale x 2 x float> %val, ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv2f32_ALL(<vscale x 2 x float> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv2f32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv2f32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv2f32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv2f32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv2f32.i64(<vscale x 2 x float> %val, ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_strided.store_nxv2f32_DEFAULT(<vscale x 2 x float> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv2f32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv2f32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv2f32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv2f32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv2f32.i64(<vscale x 2 x float> %val, ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 2 x double> @test_nontemporal_vp_load_nxv2f64_P1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv2f64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv2f64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv2f64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv2f64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x double> @llvm.vp.load.nxv2f64.p0(ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 2 x double> %x
+}
+
+
+define <vscale x 2 x double> @test_nontemporal_vp_load_nxv2f64_PALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv2f64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv2f64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv2f64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv2f64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x double> @llvm.vp.load.nxv2f64.p0(ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 2 x double> %x
+}
+
+
+define <vscale x 2 x double> @test_nontemporal_vp_load_nxv2f64_S1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv2f64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv2f64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv2f64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv2f64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x double> @llvm.vp.load.nxv2f64.p0(ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 2 x double> %x
+}
+
+
+define <vscale x 2 x double> @test_nontemporal_vp_load_nxv2f64_ALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv2f64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv2f64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv2f64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv2f64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x double> @llvm.vp.load.nxv2f64.p0(ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 2 x double> %x
+}
+
+define <vscale x 2 x double> @test_nontemporal_vp_load_nxv2f64_DEFAULT(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv2f64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv2f64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv2f64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv2f64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x double> @llvm.vp.load.nxv2f64.p0(ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 2 x double> %x
+}
+
+
+define void @test_nontemporal_vp_store_nxv2f64_P1(<vscale x 2 x double> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv2f64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv2f64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv2f64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv2f64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv2f64.p0(<vscale x 2 x double> %val, ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv2f64_PALL(<vscale x 2 x double> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv2f64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv2f64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv2f64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv2f64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv2f64.p0(<vscale x 2 x double> %val, ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv2f64_S1(<vscale x 2 x double> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv2f64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv2f64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv2f64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv2f64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv2f64.p0(<vscale x 2 x double> %val, ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv2f64_ALL(<vscale x 2 x double> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv2f64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv2f64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv2f64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv2f64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv2f64.p0(<vscale x 2 x double> %val, ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_store_nxv2f64_DEFAULT(<vscale x 2 x double> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv2f64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv2f64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv2f64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv2f64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv2f64.p0(<vscale x 2 x double> %val, ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 2 x double> @test_nontemporal_vp_gather_nxv2f64_P1(<vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv2f64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv2f64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: vmv1r.v v10, v8
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv2f64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv2f64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: vmv1r.v v10, v8
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x double> @llvm.vp.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 2 x double> %x
+}
+
+
+define <vscale x 2 x double> @test_nontemporal_vp_gather_nxv2f64_PALL(<vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv2f64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv2f64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: vmv1r.v v10, v8
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv2f64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv2f64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: vmv1r.v v10, v8
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x double> @llvm.vp.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 2 x double> %x
+}
+
+
+define <vscale x 2 x double> @test_nontemporal_vp_gather_nxv2f64_S1(<vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv2f64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv2f64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: vmv1r.v v10, v8
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv2f64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv2f64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: vmv1r.v v10, v8
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x double> @llvm.vp.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 2 x double> %x
+}
+
+
+define <vscale x 2 x double> @test_nontemporal_vp_gather_nxv2f64_ALL(<vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv2f64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv2f64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: vmv1r.v v10, v8
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv2f64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv2f64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: vmv1r.v v10, v8
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x double> @llvm.vp.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 2 x double> %x
+}
+
+define <vscale x 2 x double> @test_nontemporal_vp_gather_nxv2f64_DEFAULT(<vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv2f64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv2f64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: vmv1r.v v10, v8
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv2f64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv2f64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: vmv1r.v v10, v8
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x double> @llvm.vp.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 2 x double> %x
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv2f64_P1(<vscale x 2 x double> %val, <vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv2f64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv2f64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv2f64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv2f64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv2f64_PALL(<vscale x 2 x double> %val, <vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv2f64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv2f64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv2f64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv2f64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv2f64_S1(<vscale x 2 x double> %val, <vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv2f64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv2f64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv2f64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv2f64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv2f64_ALL(<vscale x 2 x double> %val, <vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv2f64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv2f64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv2f64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv2f64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_scatter_nxv2f64_DEFAULT(<vscale x 2 x double> %val, <vscale x 2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv2f64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv2f64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv2f64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv2f64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 2 x double> @test_nontemporal_vp_strided.load_nxv2f64_P1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv2f64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv2f64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv2f64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv2f64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x double> @llvm.experimental.vp.strided.load.nxv2f64.i64(ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 2 x double> %x
+}
+
+
+define <vscale x 2 x double> @test_nontemporal_vp_strided.load_nxv2f64_PALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv2f64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv2f64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv2f64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv2f64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x double> @llvm.experimental.vp.strided.load.nxv2f64.i64(ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 2 x double> %x
+}
+
+
+define <vscale x 2 x double> @test_nontemporal_vp_strided.load_nxv2f64_S1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv2f64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv2f64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv2f64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv2f64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x double> @llvm.experimental.vp.strided.load.nxv2f64.i64(ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 2 x double> %x
+}
+
+
+define <vscale x 2 x double> @test_nontemporal_vp_strided.load_nxv2f64_ALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv2f64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv2f64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv2f64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv2f64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x double> @llvm.experimental.vp.strided.load.nxv2f64.i64(ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 2 x double> %x
+}
+
+define <vscale x 2 x double> @test_nontemporal_vp_strided.load_nxv2f64_DEFAULT(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv2f64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv2f64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv2f64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv2f64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 2 x double> @llvm.experimental.vp.strided.load.nxv2f64.i64(ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 2 x double> %x
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv2f64_P1(<vscale x 2 x double> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv2f64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv2f64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv2f64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv2f64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv2f64.i64(<vscale x 2 x double> %val, ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv2f64_PALL(<vscale x 2 x double> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv2f64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv2f64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv2f64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv2f64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv2f64.i64(<vscale x 2 x double> %val, ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv2f64_S1(<vscale x 2 x double> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv2f64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv2f64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv2f64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv2f64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv2f64.i64(<vscale x 2 x double> %val, ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv2f64_ALL(<vscale x 2 x double> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv2f64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv2f64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv2f64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv2f64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv2f64.i64(<vscale x 2 x double> %val, ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_strided.store_nxv2f64_DEFAULT(<vscale x 2 x double> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv2f64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv2f64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv2f64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv2f64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv2f64.i64(<vscale x 2 x double> %val, ptr %p, i64 %stride, <vscale x 2 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 4 x i8> @test_nontemporal_vp_load_nxv4i8_P1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv4i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv4i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv4i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv4i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 4 x i8> %x
+}
+
+
+define <vscale x 4 x i8> @test_nontemporal_vp_load_nxv4i8_PALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv4i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv4i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv4i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv4i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 4 x i8> %x
+}
+
+
+define <vscale x 4 x i8> @test_nontemporal_vp_load_nxv4i8_S1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv4i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv4i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv4i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv4i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 4 x i8> %x
+}
+
+
+define <vscale x 4 x i8> @test_nontemporal_vp_load_nxv4i8_ALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv4i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv4i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv4i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv4i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 4 x i8> %x
+}
+
+define <vscale x 4 x i8> @test_nontemporal_vp_load_nxv4i8_DEFAULT(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv4i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv4i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv4i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv4i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 4 x i8> %x
+}
+
+
+define void @test_nontemporal_vp_store_nxv4i8_P1(<vscale x 4 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv4i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vse8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv4i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vse8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv4i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv4i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv4i8.p0(<vscale x 4 x i8> %val, ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv4i8_PALL(<vscale x 4 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv4i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vse8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv4i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vse8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv4i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv4i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv4i8.p0(<vscale x 4 x i8> %val, ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv4i8_S1(<vscale x 4 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv4i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vse8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv4i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vse8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv4i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv4i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv4i8.p0(<vscale x 4 x i8> %val, ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv4i8_ALL(<vscale x 4 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv4i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv4i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv4i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv4i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv4i8.p0(<vscale x 4 x i8> %val, ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_store_nxv4i8_DEFAULT(<vscale x 4 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv4i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv4i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv4i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv4i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv4i8.p0(<vscale x 4 x i8> %val, ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 4 x i8> @test_nontemporal_vp_gather_nxv4i8_P1(<vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv4i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64V-NEXT: vmv1r.v v8, v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv4i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vluxei32.v v10, (zero), v8
+; CHECK-RV32V-NEXT: vmv1r.v v8, v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv4i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64VC-NEXT: vmv1r.v v8, v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv4i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vluxei32.v v10, (zero), v8
+; CHECK-RV32VC-NEXT: vmv1r.v v8, v10
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i8> @llvm.vp.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 4 x i8> %x
+}
+
+
+define <vscale x 4 x i8> @test_nontemporal_vp_gather_nxv4i8_PALL(<vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv4i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64V-NEXT: vmv1r.v v8, v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv4i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vluxei32.v v10, (zero), v8
+; CHECK-RV32V-NEXT: vmv1r.v v8, v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv4i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64VC-NEXT: vmv1r.v v8, v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv4i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vluxei32.v v10, (zero), v8
+; CHECK-RV32VC-NEXT: vmv1r.v v8, v10
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i8> @llvm.vp.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 4 x i8> %x
+}
+
+
+define <vscale x 4 x i8> @test_nontemporal_vp_gather_nxv4i8_S1(<vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv4i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64V-NEXT: vmv1r.v v8, v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv4i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vluxei32.v v10, (zero), v8
+; CHECK-RV32V-NEXT: vmv1r.v v8, v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv4i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64VC-NEXT: vmv1r.v v8, v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv4i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vluxei32.v v10, (zero), v8
+; CHECK-RV32VC-NEXT: vmv1r.v v8, v10
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i8> @llvm.vp.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 4 x i8> %x
+}
+
+
+define <vscale x 4 x i8> @test_nontemporal_vp_gather_nxv4i8_ALL(<vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv4i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64V-NEXT: vmv1r.v v8, v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv4i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v10, (zero), v8
+; CHECK-RV32V-NEXT: vmv1r.v v8, v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv4i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64VC-NEXT: vmv1r.v v8, v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv4i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v10, (zero), v8
+; CHECK-RV32VC-NEXT: vmv1r.v v8, v10
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i8> @llvm.vp.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 4 x i8> %x
+}
+
+define <vscale x 4 x i8> @test_nontemporal_vp_gather_nxv4i8_DEFAULT(<vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv4i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64V-NEXT: vmv1r.v v8, v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv4i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v10, (zero), v8
+; CHECK-RV32V-NEXT: vmv1r.v v8, v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv4i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64VC-NEXT: vmv1r.v v8, v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv4i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v10, (zero), v8
+; CHECK-RV32VC-NEXT: vmv1r.v v8, v10
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i8> @llvm.vp.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 4 x i8> %x
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv4i8_P1(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv4i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv4i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv4i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv4i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv4i8_PALL(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv4i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv4i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv4i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv4i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv4i8_S1(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv4i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv4i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv4i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv4i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv4i8_ALL(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv4i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv4i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv4i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv4i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_scatter_nxv4i8_DEFAULT(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv4i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv4i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv4i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv4i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 4 x i8> @test_nontemporal_vp_strided.load_nxv4i8_P1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv4i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv4i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv4i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv4i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i8> @llvm.experimental.vp.strided.load.nxv4i8.i64(ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 4 x i8> %x
+}
+
+
+define <vscale x 4 x i8> @test_nontemporal_vp_strided.load_nxv4i8_PALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv4i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv4i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv4i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv4i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i8> @llvm.experimental.vp.strided.load.nxv4i8.i64(ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 4 x i8> %x
+}
+
+
+define <vscale x 4 x i8> @test_nontemporal_vp_strided.load_nxv4i8_S1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv4i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv4i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv4i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv4i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i8> @llvm.experimental.vp.strided.load.nxv4i8.i64(ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 4 x i8> %x
+}
+
+
+define <vscale x 4 x i8> @test_nontemporal_vp_strided.load_nxv4i8_ALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv4i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv4i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv4i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv4i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i8> @llvm.experimental.vp.strided.load.nxv4i8.i64(ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 4 x i8> %x
+}
+
+define <vscale x 4 x i8> @test_nontemporal_vp_strided.load_nxv4i8_DEFAULT(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv4i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv4i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv4i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv4i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i8> @llvm.experimental.vp.strided.load.nxv4i8.i64(ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 4 x i8> %x
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv4i8_P1(<vscale x 4 x i8> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv4i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv4i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv4i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv4i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv4i8.i64(<vscale x 4 x i8> %val, ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv4i8_PALL(<vscale x 4 x i8> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv4i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv4i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv4i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv4i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv4i8.i64(<vscale x 4 x i8> %val, ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv4i8_S1(<vscale x 4 x i8> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv4i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv4i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv4i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv4i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv4i8.i64(<vscale x 4 x i8> %val, ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv4i8_ALL(<vscale x 4 x i8> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv4i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv4i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv4i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv4i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv4i8.i64(<vscale x 4 x i8> %val, ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_strided.store_nxv4i8_DEFAULT(<vscale x 4 x i8> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv4i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv4i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, mf2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv4i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv4i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, mf2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv4i8.i64(<vscale x 4 x i8> %val, ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 4 x i16> @test_nontemporal_vp_load_nxv4i16_P1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv4i16_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vle16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv4i16_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vle16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv4i16_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv4i16_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i16> @llvm.vp.load.nxv4i16.p0(ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 4 x i16> %x
+}
+
+
+define <vscale x 4 x i16> @test_nontemporal_vp_load_nxv4i16_PALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv4i16_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vle16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv4i16_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vle16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv4i16_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv4i16_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i16> @llvm.vp.load.nxv4i16.p0(ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 4 x i16> %x
+}
+
+
+define <vscale x 4 x i16> @test_nontemporal_vp_load_nxv4i16_S1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv4i16_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vle16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv4i16_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vle16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv4i16_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv4i16_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i16> @llvm.vp.load.nxv4i16.p0(ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 4 x i16> %x
+}
+
+
+define <vscale x 4 x i16> @test_nontemporal_vp_load_nxv4i16_ALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv4i16_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv4i16_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv4i16_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv4i16_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i16> @llvm.vp.load.nxv4i16.p0(ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 4 x i16> %x
+}
+
+define <vscale x 4 x i16> @test_nontemporal_vp_load_nxv4i16_DEFAULT(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv4i16_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv4i16_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv4i16_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv4i16_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i16> @llvm.vp.load.nxv4i16.p0(ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 4 x i16> %x
+}
+
+
+define void @test_nontemporal_vp_store_nxv4i16_P1(<vscale x 4 x i16> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv4i16_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vse16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv4i16_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vse16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv4i16_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv4i16_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv4i16.p0(<vscale x 4 x i16> %val, ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv4i16_PALL(<vscale x 4 x i16> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv4i16_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vse16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv4i16_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vse16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv4i16_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv4i16_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv4i16.p0(<vscale x 4 x i16> %val, ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv4i16_S1(<vscale x 4 x i16> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv4i16_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vse16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv4i16_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vse16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv4i16_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv4i16_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv4i16.p0(<vscale x 4 x i16> %val, ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv4i16_ALL(<vscale x 4 x i16> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv4i16_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv4i16_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv4i16_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv4i16_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv4i16.p0(<vscale x 4 x i16> %val, ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_store_nxv4i16_DEFAULT(<vscale x 4 x i16> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv4i16_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv4i16_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv4i16_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv4i16_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv4i16.p0(<vscale x 4 x i16> %val, ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 4 x i16> @test_nontemporal_vp_gather_nxv4i16_P1(<vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv4i16_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv4i16_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vluxei32.v v10, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv4i16_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv4i16_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vluxei32.v v10, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v10
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i16> @llvm.vp.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 4 x i16> %x
+}
+
+
+define <vscale x 4 x i16> @test_nontemporal_vp_gather_nxv4i16_PALL(<vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv4i16_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv4i16_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vluxei32.v v10, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv4i16_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv4i16_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vluxei32.v v10, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v10
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i16> @llvm.vp.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 4 x i16> %x
+}
+
+
+define <vscale x 4 x i16> @test_nontemporal_vp_gather_nxv4i16_S1(<vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv4i16_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv4i16_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vluxei32.v v10, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv4i16_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv4i16_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vluxei32.v v10, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v10
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i16> @llvm.vp.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 4 x i16> %x
+}
+
+
+define <vscale x 4 x i16> @test_nontemporal_vp_gather_nxv4i16_ALL(<vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv4i16_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv4i16_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v10, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv4i16_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv4i16_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v10, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v10
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i16> @llvm.vp.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 4 x i16> %x
+}
+
+define <vscale x 4 x i16> @test_nontemporal_vp_gather_nxv4i16_DEFAULT(<vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv4i16_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv4i16_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v10, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv4i16_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv4i16_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v10, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v10
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i16> @llvm.vp.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 4 x i16> %x
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv4i16_P1(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv4i16_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv4i16_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv4i16_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv4i16_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv4i16_PALL(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv4i16_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv4i16_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv4i16_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv4i16_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv4i16_S1(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv4i16_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv4i16_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv4i16_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv4i16_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv4i16_ALL(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv4i16_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv4i16_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv4i16_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv4i16_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_scatter_nxv4i16_DEFAULT(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv4i16_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv4i16_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv4i16_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv4i16_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 4 x i16> @test_nontemporal_vp_strided.load_nxv4i16_P1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv4i16_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv4i16_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv4i16_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv4i16_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i16> @llvm.experimental.vp.strided.load.nxv4i16.i64(ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 4 x i16> %x
+}
+
+
+define <vscale x 4 x i16> @test_nontemporal_vp_strided.load_nxv4i16_PALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv4i16_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv4i16_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv4i16_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv4i16_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i16> @llvm.experimental.vp.strided.load.nxv4i16.i64(ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 4 x i16> %x
+}
+
+
+define <vscale x 4 x i16> @test_nontemporal_vp_strided.load_nxv4i16_S1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv4i16_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv4i16_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv4i16_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv4i16_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i16> @llvm.experimental.vp.strided.load.nxv4i16.i64(ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 4 x i16> %x
+}
+
+
+define <vscale x 4 x i16> @test_nontemporal_vp_strided.load_nxv4i16_ALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv4i16_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv4i16_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv4i16_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv4i16_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i16> @llvm.experimental.vp.strided.load.nxv4i16.i64(ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 4 x i16> %x
+}
+
+define <vscale x 4 x i16> @test_nontemporal_vp_strided.load_nxv4i16_DEFAULT(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv4i16_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv4i16_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv4i16_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv4i16_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i16> @llvm.experimental.vp.strided.load.nxv4i16.i64(ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 4 x i16> %x
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv4i16_P1(<vscale x 4 x i16> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv4i16_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv4i16_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv4i16_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv4i16_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv4i16.i64(<vscale x 4 x i16> %val, ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv4i16_PALL(<vscale x 4 x i16> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv4i16_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv4i16_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv4i16_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv4i16_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv4i16.i64(<vscale x 4 x i16> %val, ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv4i16_S1(<vscale x 4 x i16> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv4i16_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv4i16_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv4i16_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv4i16_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv4i16.i64(<vscale x 4 x i16> %val, ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv4i16_ALL(<vscale x 4 x i16> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv4i16_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv4i16_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv4i16_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv4i16_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv4i16.i64(<vscale x 4 x i16> %val, ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_strided.store_nxv4i16_DEFAULT(<vscale x 4 x i16> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv4i16_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv4i16_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv4i16_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv4i16_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv4i16.i64(<vscale x 4 x i16> %val, ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 4 x i32> @test_nontemporal_vp_load_nxv4i32_P1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv4i32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv4i32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv4i32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv4i32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 4 x i32> %x
+}
+
+
+define <vscale x 4 x i32> @test_nontemporal_vp_load_nxv4i32_PALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv4i32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv4i32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv4i32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv4i32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 4 x i32> %x
+}
+
+
+define <vscale x 4 x i32> @test_nontemporal_vp_load_nxv4i32_S1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv4i32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv4i32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv4i32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv4i32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 4 x i32> %x
+}
+
+
+define <vscale x 4 x i32> @test_nontemporal_vp_load_nxv4i32_ALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv4i32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv4i32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv4i32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv4i32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 4 x i32> %x
+}
+
+define <vscale x 4 x i32> @test_nontemporal_vp_load_nxv4i32_DEFAULT(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv4i32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv4i32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv4i32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv4i32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 4 x i32> %x
+}
+
+
+define void @test_nontemporal_vp_store_nxv4i32_P1(<vscale x 4 x i32> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv4i32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv4i32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv4i32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv4i32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> %val, ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv4i32_PALL(<vscale x 4 x i32> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv4i32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv4i32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv4i32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv4i32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> %val, ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv4i32_S1(<vscale x 4 x i32> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv4i32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv4i32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv4i32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv4i32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> %val, ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv4i32_ALL(<vscale x 4 x i32> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv4i32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv4i32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv4i32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv4i32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> %val, ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_store_nxv4i32_DEFAULT(<vscale x 4 x i32> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv4i32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv4i32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv4i32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv4i32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> %val, ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 4 x i32> @test_nontemporal_vp_gather_nxv4i32_P1(<vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv4i32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv4i32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv4i32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv4i32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i32> @llvm.vp.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 4 x i32> %x
+}
+
+
+define <vscale x 4 x i32> @test_nontemporal_vp_gather_nxv4i32_PALL(<vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv4i32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv4i32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv4i32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv4i32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i32> @llvm.vp.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 4 x i32> %x
+}
+
+
+define <vscale x 4 x i32> @test_nontemporal_vp_gather_nxv4i32_S1(<vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv4i32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv4i32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv4i32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv4i32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i32> @llvm.vp.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 4 x i32> %x
+}
+
+
+define <vscale x 4 x i32> @test_nontemporal_vp_gather_nxv4i32_ALL(<vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv4i32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv4i32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv4i32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv4i32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i32> @llvm.vp.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 4 x i32> %x
+}
+
+define <vscale x 4 x i32> @test_nontemporal_vp_gather_nxv4i32_DEFAULT(<vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv4i32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv4i32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv4i32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv4i32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i32> @llvm.vp.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 4 x i32> %x
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv4i32_P1(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv4i32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv4i32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv4i32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv4i32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv4i32_PALL(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv4i32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv4i32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv4i32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv4i32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv4i32_S1(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv4i32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv4i32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv4i32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv4i32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv4i32_ALL(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv4i32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv4i32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv4i32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv4i32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_scatter_nxv4i32_DEFAULT(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv4i32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv4i32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv4i32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv4i32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 4 x i32> @test_nontemporal_vp_strided.load_nxv4i32_P1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv4i32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv4i32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv4i32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv4i32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i32> @llvm.experimental.vp.strided.load.nxv4i32.i64(ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 4 x i32> %x
+}
+
+
+define <vscale x 4 x i32> @test_nontemporal_vp_strided.load_nxv4i32_PALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv4i32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv4i32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv4i32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv4i32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i32> @llvm.experimental.vp.strided.load.nxv4i32.i64(ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 4 x i32> %x
+}
+
+
+define <vscale x 4 x i32> @test_nontemporal_vp_strided.load_nxv4i32_S1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv4i32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv4i32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv4i32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv4i32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i32> @llvm.experimental.vp.strided.load.nxv4i32.i64(ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 4 x i32> %x
+}
+
+
+define <vscale x 4 x i32> @test_nontemporal_vp_strided.load_nxv4i32_ALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv4i32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv4i32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv4i32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv4i32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i32> @llvm.experimental.vp.strided.load.nxv4i32.i64(ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 4 x i32> %x
+}
+
+define <vscale x 4 x i32> @test_nontemporal_vp_strided.load_nxv4i32_DEFAULT(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv4i32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv4i32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv4i32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv4i32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i32> @llvm.experimental.vp.strided.load.nxv4i32.i64(ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 4 x i32> %x
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv4i32_P1(<vscale x 4 x i32> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv4i32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv4i32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv4i32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv4i32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv4i32.i64(<vscale x 4 x i32> %val, ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv4i32_PALL(<vscale x 4 x i32> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv4i32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv4i32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv4i32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv4i32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv4i32.i64(<vscale x 4 x i32> %val, ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv4i32_S1(<vscale x 4 x i32> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv4i32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv4i32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv4i32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv4i32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv4i32.i64(<vscale x 4 x i32> %val, ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv4i32_ALL(<vscale x 4 x i32> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv4i32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv4i32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv4i32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv4i32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv4i32.i64(<vscale x 4 x i32> %val, ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_strided.store_nxv4i32_DEFAULT(<vscale x 4 x i32> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv4i32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv4i32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv4i32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv4i32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv4i32.i64(<vscale x 4 x i32> %val, ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 4 x i64> @test_nontemporal_vp_load_nxv4i64_P1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv4i64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv4i64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv4i64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv4i64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64.p0(ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 4 x i64> %x
+}
+
+
+define <vscale x 4 x i64> @test_nontemporal_vp_load_nxv4i64_PALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv4i64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv4i64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv4i64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv4i64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64.p0(ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 4 x i64> %x
+}
+
+
+define <vscale x 4 x i64> @test_nontemporal_vp_load_nxv4i64_S1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv4i64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv4i64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv4i64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv4i64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64.p0(ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 4 x i64> %x
+}
+
+
+define <vscale x 4 x i64> @test_nontemporal_vp_load_nxv4i64_ALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv4i64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv4i64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv4i64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv4i64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64.p0(ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 4 x i64> %x
+}
+
+define <vscale x 4 x i64> @test_nontemporal_vp_load_nxv4i64_DEFAULT(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv4i64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv4i64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv4i64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv4i64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64.p0(ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 4 x i64> %x
+}
+
+
+define void @test_nontemporal_vp_store_nxv4i64_P1(<vscale x 4 x i64> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv4i64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv4i64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv4i64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv4i64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv4i64.p0(<vscale x 4 x i64> %val, ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv4i64_PALL(<vscale x 4 x i64> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv4i64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv4i64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv4i64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv4i64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv4i64.p0(<vscale x 4 x i64> %val, ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv4i64_S1(<vscale x 4 x i64> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv4i64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv4i64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv4i64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv4i64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv4i64.p0(<vscale x 4 x i64> %val, ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv4i64_ALL(<vscale x 4 x i64> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv4i64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv4i64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv4i64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv4i64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv4i64.p0(<vscale x 4 x i64> %val, ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_store_nxv4i64_DEFAULT(<vscale x 4 x i64> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv4i64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv4i64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv4i64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv4i64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv4i64.p0(<vscale x 4 x i64> %val, ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 4 x i64> @test_nontemporal_vp_gather_nxv4i64_P1(<vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv4i64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv4i64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: vmv2r.v v12, v8
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv4i64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv4i64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: vmv2r.v v12, v8
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i64> @llvm.vp.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 4 x i64> %x
+}
+
+
+define <vscale x 4 x i64> @test_nontemporal_vp_gather_nxv4i64_PALL(<vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv4i64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv4i64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: vmv2r.v v12, v8
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv4i64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv4i64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: vmv2r.v v12, v8
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i64> @llvm.vp.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 4 x i64> %x
+}
+
+
+define <vscale x 4 x i64> @test_nontemporal_vp_gather_nxv4i64_S1(<vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv4i64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv4i64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: vmv2r.v v12, v8
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv4i64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv4i64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: vmv2r.v v12, v8
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i64> @llvm.vp.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 4 x i64> %x
+}
+
+
+define <vscale x 4 x i64> @test_nontemporal_vp_gather_nxv4i64_ALL(<vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv4i64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv4i64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: vmv2r.v v12, v8
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv4i64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv4i64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: vmv2r.v v12, v8
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i64> @llvm.vp.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 4 x i64> %x
+}
+
+define <vscale x 4 x i64> @test_nontemporal_vp_gather_nxv4i64_DEFAULT(<vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv4i64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv4i64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: vmv2r.v v12, v8
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv4i64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv4i64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: vmv2r.v v12, v8
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i64> @llvm.vp.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 4 x i64> %x
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv4i64_P1(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv4i64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv4i64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv4i64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv4i64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv4i64_PALL(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv4i64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv4i64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv4i64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv4i64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv4i64_S1(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv4i64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv4i64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv4i64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv4i64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv4i64_ALL(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv4i64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv4i64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv4i64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv4i64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_scatter_nxv4i64_DEFAULT(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv4i64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv4i64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv4i64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv4i64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 4 x i64> @test_nontemporal_vp_strided.load_nxv4i64_P1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv4i64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv4i64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv4i64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv4i64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i64> @llvm.experimental.vp.strided.load.nxv4i64.i64(ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 4 x i64> %x
+}
+
+
+define <vscale x 4 x i64> @test_nontemporal_vp_strided.load_nxv4i64_PALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv4i64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv4i64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv4i64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv4i64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i64> @llvm.experimental.vp.strided.load.nxv4i64.i64(ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 4 x i64> %x
+}
+
+
+define <vscale x 4 x i64> @test_nontemporal_vp_strided.load_nxv4i64_S1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv4i64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv4i64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv4i64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv4i64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i64> @llvm.experimental.vp.strided.load.nxv4i64.i64(ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 4 x i64> %x
+}
+
+
+define <vscale x 4 x i64> @test_nontemporal_vp_strided.load_nxv4i64_ALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv4i64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv4i64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv4i64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv4i64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i64> @llvm.experimental.vp.strided.load.nxv4i64.i64(ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 4 x i64> %x
+}
+
+define <vscale x 4 x i64> @test_nontemporal_vp_strided.load_nxv4i64_DEFAULT(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv4i64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv4i64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv4i64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv4i64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x i64> @llvm.experimental.vp.strided.load.nxv4i64.i64(ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 4 x i64> %x
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv4i64_P1(<vscale x 4 x i64> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv4i64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv4i64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv4i64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv4i64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv4i64.i64(<vscale x 4 x i64> %val, ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv4i64_PALL(<vscale x 4 x i64> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv4i64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv4i64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv4i64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv4i64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv4i64.i64(<vscale x 4 x i64> %val, ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv4i64_S1(<vscale x 4 x i64> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv4i64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv4i64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv4i64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv4i64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv4i64.i64(<vscale x 4 x i64> %val, ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv4i64_ALL(<vscale x 4 x i64> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv4i64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv4i64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv4i64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv4i64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv4i64.i64(<vscale x 4 x i64> %val, ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_strided.store_nxv4i64_DEFAULT(<vscale x 4 x i64> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv4i64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv4i64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv4i64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv4i64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv4i64.i64(<vscale x 4 x i64> %val, ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 4 x float> @test_nontemporal_vp_load_nxv4f32_P1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv4f32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv4f32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv4f32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv4f32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 4 x float> %x
+}
+
+
+define <vscale x 4 x float> @test_nontemporal_vp_load_nxv4f32_PALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv4f32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv4f32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv4f32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv4f32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 4 x float> %x
+}
+
+
+define <vscale x 4 x float> @test_nontemporal_vp_load_nxv4f32_S1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv4f32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv4f32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv4f32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv4f32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 4 x float> %x
+}
+
+
+define <vscale x 4 x float> @test_nontemporal_vp_load_nxv4f32_ALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv4f32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv4f32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv4f32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv4f32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 4 x float> %x
+}
+
+define <vscale x 4 x float> @test_nontemporal_vp_load_nxv4f32_DEFAULT(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv4f32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv4f32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv4f32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv4f32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 4 x float> %x
+}
+
+
+define void @test_nontemporal_vp_store_nxv4f32_P1(<vscale x 4 x float> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv4f32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv4f32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv4f32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv4f32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> %val, ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv4f32_PALL(<vscale x 4 x float> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv4f32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv4f32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv4f32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv4f32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> %val, ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv4f32_S1(<vscale x 4 x float> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv4f32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv4f32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv4f32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv4f32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> %val, ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv4f32_ALL(<vscale x 4 x float> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv4f32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv4f32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv4f32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv4f32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> %val, ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_store_nxv4f32_DEFAULT(<vscale x 4 x float> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv4f32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv4f32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv4f32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv4f32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> %val, ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 4 x float> @test_nontemporal_vp_gather_nxv4f32_P1(<vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv4f32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv4f32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv4f32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv4f32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x float> @llvm.vp.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 4 x float> %x
+}
+
+
+define <vscale x 4 x float> @test_nontemporal_vp_gather_nxv4f32_PALL(<vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv4f32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv4f32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv4f32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv4f32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x float> @llvm.vp.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 4 x float> %x
+}
+
+
+define <vscale x 4 x float> @test_nontemporal_vp_gather_nxv4f32_S1(<vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv4f32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv4f32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv4f32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv4f32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x float> @llvm.vp.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 4 x float> %x
+}
+
+
+define <vscale x 4 x float> @test_nontemporal_vp_gather_nxv4f32_ALL(<vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv4f32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv4f32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv4f32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv4f32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x float> @llvm.vp.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 4 x float> %x
+}
+
+define <vscale x 4 x float> @test_nontemporal_vp_gather_nxv4f32_DEFAULT(<vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv4f32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv4f32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv4f32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv4f32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x float> @llvm.vp.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 4 x float> %x
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv4f32_P1(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv4f32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv4f32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv4f32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv4f32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv4f32_PALL(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv4f32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv4f32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv4f32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv4f32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv4f32_S1(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv4f32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv4f32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv4f32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv4f32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv4f32_ALL(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv4f32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv4f32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv4f32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv4f32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_scatter_nxv4f32_DEFAULT(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv4f32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv4f32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv4f32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv4f32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 4 x float> @test_nontemporal_vp_strided.load_nxv4f32_P1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv4f32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv4f32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv4f32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv4f32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x float> @llvm.experimental.vp.strided.load.nxv4f32.i64(ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 4 x float> %x
+}
+
+
+define <vscale x 4 x float> @test_nontemporal_vp_strided.load_nxv4f32_PALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv4f32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv4f32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv4f32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv4f32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x float> @llvm.experimental.vp.strided.load.nxv4f32.i64(ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 4 x float> %x
+}
+
+
+define <vscale x 4 x float> @test_nontemporal_vp_strided.load_nxv4f32_S1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv4f32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv4f32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv4f32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv4f32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x float> @llvm.experimental.vp.strided.load.nxv4f32.i64(ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 4 x float> %x
+}
+
+
+define <vscale x 4 x float> @test_nontemporal_vp_strided.load_nxv4f32_ALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv4f32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv4f32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv4f32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv4f32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x float> @llvm.experimental.vp.strided.load.nxv4f32.i64(ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 4 x float> %x
+}
+
+define <vscale x 4 x float> @test_nontemporal_vp_strided.load_nxv4f32_DEFAULT(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv4f32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv4f32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv4f32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv4f32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x float> @llvm.experimental.vp.strided.load.nxv4f32.i64(ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 4 x float> %x
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv4f32_P1(<vscale x 4 x float> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv4f32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv4f32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv4f32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv4f32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv4f32.i64(<vscale x 4 x float> %val, ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv4f32_PALL(<vscale x 4 x float> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv4f32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv4f32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv4f32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv4f32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv4f32.i64(<vscale x 4 x float> %val, ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv4f32_S1(<vscale x 4 x float> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv4f32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv4f32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv4f32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv4f32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv4f32.i64(<vscale x 4 x float> %val, ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv4f32_ALL(<vscale x 4 x float> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv4f32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv4f32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv4f32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv4f32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv4f32.i64(<vscale x 4 x float> %val, ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_strided.store_nxv4f32_DEFAULT(<vscale x 4 x float> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv4f32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv4f32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv4f32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv4f32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv4f32.i64(<vscale x 4 x float> %val, ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 4 x double> @test_nontemporal_vp_load_nxv4f64_P1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv4f64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv4f64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv4f64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv4f64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x double> @llvm.vp.load.nxv4f64.p0(ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 4 x double> %x
+}
+
+
+define <vscale x 4 x double> @test_nontemporal_vp_load_nxv4f64_PALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv4f64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv4f64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv4f64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv4f64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x double> @llvm.vp.load.nxv4f64.p0(ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 4 x double> %x
+}
+
+
+define <vscale x 4 x double> @test_nontemporal_vp_load_nxv4f64_S1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv4f64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv4f64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv4f64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv4f64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x double> @llvm.vp.load.nxv4f64.p0(ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 4 x double> %x
+}
+
+
+define <vscale x 4 x double> @test_nontemporal_vp_load_nxv4f64_ALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv4f64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv4f64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv4f64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv4f64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x double> @llvm.vp.load.nxv4f64.p0(ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 4 x double> %x
+}
+
+define <vscale x 4 x double> @test_nontemporal_vp_load_nxv4f64_DEFAULT(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv4f64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv4f64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv4f64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv4f64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x double> @llvm.vp.load.nxv4f64.p0(ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 4 x double> %x
+}
+
+
+define void @test_nontemporal_vp_store_nxv4f64_P1(<vscale x 4 x double> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv4f64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv4f64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv4f64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv4f64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv4f64.p0(<vscale x 4 x double> %val, ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv4f64_PALL(<vscale x 4 x double> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv4f64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv4f64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv4f64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv4f64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv4f64.p0(<vscale x 4 x double> %val, ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv4f64_S1(<vscale x 4 x double> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv4f64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv4f64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv4f64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv4f64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv4f64.p0(<vscale x 4 x double> %val, ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv4f64_ALL(<vscale x 4 x double> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv4f64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv4f64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv4f64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv4f64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv4f64.p0(<vscale x 4 x double> %val, ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_store_nxv4f64_DEFAULT(<vscale x 4 x double> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv4f64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv4f64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv4f64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv4f64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv4f64.p0(<vscale x 4 x double> %val, ptr %p, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 4 x double> @test_nontemporal_vp_gather_nxv4f64_P1(<vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv4f64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv4f64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: vmv2r.v v12, v8
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv4f64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv4f64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: vmv2r.v v12, v8
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x double> @llvm.vp.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 4 x double> %x
+}
+
+
+define <vscale x 4 x double> @test_nontemporal_vp_gather_nxv4f64_PALL(<vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv4f64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv4f64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: vmv2r.v v12, v8
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv4f64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv4f64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: vmv2r.v v12, v8
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x double> @llvm.vp.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 4 x double> %x
+}
+
+
+define <vscale x 4 x double> @test_nontemporal_vp_gather_nxv4f64_S1(<vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv4f64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv4f64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: vmv2r.v v12, v8
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv4f64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv4f64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: vmv2r.v v12, v8
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x double> @llvm.vp.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 4 x double> %x
+}
+
+
+define <vscale x 4 x double> @test_nontemporal_vp_gather_nxv4f64_ALL(<vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv4f64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv4f64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: vmv2r.v v12, v8
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv4f64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv4f64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: vmv2r.v v12, v8
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x double> @llvm.vp.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 4 x double> %x
+}
+
+define <vscale x 4 x double> @test_nontemporal_vp_gather_nxv4f64_DEFAULT(<vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv4f64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv4f64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: vmv2r.v v12, v8
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv4f64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv4f64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: vmv2r.v v12, v8
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x double> @llvm.vp.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 4 x double> %x
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv4f64_P1(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv4f64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv4f64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv4f64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv4f64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv4f64_PALL(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv4f64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv4f64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv4f64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv4f64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv4f64_S1(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv4f64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv4f64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv4f64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv4f64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv4f64_ALL(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv4f64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv4f64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv4f64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv4f64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_scatter_nxv4f64_DEFAULT(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv4f64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv4f64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv4f64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv4f64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 4 x double> @test_nontemporal_vp_strided.load_nxv4f64_P1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv4f64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv4f64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv4f64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv4f64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x double> @llvm.experimental.vp.strided.load.nxv4f64.i64(ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 4 x double> %x
+}
+
+
+define <vscale x 4 x double> @test_nontemporal_vp_strided.load_nxv4f64_PALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv4f64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv4f64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv4f64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv4f64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x double> @llvm.experimental.vp.strided.load.nxv4f64.i64(ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 4 x double> %x
+}
+
+
+define <vscale x 4 x double> @test_nontemporal_vp_strided.load_nxv4f64_S1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv4f64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv4f64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv4f64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv4f64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x double> @llvm.experimental.vp.strided.load.nxv4f64.i64(ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 4 x double> %x
+}
+
+
+define <vscale x 4 x double> @test_nontemporal_vp_strided.load_nxv4f64_ALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv4f64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv4f64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv4f64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv4f64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x double> @llvm.experimental.vp.strided.load.nxv4f64.i64(ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 4 x double> %x
+}
+
+define <vscale x 4 x double> @test_nontemporal_vp_strided.load_nxv4f64_DEFAULT(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv4f64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv4f64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv4f64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv4f64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 4 x double> @llvm.experimental.vp.strided.load.nxv4f64.i64(ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 4 x double> %x
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv4f64_P1(<vscale x 4 x double> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv4f64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv4f64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv4f64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv4f64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv4f64.i64(<vscale x 4 x double> %val, ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv4f64_PALL(<vscale x 4 x double> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv4f64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv4f64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv4f64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv4f64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv4f64.i64(<vscale x 4 x double> %val, ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv4f64_S1(<vscale x 4 x double> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv4f64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv4f64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv4f64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv4f64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv4f64.i64(<vscale x 4 x double> %val, ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv4f64_ALL(<vscale x 4 x double> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv4f64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv4f64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv4f64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv4f64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv4f64.i64(<vscale x 4 x double> %val, ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_strided.store_nxv4f64_DEFAULT(<vscale x 4 x double> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv4f64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv4f64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv4f64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv4f64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv4f64.i64(<vscale x 4 x double> %val, ptr %p, i64 %stride, <vscale x 4 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 8 x i8> @test_nontemporal_vp_load_nxv8i8_P1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv8i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv8i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv8i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv8i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 8 x i8> %x
+}
+
+
+define <vscale x 8 x i8> @test_nontemporal_vp_load_nxv8i8_PALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv8i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv8i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv8i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv8i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 8 x i8> %x
+}
+
+
+define <vscale x 8 x i8> @test_nontemporal_vp_load_nxv8i8_S1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv8i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv8i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv8i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv8i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 8 x i8> %x
+}
+
+
+define <vscale x 8 x i8> @test_nontemporal_vp_load_nxv8i8_ALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv8i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv8i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv8i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv8i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 8 x i8> %x
+}
+
+define <vscale x 8 x i8> @test_nontemporal_vp_load_nxv8i8_DEFAULT(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv8i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv8i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv8i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv8i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 8 x i8> %x
+}
+
+
+define void @test_nontemporal_vp_store_nxv8i8_P1(<vscale x 8 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv8i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vse8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv8i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vse8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv8i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv8i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv8i8.p0(<vscale x 8 x i8> %val, ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv8i8_PALL(<vscale x 8 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv8i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vse8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv8i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vse8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv8i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv8i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv8i8.p0(<vscale x 8 x i8> %val, ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv8i8_S1(<vscale x 8 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv8i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vse8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv8i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vse8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv8i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv8i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv8i8.p0(<vscale x 8 x i8> %val, ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv8i8_ALL(<vscale x 8 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv8i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv8i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv8i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv8i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv8i8.p0(<vscale x 8 x i8> %val, ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_store_nxv8i8_DEFAULT(<vscale x 8 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv8i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv8i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv8i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv8i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv8i8.p0(<vscale x 8 x i8> %val, ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 8 x i8> @test_nontemporal_vp_gather_nxv8i8_P1(<vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv8i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv8i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vluxei32.v v12, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv8i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv8i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vluxei32.v v12, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v12
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 8 x i8> %x
+}
+
+
+define <vscale x 8 x i8> @test_nontemporal_vp_gather_nxv8i8_PALL(<vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv8i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv8i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vluxei32.v v12, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv8i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv8i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vluxei32.v v12, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v12
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 8 x i8> %x
+}
+
+
+define <vscale x 8 x i8> @test_nontemporal_vp_gather_nxv8i8_S1(<vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv8i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv8i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vluxei32.v v12, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv8i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv8i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vluxei32.v v12, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v12
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 8 x i8> %x
+}
+
+
+define <vscale x 8 x i8> @test_nontemporal_vp_gather_nxv8i8_ALL(<vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv8i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv8i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v12, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv8i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv8i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v12, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v12
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 8 x i8> %x
+}
+
+define <vscale x 8 x i8> @test_nontemporal_vp_gather_nxv8i8_DEFAULT(<vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv8i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv8i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v12, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv8i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv8i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v12, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v12
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 8 x i8> %x
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv8i8_P1(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv8i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv8i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv8i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv8i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv8i8_PALL(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv8i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv8i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv8i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv8i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv8i8_S1(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv8i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv8i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv8i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv8i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv8i8_ALL(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv8i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv8i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv8i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv8i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_scatter_nxv8i8_DEFAULT(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv8i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv8i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv8i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv8i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 8 x i8> @test_nontemporal_vp_strided.load_nxv8i8_P1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv8i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv8i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv8i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv8i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i8> @llvm.experimental.vp.strided.load.nxv8i8.i64(ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 8 x i8> %x
+}
+
+
+define <vscale x 8 x i8> @test_nontemporal_vp_strided.load_nxv8i8_PALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv8i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv8i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv8i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv8i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i8> @llvm.experimental.vp.strided.load.nxv8i8.i64(ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 8 x i8> %x
+}
+
+
+define <vscale x 8 x i8> @test_nontemporal_vp_strided.load_nxv8i8_S1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv8i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv8i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv8i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv8i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i8> @llvm.experimental.vp.strided.load.nxv8i8.i64(ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 8 x i8> %x
+}
+
+
+define <vscale x 8 x i8> @test_nontemporal_vp_strided.load_nxv8i8_ALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv8i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv8i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv8i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv8i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i8> @llvm.experimental.vp.strided.load.nxv8i8.i64(ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 8 x i8> %x
+}
+
+define <vscale x 8 x i8> @test_nontemporal_vp_strided.load_nxv8i8_DEFAULT(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv8i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv8i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv8i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv8i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i8> @llvm.experimental.vp.strided.load.nxv8i8.i64(ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 8 x i8> %x
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv8i8_P1(<vscale x 8 x i8> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv8i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv8i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv8i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv8i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv8i8.i64(<vscale x 8 x i8> %val, ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv8i8_PALL(<vscale x 8 x i8> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv8i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv8i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv8i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv8i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv8i8.i64(<vscale x 8 x i8> %val, ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv8i8_S1(<vscale x 8 x i8> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv8i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv8i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv8i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv8i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv8i8.i64(<vscale x 8 x i8> %val, ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv8i8_ALL(<vscale x 8 x i8> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv8i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv8i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv8i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv8i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv8i8.i64(<vscale x 8 x i8> %val, ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_strided.store_nxv8i8_DEFAULT(<vscale x 8 x i8> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv8i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv8i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv8i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv8i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv8i8.i64(<vscale x 8 x i8> %val, ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 8 x i16> @test_nontemporal_vp_load_nxv8i16_P1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv8i16_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vle16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv8i16_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vle16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv8i16_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv8i16_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i16> @llvm.vp.load.nxv8i16.p0(ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 8 x i16> %x
+}
+
+
+define <vscale x 8 x i16> @test_nontemporal_vp_load_nxv8i16_PALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv8i16_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vle16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv8i16_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vle16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv8i16_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv8i16_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i16> @llvm.vp.load.nxv8i16.p0(ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 8 x i16> %x
+}
+
+
+define <vscale x 8 x i16> @test_nontemporal_vp_load_nxv8i16_S1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv8i16_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vle16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv8i16_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vle16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv8i16_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv8i16_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i16> @llvm.vp.load.nxv8i16.p0(ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 8 x i16> %x
+}
+
+
+define <vscale x 8 x i16> @test_nontemporal_vp_load_nxv8i16_ALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv8i16_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv8i16_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv8i16_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv8i16_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i16> @llvm.vp.load.nxv8i16.p0(ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 8 x i16> %x
+}
+
+define <vscale x 8 x i16> @test_nontemporal_vp_load_nxv8i16_DEFAULT(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv8i16_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv8i16_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv8i16_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv8i16_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i16> @llvm.vp.load.nxv8i16.p0(ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 8 x i16> %x
+}
+
+
+define void @test_nontemporal_vp_store_nxv8i16_P1(<vscale x 8 x i16> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv8i16_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vse16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv8i16_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vse16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv8i16_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv8i16_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv8i16.p0(<vscale x 8 x i16> %val, ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv8i16_PALL(<vscale x 8 x i16> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv8i16_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vse16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv8i16_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vse16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv8i16_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv8i16_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv8i16.p0(<vscale x 8 x i16> %val, ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv8i16_S1(<vscale x 8 x i16> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv8i16_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vse16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv8i16_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vse16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv8i16_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv8i16_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv8i16.p0(<vscale x 8 x i16> %val, ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv8i16_ALL(<vscale x 8 x i16> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv8i16_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv8i16_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv8i16_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv8i16_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv8i16.p0(<vscale x 8 x i16> %val, ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_store_nxv8i16_DEFAULT(<vscale x 8 x i16> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv8i16_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv8i16_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv8i16_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv8i16_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv8i16.p0(<vscale x 8 x i16> %val, ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 8 x i16> @test_nontemporal_vp_gather_nxv8i16_P1(<vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv8i16_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv8i16_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vluxei32.v v12, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv8i16_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv8i16_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vluxei32.v v12, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v12
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i16> @llvm.vp.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 8 x i16> %x
+}
+
+
+define <vscale x 8 x i16> @test_nontemporal_vp_gather_nxv8i16_PALL(<vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv8i16_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv8i16_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vluxei32.v v12, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv8i16_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv8i16_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vluxei32.v v12, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v12
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i16> @llvm.vp.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 8 x i16> %x
+}
+
+
+define <vscale x 8 x i16> @test_nontemporal_vp_gather_nxv8i16_S1(<vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv8i16_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv8i16_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vluxei32.v v12, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv8i16_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv8i16_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vluxei32.v v12, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v12
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i16> @llvm.vp.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 8 x i16> %x
+}
+
+
+define <vscale x 8 x i16> @test_nontemporal_vp_gather_nxv8i16_ALL(<vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv8i16_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv8i16_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v12, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv8i16_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv8i16_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v12, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v12
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i16> @llvm.vp.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 8 x i16> %x
+}
+
+define <vscale x 8 x i16> @test_nontemporal_vp_gather_nxv8i16_DEFAULT(<vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv8i16_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv8i16_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v12, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv8i16_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv8i16_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v12, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v12
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i16> @llvm.vp.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 8 x i16> %x
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv8i16_P1(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv8i16_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv8i16_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv8i16_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv8i16_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv8i16_PALL(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv8i16_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv8i16_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv8i16_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv8i16_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv8i16_S1(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv8i16_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv8i16_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv8i16_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv8i16_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv8i16_ALL(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv8i16_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv8i16_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv8i16_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv8i16_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_scatter_nxv8i16_DEFAULT(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv8i16_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv8i16_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv8i16_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv8i16_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 8 x i16> @test_nontemporal_vp_strided.load_nxv8i16_P1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv8i16_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv8i16_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv8i16_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv8i16_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i16> @llvm.experimental.vp.strided.load.nxv8i16.i64(ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 8 x i16> %x
+}
+
+
+define <vscale x 8 x i16> @test_nontemporal_vp_strided.load_nxv8i16_PALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv8i16_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv8i16_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv8i16_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv8i16_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i16> @llvm.experimental.vp.strided.load.nxv8i16.i64(ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 8 x i16> %x
+}
+
+
+define <vscale x 8 x i16> @test_nontemporal_vp_strided.load_nxv8i16_S1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv8i16_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv8i16_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv8i16_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv8i16_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i16> @llvm.experimental.vp.strided.load.nxv8i16.i64(ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 8 x i16> %x
+}
+
+
+define <vscale x 8 x i16> @test_nontemporal_vp_strided.load_nxv8i16_ALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv8i16_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv8i16_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv8i16_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv8i16_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i16> @llvm.experimental.vp.strided.load.nxv8i16.i64(ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 8 x i16> %x
+}
+
+define <vscale x 8 x i16> @test_nontemporal_vp_strided.load_nxv8i16_DEFAULT(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv8i16_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv8i16_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv8i16_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv8i16_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i16> @llvm.experimental.vp.strided.load.nxv8i16.i64(ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 8 x i16> %x
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv8i16_P1(<vscale x 8 x i16> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv8i16_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv8i16_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv8i16_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv8i16_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv8i16.i64(<vscale x 8 x i16> %val, ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv8i16_PALL(<vscale x 8 x i16> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv8i16_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv8i16_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv8i16_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv8i16_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv8i16.i64(<vscale x 8 x i16> %val, ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv8i16_S1(<vscale x 8 x i16> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv8i16_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv8i16_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv8i16_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv8i16_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv8i16.i64(<vscale x 8 x i16> %val, ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv8i16_ALL(<vscale x 8 x i16> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv8i16_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv8i16_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv8i16_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv8i16_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv8i16.i64(<vscale x 8 x i16> %val, ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_strided.store_nxv8i16_DEFAULT(<vscale x 8 x i16> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv8i16_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv8i16_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv8i16_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv8i16_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv8i16.i64(<vscale x 8 x i16> %val, ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 8 x i32> @test_nontemporal_vp_load_nxv8i32_P1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv8i32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv8i32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv8i32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv8i32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i32> @llvm.vp.load.nxv8i32.p0(ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 8 x i32> %x
+}
+
+
+define <vscale x 8 x i32> @test_nontemporal_vp_load_nxv8i32_PALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv8i32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv8i32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv8i32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv8i32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i32> @llvm.vp.load.nxv8i32.p0(ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 8 x i32> %x
+}
+
+
+define <vscale x 8 x i32> @test_nontemporal_vp_load_nxv8i32_S1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv8i32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv8i32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv8i32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv8i32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i32> @llvm.vp.load.nxv8i32.p0(ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 8 x i32> %x
+}
+
+
+define <vscale x 8 x i32> @test_nontemporal_vp_load_nxv8i32_ALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv8i32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv8i32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv8i32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv8i32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i32> @llvm.vp.load.nxv8i32.p0(ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 8 x i32> %x
+}
+
+define <vscale x 8 x i32> @test_nontemporal_vp_load_nxv8i32_DEFAULT(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv8i32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv8i32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv8i32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv8i32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i32> @llvm.vp.load.nxv8i32.p0(ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 8 x i32> %x
+}
+
+
+define void @test_nontemporal_vp_store_nxv8i32_P1(<vscale x 8 x i32> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv8i32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv8i32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv8i32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv8i32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> %val, ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv8i32_PALL(<vscale x 8 x i32> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv8i32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv8i32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv8i32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv8i32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> %val, ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv8i32_S1(<vscale x 8 x i32> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv8i32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv8i32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv8i32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv8i32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> %val, ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv8i32_ALL(<vscale x 8 x i32> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv8i32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv8i32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv8i32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv8i32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> %val, ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_store_nxv8i32_DEFAULT(<vscale x 8 x i32> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv8i32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv8i32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv8i32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv8i32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> %val, ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 8 x i32> @test_nontemporal_vp_gather_nxv8i32_P1(<vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv8i32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv8i32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv8i32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv8i32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 8 x i32> %x
+}
+
+
+define <vscale x 8 x i32> @test_nontemporal_vp_gather_nxv8i32_PALL(<vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv8i32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv8i32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv8i32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv8i32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 8 x i32> %x
+}
+
+
+define <vscale x 8 x i32> @test_nontemporal_vp_gather_nxv8i32_S1(<vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv8i32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv8i32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv8i32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv8i32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 8 x i32> %x
+}
+
+
+define <vscale x 8 x i32> @test_nontemporal_vp_gather_nxv8i32_ALL(<vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv8i32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv8i32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv8i32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv8i32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 8 x i32> %x
+}
+
+define <vscale x 8 x i32> @test_nontemporal_vp_gather_nxv8i32_DEFAULT(<vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv8i32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv8i32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv8i32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv8i32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 8 x i32> %x
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv8i32_P1(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv8i32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv8i32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv8i32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv8i32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv8i32_PALL(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv8i32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv8i32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv8i32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv8i32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv8i32_S1(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv8i32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv8i32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv8i32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv8i32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv8i32_ALL(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv8i32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv8i32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv8i32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv8i32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_scatter_nxv8i32_DEFAULT(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv8i32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv8i32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv8i32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv8i32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 8 x i32> @test_nontemporal_vp_strided.load_nxv8i32_P1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv8i32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv8i32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv8i32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv8i32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i32> @llvm.experimental.vp.strided.load.nxv8i32.i64(ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 8 x i32> %x
+}
+
+
+define <vscale x 8 x i32> @test_nontemporal_vp_strided.load_nxv8i32_PALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv8i32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv8i32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv8i32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv8i32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i32> @llvm.experimental.vp.strided.load.nxv8i32.i64(ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 8 x i32> %x
+}
+
+
+define <vscale x 8 x i32> @test_nontemporal_vp_strided.load_nxv8i32_S1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv8i32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv8i32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv8i32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv8i32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i32> @llvm.experimental.vp.strided.load.nxv8i32.i64(ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 8 x i32> %x
+}
+
+
+define <vscale x 8 x i32> @test_nontemporal_vp_strided.load_nxv8i32_ALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv8i32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv8i32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv8i32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv8i32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i32> @llvm.experimental.vp.strided.load.nxv8i32.i64(ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 8 x i32> %x
+}
+
+define <vscale x 8 x i32> @test_nontemporal_vp_strided.load_nxv8i32_DEFAULT(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv8i32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv8i32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv8i32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv8i32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i32> @llvm.experimental.vp.strided.load.nxv8i32.i64(ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 8 x i32> %x
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv8i32_P1(<vscale x 8 x i32> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv8i32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv8i32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv8i32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv8i32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv8i32.i64(<vscale x 8 x i32> %val, ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv8i32_PALL(<vscale x 8 x i32> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv8i32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv8i32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv8i32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv8i32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv8i32.i64(<vscale x 8 x i32> %val, ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv8i32_S1(<vscale x 8 x i32> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv8i32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv8i32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv8i32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv8i32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv8i32.i64(<vscale x 8 x i32> %val, ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv8i32_ALL(<vscale x 8 x i32> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv8i32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv8i32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv8i32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv8i32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv8i32.i64(<vscale x 8 x i32> %val, ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_strided.store_nxv8i32_DEFAULT(<vscale x 8 x i32> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv8i32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv8i32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv8i32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv8i32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv8i32.i64(<vscale x 8 x i32> %val, ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 8 x i64> @test_nontemporal_vp_load_nxv8i64_P1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv8i64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv8i64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv8i64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv8i64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64.p0(ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 8 x i64> %x
+}
+
+
+define <vscale x 8 x i64> @test_nontemporal_vp_load_nxv8i64_PALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv8i64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv8i64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv8i64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv8i64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64.p0(ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 8 x i64> %x
+}
+
+
+define <vscale x 8 x i64> @test_nontemporal_vp_load_nxv8i64_S1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv8i64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv8i64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv8i64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv8i64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64.p0(ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 8 x i64> %x
+}
+
+
+define <vscale x 8 x i64> @test_nontemporal_vp_load_nxv8i64_ALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv8i64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv8i64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv8i64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv8i64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64.p0(ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 8 x i64> %x
+}
+
+define <vscale x 8 x i64> @test_nontemporal_vp_load_nxv8i64_DEFAULT(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv8i64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv8i64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv8i64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv8i64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64.p0(ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 8 x i64> %x
+}
+
+
+define void @test_nontemporal_vp_store_nxv8i64_P1(<vscale x 8 x i64> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv8i64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv8i64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv8i64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv8i64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv8i64.p0(<vscale x 8 x i64> %val, ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv8i64_PALL(<vscale x 8 x i64> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv8i64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv8i64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv8i64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv8i64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv8i64.p0(<vscale x 8 x i64> %val, ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv8i64_S1(<vscale x 8 x i64> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv8i64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv8i64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv8i64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv8i64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv8i64.p0(<vscale x 8 x i64> %val, ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv8i64_ALL(<vscale x 8 x i64> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv8i64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv8i64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv8i64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv8i64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv8i64.p0(<vscale x 8 x i64> %val, ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_store_nxv8i64_DEFAULT(<vscale x 8 x i64> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv8i64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv8i64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv8i64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv8i64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv8i64.p0(<vscale x 8 x i64> %val, ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 8 x i64> @test_nontemporal_vp_gather_nxv8i64_P1(<vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv8i64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv8i64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: vmv4r.v v16, v8
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv8i64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv8i64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: vmv4r.v v16, v8
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 8 x i64> %x
+}
+
+
+define <vscale x 8 x i64> @test_nontemporal_vp_gather_nxv8i64_PALL(<vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv8i64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv8i64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: vmv4r.v v16, v8
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv8i64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv8i64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: vmv4r.v v16, v8
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 8 x i64> %x
+}
+
+
+define <vscale x 8 x i64> @test_nontemporal_vp_gather_nxv8i64_S1(<vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv8i64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv8i64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: vmv4r.v v16, v8
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv8i64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv8i64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: vmv4r.v v16, v8
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 8 x i64> %x
+}
+
+
+define <vscale x 8 x i64> @test_nontemporal_vp_gather_nxv8i64_ALL(<vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv8i64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv8i64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: vmv4r.v v16, v8
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv8i64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv8i64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: vmv4r.v v16, v8
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 8 x i64> %x
+}
+
+define <vscale x 8 x i64> @test_nontemporal_vp_gather_nxv8i64_DEFAULT(<vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv8i64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv8i64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: vmv4r.v v16, v8
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv8i64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv8i64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: vmv4r.v v16, v8
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 8 x i64> %x
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv8i64_P1(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv8i64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv8i64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv8i64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv8i64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv8i64_PALL(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv8i64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv8i64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv8i64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv8i64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv8i64_S1(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv8i64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv8i64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv8i64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv8i64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv8i64_ALL(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv8i64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv8i64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv8i64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv8i64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_scatter_nxv8i64_DEFAULT(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv8i64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv8i64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv8i64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv8i64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 8 x i64> @test_nontemporal_vp_strided.load_nxv8i64_P1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv8i64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv8i64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv8i64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv8i64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i64> @llvm.experimental.vp.strided.load.nxv8i64.i64(ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 8 x i64> %x
+}
+
+
+define <vscale x 8 x i64> @test_nontemporal_vp_strided.load_nxv8i64_PALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv8i64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv8i64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv8i64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv8i64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i64> @llvm.experimental.vp.strided.load.nxv8i64.i64(ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 8 x i64> %x
+}
+
+
+define <vscale x 8 x i64> @test_nontemporal_vp_strided.load_nxv8i64_S1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv8i64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv8i64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv8i64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv8i64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i64> @llvm.experimental.vp.strided.load.nxv8i64.i64(ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 8 x i64> %x
+}
+
+
+define <vscale x 8 x i64> @test_nontemporal_vp_strided.load_nxv8i64_ALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv8i64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv8i64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv8i64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv8i64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i64> @llvm.experimental.vp.strided.load.nxv8i64.i64(ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 8 x i64> %x
+}
+
+define <vscale x 8 x i64> @test_nontemporal_vp_strided.load_nxv8i64_DEFAULT(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv8i64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv8i64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv8i64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv8i64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x i64> @llvm.experimental.vp.strided.load.nxv8i64.i64(ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 8 x i64> %x
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv8i64_P1(<vscale x 8 x i64> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv8i64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv8i64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv8i64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv8i64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv8i64.i64(<vscale x 8 x i64> %val, ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv8i64_PALL(<vscale x 8 x i64> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv8i64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv8i64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv8i64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv8i64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv8i64.i64(<vscale x 8 x i64> %val, ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv8i64_S1(<vscale x 8 x i64> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv8i64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv8i64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv8i64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv8i64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv8i64.i64(<vscale x 8 x i64> %val, ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv8i64_ALL(<vscale x 8 x i64> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv8i64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv8i64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv8i64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv8i64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv8i64.i64(<vscale x 8 x i64> %val, ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_strided.store_nxv8i64_DEFAULT(<vscale x 8 x i64> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv8i64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv8i64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv8i64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv8i64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv8i64.i64(<vscale x 8 x i64> %val, ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 8 x float> @test_nontemporal_vp_load_nxv8f32_P1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv8f32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv8f32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv8f32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv8f32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x float> @llvm.vp.load.nxv8f32.p0(ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 8 x float> %x
+}
+
+
+define <vscale x 8 x float> @test_nontemporal_vp_load_nxv8f32_PALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv8f32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv8f32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv8f32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv8f32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x float> @llvm.vp.load.nxv8f32.p0(ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 8 x float> %x
+}
+
+
+define <vscale x 8 x float> @test_nontemporal_vp_load_nxv8f32_S1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv8f32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv8f32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv8f32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv8f32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x float> @llvm.vp.load.nxv8f32.p0(ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 8 x float> %x
+}
+
+
+define <vscale x 8 x float> @test_nontemporal_vp_load_nxv8f32_ALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv8f32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv8f32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv8f32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv8f32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x float> @llvm.vp.load.nxv8f32.p0(ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 8 x float> %x
+}
+
+define <vscale x 8 x float> @test_nontemporal_vp_load_nxv8f32_DEFAULT(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv8f32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv8f32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv8f32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv8f32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x float> @llvm.vp.load.nxv8f32.p0(ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 8 x float> %x
+}
+
+
+define void @test_nontemporal_vp_store_nxv8f32_P1(<vscale x 8 x float> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv8f32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv8f32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv8f32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv8f32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv8f32.p0(<vscale x 8 x float> %val, ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv8f32_PALL(<vscale x 8 x float> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv8f32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv8f32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv8f32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv8f32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv8f32.p0(<vscale x 8 x float> %val, ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv8f32_S1(<vscale x 8 x float> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv8f32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv8f32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv8f32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv8f32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv8f32.p0(<vscale x 8 x float> %val, ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv8f32_ALL(<vscale x 8 x float> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv8f32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv8f32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv8f32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv8f32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv8f32.p0(<vscale x 8 x float> %val, ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_store_nxv8f32_DEFAULT(<vscale x 8 x float> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv8f32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv8f32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv8f32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv8f32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv8f32.p0(<vscale x 8 x float> %val, ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 8 x float> @test_nontemporal_vp_gather_nxv8f32_P1(<vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv8f32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv8f32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv8f32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv8f32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 8 x float> %x
+}
+
+
+define <vscale x 8 x float> @test_nontemporal_vp_gather_nxv8f32_PALL(<vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv8f32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv8f32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv8f32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv8f32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 8 x float> %x
+}
+
+
+define <vscale x 8 x float> @test_nontemporal_vp_gather_nxv8f32_S1(<vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv8f32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv8f32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv8f32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv8f32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 8 x float> %x
+}
+
+
+define <vscale x 8 x float> @test_nontemporal_vp_gather_nxv8f32_ALL(<vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv8f32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv8f32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv8f32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv8f32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 8 x float> %x
+}
+
+define <vscale x 8 x float> @test_nontemporal_vp_gather_nxv8f32_DEFAULT(<vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv8f32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv8f32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv8f32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv8f32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 8 x float> %x
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv8f32_P1(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv8f32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv8f32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv8f32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv8f32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv8f32_PALL(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv8f32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv8f32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv8f32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv8f32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv8f32_S1(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv8f32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv8f32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv8f32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv8f32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv8f32_ALL(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv8f32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv8f32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv8f32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv8f32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_scatter_nxv8f32_DEFAULT(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv8f32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv8f32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv8f32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv8f32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 8 x float> @test_nontemporal_vp_strided.load_nxv8f32_P1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv8f32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv8f32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv8f32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv8f32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x float> @llvm.experimental.vp.strided.load.nxv8f32.i64(ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 8 x float> %x
+}
+
+
+define <vscale x 8 x float> @test_nontemporal_vp_strided.load_nxv8f32_PALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv8f32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv8f32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv8f32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv8f32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x float> @llvm.experimental.vp.strided.load.nxv8f32.i64(ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 8 x float> %x
+}
+
+
+define <vscale x 8 x float> @test_nontemporal_vp_strided.load_nxv8f32_S1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv8f32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv8f32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv8f32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv8f32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x float> @llvm.experimental.vp.strided.load.nxv8f32.i64(ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 8 x float> %x
+}
+
+
+define <vscale x 8 x float> @test_nontemporal_vp_strided.load_nxv8f32_ALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv8f32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv8f32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv8f32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv8f32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x float> @llvm.experimental.vp.strided.load.nxv8f32.i64(ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 8 x float> %x
+}
+
+define <vscale x 8 x float> @test_nontemporal_vp_strided.load_nxv8f32_DEFAULT(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv8f32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv8f32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv8f32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv8f32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x float> @llvm.experimental.vp.strided.load.nxv8f32.i64(ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 8 x float> %x
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv8f32_P1(<vscale x 8 x float> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv8f32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv8f32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv8f32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv8f32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv8f32.i64(<vscale x 8 x float> %val, ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv8f32_PALL(<vscale x 8 x float> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv8f32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv8f32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv8f32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv8f32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv8f32.i64(<vscale x 8 x float> %val, ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv8f32_S1(<vscale x 8 x float> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv8f32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv8f32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv8f32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv8f32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv8f32.i64(<vscale x 8 x float> %val, ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv8f32_ALL(<vscale x 8 x float> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv8f32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv8f32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv8f32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv8f32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv8f32.i64(<vscale x 8 x float> %val, ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_strided.store_nxv8f32_DEFAULT(<vscale x 8 x float> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv8f32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv8f32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv8f32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv8f32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv8f32.i64(<vscale x 8 x float> %val, ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 8 x double> @test_nontemporal_vp_load_nxv8f64_P1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv8f64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv8f64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv8f64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv8f64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x double> @llvm.vp.load.nxv8f64.p0(ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 8 x double> %x
+}
+
+
+define <vscale x 8 x double> @test_nontemporal_vp_load_nxv8f64_PALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv8f64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv8f64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv8f64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv8f64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x double> @llvm.vp.load.nxv8f64.p0(ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 8 x double> %x
+}
+
+
+define <vscale x 8 x double> @test_nontemporal_vp_load_nxv8f64_S1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv8f64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv8f64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv8f64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv8f64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x double> @llvm.vp.load.nxv8f64.p0(ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 8 x double> %x
+}
+
+
+define <vscale x 8 x double> @test_nontemporal_vp_load_nxv8f64_ALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv8f64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv8f64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv8f64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv8f64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x double> @llvm.vp.load.nxv8f64.p0(ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 8 x double> %x
+}
+
+define <vscale x 8 x double> @test_nontemporal_vp_load_nxv8f64_DEFAULT(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv8f64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv8f64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv8f64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv8f64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x double> @llvm.vp.load.nxv8f64.p0(ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 8 x double> %x
+}
+
+
+define void @test_nontemporal_vp_store_nxv8f64_P1(<vscale x 8 x double> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv8f64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv8f64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv8f64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv8f64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv8f64.p0(<vscale x 8 x double> %val, ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv8f64_PALL(<vscale x 8 x double> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv8f64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv8f64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv8f64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv8f64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv8f64.p0(<vscale x 8 x double> %val, ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv8f64_S1(<vscale x 8 x double> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv8f64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv8f64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv8f64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv8f64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv8f64.p0(<vscale x 8 x double> %val, ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv8f64_ALL(<vscale x 8 x double> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv8f64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv8f64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv8f64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv8f64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv8f64.p0(<vscale x 8 x double> %val, ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_store_nxv8f64_DEFAULT(<vscale x 8 x double> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv8f64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv8f64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv8f64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv8f64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv8f64.p0(<vscale x 8 x double> %val, ptr %p, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 8 x double> @test_nontemporal_vp_gather_nxv8f64_P1(<vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv8f64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv8f64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: vmv4r.v v16, v8
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv8f64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv8f64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: vmv4r.v v16, v8
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 8 x double> %x
+}
+
+
+define <vscale x 8 x double> @test_nontemporal_vp_gather_nxv8f64_PALL(<vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv8f64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv8f64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: vmv4r.v v16, v8
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv8f64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv8f64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: vmv4r.v v16, v8
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 8 x double> %x
+}
+
+
+define <vscale x 8 x double> @test_nontemporal_vp_gather_nxv8f64_S1(<vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv8f64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv8f64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: vmv4r.v v16, v8
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv8f64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv8f64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: vmv4r.v v16, v8
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 8 x double> %x
+}
+
+
+define <vscale x 8 x double> @test_nontemporal_vp_gather_nxv8f64_ALL(<vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv8f64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv8f64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: vmv4r.v v16, v8
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv8f64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv8f64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: vmv4r.v v16, v8
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 8 x double> %x
+}
+
+define <vscale x 8 x double> @test_nontemporal_vp_gather_nxv8f64_DEFAULT(<vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv8f64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv8f64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: vmv4r.v v16, v8
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv8f64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv8f64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: vmv4r.v v16, v8
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 8 x double> %x
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv8f64_P1(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv8f64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv8f64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv8f64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv8f64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv8f64_PALL(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv8f64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv8f64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv8f64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv8f64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv8f64_S1(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv8f64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv8f64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv8f64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv8f64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv8f64_ALL(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv8f64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv8f64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv8f64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv8f64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_scatter_nxv8f64_DEFAULT(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv8f64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv8f64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv8f64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv8f64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 8 x double> @test_nontemporal_vp_strided.load_nxv8f64_P1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv8f64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv8f64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv8f64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv8f64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x double> @llvm.experimental.vp.strided.load.nxv8f64.i64(ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 8 x double> %x
+}
+
+
+define <vscale x 8 x double> @test_nontemporal_vp_strided.load_nxv8f64_PALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv8f64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv8f64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv8f64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv8f64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x double> @llvm.experimental.vp.strided.load.nxv8f64.i64(ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 8 x double> %x
+}
+
+
+define <vscale x 8 x double> @test_nontemporal_vp_strided.load_nxv8f64_S1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv8f64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv8f64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv8f64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv8f64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x double> @llvm.experimental.vp.strided.load.nxv8f64.i64(ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 8 x double> %x
+}
+
+
+define <vscale x 8 x double> @test_nontemporal_vp_strided.load_nxv8f64_ALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv8f64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv8f64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv8f64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv8f64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x double> @llvm.experimental.vp.strided.load.nxv8f64.i64(ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 8 x double> %x
+}
+
+define <vscale x 8 x double> @test_nontemporal_vp_strided.load_nxv8f64_DEFAULT(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv8f64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv8f64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv8f64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv8f64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 8 x double> @llvm.experimental.vp.strided.load.nxv8f64.i64(ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 8 x double> %x
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv8f64_P1(<vscale x 8 x double> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv8f64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv8f64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv8f64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv8f64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv8f64.i64(<vscale x 8 x double> %val, ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv8f64_PALL(<vscale x 8 x double> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv8f64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv8f64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv8f64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv8f64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv8f64.i64(<vscale x 8 x double> %val, ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv8f64_S1(<vscale x 8 x double> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv8f64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv8f64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv8f64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv8f64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv8f64.i64(<vscale x 8 x double> %val, ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv8f64_ALL(<vscale x 8 x double> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv8f64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv8f64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv8f64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv8f64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv8f64.i64(<vscale x 8 x double> %val, ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_strided.store_nxv8f64_DEFAULT(<vscale x 8 x double> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv8f64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv8f64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv8f64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv8f64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv8f64.i64(<vscale x 8 x double> %val, ptr %p, i64 %stride, <vscale x 8 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 16 x i8> @test_nontemporal_vp_load_nxv16i8_P1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv16i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv16i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv16i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv16i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr %p, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 16 x i8> %x
+}
+
+
+define <vscale x 16 x i8> @test_nontemporal_vp_load_nxv16i8_PALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv16i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv16i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv16i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv16i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr %p, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 16 x i8> %x
+}
+
+
+define <vscale x 16 x i8> @test_nontemporal_vp_load_nxv16i8_S1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv16i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv16i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv16i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv16i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr %p, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 16 x i8> %x
+}
+
+
+define <vscale x 16 x i8> @test_nontemporal_vp_load_nxv16i8_ALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv16i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv16i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv16i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv16i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr %p, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 16 x i8> %x
+}
+
+define <vscale x 16 x i8> @test_nontemporal_vp_load_nxv16i8_DEFAULT(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv16i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv16i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv16i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv16i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr %p, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 16 x i8> %x
+}
+
+
+define void @test_nontemporal_vp_store_nxv16i8_P1(<vscale x 16 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv16i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vse8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv16i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vse8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv16i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv16i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> %val, ptr %p, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv16i8_PALL(<vscale x 16 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv16i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vse8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv16i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vse8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv16i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv16i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> %val, ptr %p, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv16i8_S1(<vscale x 16 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv16i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vse8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv16i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vse8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv16i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv16i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> %val, ptr %p, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv16i8_ALL(<vscale x 16 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv16i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv16i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv16i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv16i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> %val, ptr %p, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_store_nxv16i8_DEFAULT(<vscale x 16 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv16i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv16i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv16i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv16i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> %val, ptr %p, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 16 x i8> @test_nontemporal_vp_gather_nxv16i8_P1(<vscale x 16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv16i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: sub a2, a0, a1
+; CHECK-RV64V-NEXT: sltu a3, a0, a2
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a2, a3, a2
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v25, (zero), v16
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB730_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a0, a1
+; CHECK-RV64V-NEXT: .LBB730_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64V-NEXT: vmv2r.v v8, v24
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv16i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vluxei32.v v16, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv16i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: sub a2, a0, a1
+; CHECK-RV64VC-NEXT: sltu a3, a0, a2
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a2, a2, a3
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v25, (zero), v16
+; CHECK-RV64VC-NEXT: bltu a0, a1, .LBB730_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a0, a1
+; CHECK-RV64VC-NEXT: .LBB730_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64VC-NEXT: vmv2r.v v8, v24
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv16i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vluxei32.v v16, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v16
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i8> @llvm.vp.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 16 x i8> %x
+}
+
+
+define <vscale x 16 x i8> @test_nontemporal_vp_gather_nxv16i8_PALL(<vscale x 16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv16i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: sub a2, a0, a1
+; CHECK-RV64V-NEXT: sltu a3, a0, a2
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a2, a3, a2
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v25, (zero), v16
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB731_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a0, a1
+; CHECK-RV64V-NEXT: .LBB731_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64V-NEXT: vmv2r.v v8, v24
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv16i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vluxei32.v v16, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv16i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: sub a2, a0, a1
+; CHECK-RV64VC-NEXT: sltu a3, a0, a2
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a2, a2, a3
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v25, (zero), v16
+; CHECK-RV64VC-NEXT: bltu a0, a1, .LBB731_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a0, a1
+; CHECK-RV64VC-NEXT: .LBB731_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64VC-NEXT: vmv2r.v v8, v24
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv16i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vluxei32.v v16, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v16
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i8> @llvm.vp.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 16 x i8> %x
+}
+
+
+define <vscale x 16 x i8> @test_nontemporal_vp_gather_nxv16i8_S1(<vscale x 16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv16i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: sub a2, a0, a1
+; CHECK-RV64V-NEXT: sltu a3, a0, a2
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a2, a3, a2
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v25, (zero), v16
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB732_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a0, a1
+; CHECK-RV64V-NEXT: .LBB732_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64V-NEXT: vmv2r.v v8, v24
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv16i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vluxei32.v v16, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv16i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: sub a2, a0, a1
+; CHECK-RV64VC-NEXT: sltu a3, a0, a2
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a2, a2, a3
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v25, (zero), v16
+; CHECK-RV64VC-NEXT: bltu a0, a1, .LBB732_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a0, a1
+; CHECK-RV64VC-NEXT: .LBB732_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64VC-NEXT: vmv2r.v v8, v24
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv16i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vluxei32.v v16, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v16
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i8> @llvm.vp.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 16 x i8> %x
+}
+
+
+define <vscale x 16 x i8> @test_nontemporal_vp_gather_nxv16i8_ALL(<vscale x 16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv16i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: sub a2, a0, a1
+; CHECK-RV64V-NEXT: sltu a3, a0, a2
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a2, a3, a2
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v25, (zero), v16
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB733_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a0, a1
+; CHECK-RV64V-NEXT: .LBB733_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64V-NEXT: vmv2r.v v8, v24
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv16i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v16, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv16i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: sub a2, a0, a1
+; CHECK-RV64VC-NEXT: sltu a3, a0, a2
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a2, a2, a3
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v25, (zero), v16
+; CHECK-RV64VC-NEXT: bltu a0, a1, .LBB733_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a0, a1
+; CHECK-RV64VC-NEXT: .LBB733_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64VC-NEXT: vmv2r.v v8, v24
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv16i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v16, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v16
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i8> @llvm.vp.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 16 x i8> %x
+}
+
+define <vscale x 16 x i8> @test_nontemporal_vp_gather_nxv16i8_DEFAULT(<vscale x 16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv16i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: sub a2, a0, a1
+; CHECK-RV64V-NEXT: sltu a3, a0, a2
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a2, a3, a2
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v25, (zero), v16
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB734_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a0, a1
+; CHECK-RV64V-NEXT: .LBB734_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64V-NEXT: vmv2r.v v8, v24
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv16i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v16, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv16i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: sub a2, a0, a1
+; CHECK-RV64VC-NEXT: sltu a3, a0, a2
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a2, a2, a3
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v25, (zero), v16
+; CHECK-RV64VC-NEXT: bltu a0, a1, .LBB734_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a0, a1
+; CHECK-RV64VC-NEXT: .LBB734_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64VC-NEXT: vmv2r.v v8, v24
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv16i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v16, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v16
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i8> @llvm.vp.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 16 x i8> %x
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv16i8_P1(<vscale x 16 x i8> %val, <vscale x 16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv16i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: bltu a1, a0, .LBB735_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a2, a0
+; CHECK-RV64V-NEXT: .LBB735_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: sub a0, a1, a0
+; CHECK-RV64V-NEXT: sltu a1, a1, a0
+; CHECK-RV64V-NEXT: addi a1, a1, -1
+; CHECK-RV64V-NEXT: and a0, a1, a0
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v9, (zero), v24
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv16i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv16i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: bltu a1, a0, .LBB735_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a2, a0
+; CHECK-RV64VC-NEXT: .LBB735_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: sub a0, a1, a0
+; CHECK-RV64VC-NEXT: sltu a1, a1, a0
+; CHECK-RV64VC-NEXT: addi a1, a1, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a1
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v9, (zero), v24
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv16i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv16i8_PALL(<vscale x 16 x i8> %val, <vscale x 16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv16i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: bltu a1, a0, .LBB736_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a2, a0
+; CHECK-RV64V-NEXT: .LBB736_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: sub a0, a1, a0
+; CHECK-RV64V-NEXT: sltu a1, a1, a0
+; CHECK-RV64V-NEXT: addi a1, a1, -1
+; CHECK-RV64V-NEXT: and a0, a1, a0
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v9, (zero), v24
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv16i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv16i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: bltu a1, a0, .LBB736_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a2, a0
+; CHECK-RV64VC-NEXT: .LBB736_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: sub a0, a1, a0
+; CHECK-RV64VC-NEXT: sltu a1, a1, a0
+; CHECK-RV64VC-NEXT: addi a1, a1, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a1
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v9, (zero), v24
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv16i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv16i8_S1(<vscale x 16 x i8> %val, <vscale x 16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv16i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: bltu a1, a0, .LBB737_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a2, a0
+; CHECK-RV64V-NEXT: .LBB737_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: sub a0, a1, a0
+; CHECK-RV64V-NEXT: sltu a1, a1, a0
+; CHECK-RV64V-NEXT: addi a1, a1, -1
+; CHECK-RV64V-NEXT: and a0, a1, a0
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v9, (zero), v24
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv16i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv16i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: bltu a1, a0, .LBB737_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a2, a0
+; CHECK-RV64VC-NEXT: .LBB737_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: sub a0, a1, a0
+; CHECK-RV64VC-NEXT: sltu a1, a1, a0
+; CHECK-RV64VC-NEXT: addi a1, a1, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a1
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v9, (zero), v24
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv16i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv16i8_ALL(<vscale x 16 x i8> %val, <vscale x 16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv16i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: bltu a1, a0, .LBB738_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a2, a0
+; CHECK-RV64V-NEXT: .LBB738_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: sub a0, a1, a0
+; CHECK-RV64V-NEXT: sltu a1, a1, a0
+; CHECK-RV64V-NEXT: addi a1, a1, -1
+; CHECK-RV64V-NEXT: and a0, a1, a0
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v9, (zero), v24
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv16i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv16i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: bltu a1, a0, .LBB738_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a2, a0
+; CHECK-RV64VC-NEXT: .LBB738_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: sub a0, a1, a0
+; CHECK-RV64VC-NEXT: sltu a1, a1, a0
+; CHECK-RV64VC-NEXT: addi a1, a1, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a1
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v9, (zero), v24
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv16i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_scatter_nxv16i8_DEFAULT(<vscale x 16 x i8> %val, <vscale x 16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv16i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: bltu a1, a0, .LBB739_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a2, a0
+; CHECK-RV64V-NEXT: .LBB739_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: sub a0, a1, a0
+; CHECK-RV64V-NEXT: sltu a1, a1, a0
+; CHECK-RV64V-NEXT: addi a1, a1, -1
+; CHECK-RV64V-NEXT: and a0, a1, a0
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v9, (zero), v24
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv16i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv16i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: bltu a1, a0, .LBB739_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a2, a0
+; CHECK-RV64VC-NEXT: .LBB739_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: sub a0, a1, a0
+; CHECK-RV64VC-NEXT: sltu a1, a1, a0
+; CHECK-RV64VC-NEXT: addi a1, a1, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a1
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v9, (zero), v24
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv16i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 16 x i8> @test_nontemporal_vp_strided.load_nxv16i8_P1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv16i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv16i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv16i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv16i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i8> @llvm.experimental.vp.strided.load.nxv16i8.i64(ptr %p, i64 %stride, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 16 x i8> %x
+}
+
+
+define <vscale x 16 x i8> @test_nontemporal_vp_strided.load_nxv16i8_PALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv16i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv16i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv16i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv16i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i8> @llvm.experimental.vp.strided.load.nxv16i8.i64(ptr %p, i64 %stride, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 16 x i8> %x
+}
+
+
+define <vscale x 16 x i8> @test_nontemporal_vp_strided.load_nxv16i8_S1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv16i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv16i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv16i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv16i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i8> @llvm.experimental.vp.strided.load.nxv16i8.i64(ptr %p, i64 %stride, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 16 x i8> %x
+}
+
+
+define <vscale x 16 x i8> @test_nontemporal_vp_strided.load_nxv16i8_ALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv16i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv16i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv16i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv16i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i8> @llvm.experimental.vp.strided.load.nxv16i8.i64(ptr %p, i64 %stride, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 16 x i8> %x
+}
+
+define <vscale x 16 x i8> @test_nontemporal_vp_strided.load_nxv16i8_DEFAULT(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv16i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv16i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv16i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv16i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i8> @llvm.experimental.vp.strided.load.nxv16i8.i64(ptr %p, i64 %stride, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 16 x i8> %x
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv16i8_P1(<vscale x 16 x i8> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv16i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv16i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv16i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv16i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv16i8.i64(<vscale x 16 x i8> %val, ptr %p, i64 %stride, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv16i8_PALL(<vscale x 16 x i8> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv16i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv16i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv16i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv16i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv16i8.i64(<vscale x 16 x i8> %val, ptr %p, i64 %stride, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv16i8_S1(<vscale x 16 x i8> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv16i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv16i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv16i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv16i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv16i8.i64(<vscale x 16 x i8> %val, ptr %p, i64 %stride, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv16i8_ALL(<vscale x 16 x i8> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv16i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv16i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv16i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv16i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv16i8.i64(<vscale x 16 x i8> %val, ptr %p, i64 %stride, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_strided.store_nxv16i8_DEFAULT(<vscale x 16 x i8> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv16i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv16i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv16i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv16i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv16i8.i64(<vscale x 16 x i8> %val, ptr %p, i64 %stride, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 16 x i16> @test_nontemporal_vp_load_nxv16i16_P1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv16i16_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vle16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv16i16_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vle16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv16i16_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv16i16_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i16> @llvm.vp.load.nxv16i16.p0(ptr %p, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 16 x i16> %x
+}
+
+
+define <vscale x 16 x i16> @test_nontemporal_vp_load_nxv16i16_PALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv16i16_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vle16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv16i16_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vle16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv16i16_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv16i16_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i16> @llvm.vp.load.nxv16i16.p0(ptr %p, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 16 x i16> %x
+}
+
+
+define <vscale x 16 x i16> @test_nontemporal_vp_load_nxv16i16_S1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv16i16_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vle16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv16i16_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vle16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv16i16_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv16i16_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i16> @llvm.vp.load.nxv16i16.p0(ptr %p, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 16 x i16> %x
+}
+
+
+define <vscale x 16 x i16> @test_nontemporal_vp_load_nxv16i16_ALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv16i16_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv16i16_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv16i16_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv16i16_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i16> @llvm.vp.load.nxv16i16.p0(ptr %p, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 16 x i16> %x
+}
+
+define <vscale x 16 x i16> @test_nontemporal_vp_load_nxv16i16_DEFAULT(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv16i16_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv16i16_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv16i16_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv16i16_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i16> @llvm.vp.load.nxv16i16.p0(ptr %p, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 16 x i16> %x
+}
+
+
+define void @test_nontemporal_vp_store_nxv16i16_P1(<vscale x 16 x i16> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv16i16_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vse16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv16i16_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vse16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv16i16_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv16i16_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv16i16.p0(<vscale x 16 x i16> %val, ptr %p, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv16i16_PALL(<vscale x 16 x i16> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv16i16_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vse16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv16i16_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vse16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv16i16_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv16i16_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv16i16.p0(<vscale x 16 x i16> %val, ptr %p, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv16i16_S1(<vscale x 16 x i16> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv16i16_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vse16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv16i16_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vse16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv16i16_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv16i16_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv16i16.p0(<vscale x 16 x i16> %val, ptr %p, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv16i16_ALL(<vscale x 16 x i16> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv16i16_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv16i16_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv16i16_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv16i16_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv16i16.p0(<vscale x 16 x i16> %val, ptr %p, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_store_nxv16i16_DEFAULT(<vscale x 16 x i16> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv16i16_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv16i16_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv16i16_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv16i16_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv16i16.p0(<vscale x 16 x i16> %val, ptr %p, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 16 x i16> @test_nontemporal_vp_gather_nxv16i16_P1(<vscale x 16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv16i16_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: sub a2, a0, a1
+; CHECK-RV64V-NEXT: sltu a3, a0, a2
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a2, a3, a2
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v26, (zero), v16
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB760_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a0, a1
+; CHECK-RV64V-NEXT: .LBB760_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64V-NEXT: vmv4r.v v8, v24
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv16i16_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vluxei32.v v16, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv16i16_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: sub a2, a0, a1
+; CHECK-RV64VC-NEXT: sltu a3, a0, a2
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a2, a2, a3
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v26, (zero), v16
+; CHECK-RV64VC-NEXT: bltu a0, a1, .LBB760_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a0, a1
+; CHECK-RV64VC-NEXT: .LBB760_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64VC-NEXT: vmv4r.v v8, v24
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv16i16_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vluxei32.v v16, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v16
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i16> @llvm.vp.gather.nxv16i16.nxv16p0(<vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 16 x i16> %x
+}
+
+
+define <vscale x 16 x i16> @test_nontemporal_vp_gather_nxv16i16_PALL(<vscale x 16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv16i16_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: sub a2, a0, a1
+; CHECK-RV64V-NEXT: sltu a3, a0, a2
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a2, a3, a2
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v26, (zero), v16
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB761_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a0, a1
+; CHECK-RV64V-NEXT: .LBB761_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64V-NEXT: vmv4r.v v8, v24
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv16i16_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vluxei32.v v16, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv16i16_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: sub a2, a0, a1
+; CHECK-RV64VC-NEXT: sltu a3, a0, a2
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a2, a2, a3
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v26, (zero), v16
+; CHECK-RV64VC-NEXT: bltu a0, a1, .LBB761_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a0, a1
+; CHECK-RV64VC-NEXT: .LBB761_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64VC-NEXT: vmv4r.v v8, v24
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv16i16_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vluxei32.v v16, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v16
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i16> @llvm.vp.gather.nxv16i16.nxv16p0(<vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 16 x i16> %x
+}
+
+
+define <vscale x 16 x i16> @test_nontemporal_vp_gather_nxv16i16_S1(<vscale x 16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv16i16_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: sub a2, a0, a1
+; CHECK-RV64V-NEXT: sltu a3, a0, a2
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a2, a3, a2
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v26, (zero), v16
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB762_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a0, a1
+; CHECK-RV64V-NEXT: .LBB762_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64V-NEXT: vmv4r.v v8, v24
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv16i16_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vluxei32.v v16, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv16i16_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: sub a2, a0, a1
+; CHECK-RV64VC-NEXT: sltu a3, a0, a2
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a2, a2, a3
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v26, (zero), v16
+; CHECK-RV64VC-NEXT: bltu a0, a1, .LBB762_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a0, a1
+; CHECK-RV64VC-NEXT: .LBB762_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64VC-NEXT: vmv4r.v v8, v24
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv16i16_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vluxei32.v v16, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v16
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i16> @llvm.vp.gather.nxv16i16.nxv16p0(<vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 16 x i16> %x
+}
+
+
+define <vscale x 16 x i16> @test_nontemporal_vp_gather_nxv16i16_ALL(<vscale x 16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv16i16_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: sub a2, a0, a1
+; CHECK-RV64V-NEXT: sltu a3, a0, a2
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a2, a3, a2
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v26, (zero), v16
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB763_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a0, a1
+; CHECK-RV64V-NEXT: .LBB763_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64V-NEXT: vmv4r.v v8, v24
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv16i16_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v16, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv16i16_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: sub a2, a0, a1
+; CHECK-RV64VC-NEXT: sltu a3, a0, a2
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a2, a2, a3
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v26, (zero), v16
+; CHECK-RV64VC-NEXT: bltu a0, a1, .LBB763_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a0, a1
+; CHECK-RV64VC-NEXT: .LBB763_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64VC-NEXT: vmv4r.v v8, v24
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv16i16_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v16, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v16
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i16> @llvm.vp.gather.nxv16i16.nxv16p0(<vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 16 x i16> %x
+}
+
+define <vscale x 16 x i16> @test_nontemporal_vp_gather_nxv16i16_DEFAULT(<vscale x 16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv16i16_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: sub a2, a0, a1
+; CHECK-RV64V-NEXT: sltu a3, a0, a2
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a2, a3, a2
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v26, (zero), v16
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB764_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a0, a1
+; CHECK-RV64V-NEXT: .LBB764_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64V-NEXT: vmv4r.v v8, v24
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv16i16_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v16, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv16i16_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: sub a2, a0, a1
+; CHECK-RV64VC-NEXT: sltu a3, a0, a2
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a2, a2, a3
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v26, (zero), v16
+; CHECK-RV64VC-NEXT: bltu a0, a1, .LBB764_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a0, a1
+; CHECK-RV64VC-NEXT: .LBB764_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64VC-NEXT: vmv4r.v v8, v24
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv16i16_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v16, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v16
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i16> @llvm.vp.gather.nxv16i16.nxv16p0(<vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 16 x i16> %x
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv16i16_P1(<vscale x 16 x i16> %val, <vscale x 16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv16i16_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: bltu a1, a0, .LBB765_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a2, a0
+; CHECK-RV64V-NEXT: .LBB765_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: sub a0, a1, a0
+; CHECK-RV64V-NEXT: sltu a1, a1, a0
+; CHECK-RV64V-NEXT: addi a1, a1, -1
+; CHECK-RV64V-NEXT: and a0, a1, a0
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v10, (zero), v24
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv16i16_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv16i16_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: bltu a1, a0, .LBB765_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a2, a0
+; CHECK-RV64VC-NEXT: .LBB765_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: sub a0, a1, a0
+; CHECK-RV64VC-NEXT: sltu a1, a1, a0
+; CHECK-RV64VC-NEXT: addi a1, a1, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a1
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v10, (zero), v24
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv16i16_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv16i16.nxv16p0(<vscale x 16 x i16> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv16i16_PALL(<vscale x 16 x i16> %val, <vscale x 16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv16i16_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: bltu a1, a0, .LBB766_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a2, a0
+; CHECK-RV64V-NEXT: .LBB766_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: sub a0, a1, a0
+; CHECK-RV64V-NEXT: sltu a1, a1, a0
+; CHECK-RV64V-NEXT: addi a1, a1, -1
+; CHECK-RV64V-NEXT: and a0, a1, a0
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v10, (zero), v24
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv16i16_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv16i16_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: bltu a1, a0, .LBB766_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a2, a0
+; CHECK-RV64VC-NEXT: .LBB766_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: sub a0, a1, a0
+; CHECK-RV64VC-NEXT: sltu a1, a1, a0
+; CHECK-RV64VC-NEXT: addi a1, a1, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a1
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v10, (zero), v24
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv16i16_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv16i16.nxv16p0(<vscale x 16 x i16> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv16i16_S1(<vscale x 16 x i16> %val, <vscale x 16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv16i16_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: bltu a1, a0, .LBB767_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a2, a0
+; CHECK-RV64V-NEXT: .LBB767_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: sub a0, a1, a0
+; CHECK-RV64V-NEXT: sltu a1, a1, a0
+; CHECK-RV64V-NEXT: addi a1, a1, -1
+; CHECK-RV64V-NEXT: and a0, a1, a0
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v10, (zero), v24
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv16i16_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv16i16_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: bltu a1, a0, .LBB767_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a2, a0
+; CHECK-RV64VC-NEXT: .LBB767_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: sub a0, a1, a0
+; CHECK-RV64VC-NEXT: sltu a1, a1, a0
+; CHECK-RV64VC-NEXT: addi a1, a1, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a1
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v10, (zero), v24
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv16i16_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv16i16.nxv16p0(<vscale x 16 x i16> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv16i16_ALL(<vscale x 16 x i16> %val, <vscale x 16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv16i16_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: bltu a1, a0, .LBB768_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a2, a0
+; CHECK-RV64V-NEXT: .LBB768_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: sub a0, a1, a0
+; CHECK-RV64V-NEXT: sltu a1, a1, a0
+; CHECK-RV64V-NEXT: addi a1, a1, -1
+; CHECK-RV64V-NEXT: and a0, a1, a0
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v10, (zero), v24
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv16i16_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv16i16_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: bltu a1, a0, .LBB768_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a2, a0
+; CHECK-RV64VC-NEXT: .LBB768_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: sub a0, a1, a0
+; CHECK-RV64VC-NEXT: sltu a1, a1, a0
+; CHECK-RV64VC-NEXT: addi a1, a1, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a1
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v10, (zero), v24
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv16i16_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv16i16.nxv16p0(<vscale x 16 x i16> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_scatter_nxv16i16_DEFAULT(<vscale x 16 x i16> %val, <vscale x 16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv16i16_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: bltu a1, a0, .LBB769_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a2, a0
+; CHECK-RV64V-NEXT: .LBB769_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: sub a0, a1, a0
+; CHECK-RV64V-NEXT: sltu a1, a1, a0
+; CHECK-RV64V-NEXT: addi a1, a1, -1
+; CHECK-RV64V-NEXT: and a0, a1, a0
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v10, (zero), v24
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv16i16_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv16i16_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: bltu a1, a0, .LBB769_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a2, a0
+; CHECK-RV64VC-NEXT: .LBB769_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: sub a0, a1, a0
+; CHECK-RV64VC-NEXT: sltu a1, a1, a0
+; CHECK-RV64VC-NEXT: addi a1, a1, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a1
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v10, (zero), v24
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv16i16_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv16i16.nxv16p0(<vscale x 16 x i16> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 16 x i16> @test_nontemporal_vp_strided.load_nxv16i16_P1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv16i16_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv16i16_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv16i16_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv16i16_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i16> @llvm.experimental.vp.strided.load.nxv16i16.i64(ptr %p, i64 %stride, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 16 x i16> %x
+}
+
+
+define <vscale x 16 x i16> @test_nontemporal_vp_strided.load_nxv16i16_PALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv16i16_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv16i16_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv16i16_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv16i16_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i16> @llvm.experimental.vp.strided.load.nxv16i16.i64(ptr %p, i64 %stride, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 16 x i16> %x
+}
+
+
+define <vscale x 16 x i16> @test_nontemporal_vp_strided.load_nxv16i16_S1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv16i16_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv16i16_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv16i16_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv16i16_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i16> @llvm.experimental.vp.strided.load.nxv16i16.i64(ptr %p, i64 %stride, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 16 x i16> %x
+}
+
+
+define <vscale x 16 x i16> @test_nontemporal_vp_strided.load_nxv16i16_ALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv16i16_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv16i16_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv16i16_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv16i16_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i16> @llvm.experimental.vp.strided.load.nxv16i16.i64(ptr %p, i64 %stride, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 16 x i16> %x
+}
+
+define <vscale x 16 x i16> @test_nontemporal_vp_strided.load_nxv16i16_DEFAULT(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv16i16_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv16i16_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv16i16_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv16i16_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i16> @llvm.experimental.vp.strided.load.nxv16i16.i64(ptr %p, i64 %stride, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 16 x i16> %x
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv16i16_P1(<vscale x 16 x i16> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv16i16_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv16i16_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv16i16_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv16i16_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv16i16.i64(<vscale x 16 x i16> %val, ptr %p, i64 %stride, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv16i16_PALL(<vscale x 16 x i16> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv16i16_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv16i16_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv16i16_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv16i16_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv16i16.i64(<vscale x 16 x i16> %val, ptr %p, i64 %stride, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv16i16_S1(<vscale x 16 x i16> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv16i16_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv16i16_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv16i16_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv16i16_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv16i16.i64(<vscale x 16 x i16> %val, ptr %p, i64 %stride, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv16i16_ALL(<vscale x 16 x i16> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv16i16_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv16i16_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv16i16_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv16i16_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv16i16.i64(<vscale x 16 x i16> %val, ptr %p, i64 %stride, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_strided.store_nxv16i16_DEFAULT(<vscale x 16 x i16> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv16i16_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv16i16_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv16i16_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv16i16_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv16i16.i64(<vscale x 16 x i16> %val, ptr %p, i64 %stride, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 16 x i32> @test_nontemporal_vp_load_nxv16i32_P1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv16i32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv16i32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv16i32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv16i32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i32> @llvm.vp.load.nxv16i32.p0(ptr %p, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 16 x i32> %x
+}
+
+
+define <vscale x 16 x i32> @test_nontemporal_vp_load_nxv16i32_PALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv16i32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv16i32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv16i32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv16i32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i32> @llvm.vp.load.nxv16i32.p0(ptr %p, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 16 x i32> %x
+}
+
+
+define <vscale x 16 x i32> @test_nontemporal_vp_load_nxv16i32_S1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv16i32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv16i32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv16i32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv16i32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i32> @llvm.vp.load.nxv16i32.p0(ptr %p, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 16 x i32> %x
+}
+
+
+define <vscale x 16 x i32> @test_nontemporal_vp_load_nxv16i32_ALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv16i32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv16i32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv16i32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv16i32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i32> @llvm.vp.load.nxv16i32.p0(ptr %p, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 16 x i32> %x
+}
+
+define <vscale x 16 x i32> @test_nontemporal_vp_load_nxv16i32_DEFAULT(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv16i32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv16i32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv16i32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv16i32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i32> @llvm.vp.load.nxv16i32.p0(ptr %p, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 16 x i32> %x
+}
+
+
+define void @test_nontemporal_vp_store_nxv16i32_P1(<vscale x 16 x i32> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv16i32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv16i32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv16i32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv16i32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv16i32.p0(<vscale x 16 x i32> %val, ptr %p, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv16i32_PALL(<vscale x 16 x i32> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv16i32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv16i32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv16i32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv16i32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv16i32.p0(<vscale x 16 x i32> %val, ptr %p, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv16i32_S1(<vscale x 16 x i32> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv16i32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv16i32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv16i32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv16i32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv16i32.p0(<vscale x 16 x i32> %val, ptr %p, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv16i32_ALL(<vscale x 16 x i32> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv16i32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv16i32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv16i32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv16i32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv16i32.p0(<vscale x 16 x i32> %val, ptr %p, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_store_nxv16i32_DEFAULT(<vscale x 16 x i32> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv16i32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv16i32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv16i32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv16i32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv16i32.p0(<vscale x 16 x i32> %val, ptr %p, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 16 x i32> @test_nontemporal_vp_gather_nxv16i32_P1(<vscale x 16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv16i32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: sub a2, a0, a1
+; CHECK-RV64V-NEXT: sltu a3, a0, a2
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a2, a3, a2
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v28, (zero), v16
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB790_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a0, a1
+; CHECK-RV64V-NEXT: .LBB790_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64V-NEXT: vmv8r.v v8, v24
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv16i32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv16i32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: sub a2, a0, a1
+; CHECK-RV64VC-NEXT: sltu a3, a0, a2
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a2, a2, a3
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v28, (zero), v16
+; CHECK-RV64VC-NEXT: bltu a0, a1, .LBB790_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a0, a1
+; CHECK-RV64VC-NEXT: .LBB790_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64VC-NEXT: vmv8r.v v8, v24
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv16i32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i32> @llvm.vp.gather.nxv16i32.nxv16p0(<vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 16 x i32> %x
+}
+
+
+define <vscale x 16 x i32> @test_nontemporal_vp_gather_nxv16i32_PALL(<vscale x 16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv16i32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: sub a2, a0, a1
+; CHECK-RV64V-NEXT: sltu a3, a0, a2
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a2, a3, a2
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v28, (zero), v16
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB791_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a0, a1
+; CHECK-RV64V-NEXT: .LBB791_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64V-NEXT: vmv8r.v v8, v24
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv16i32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv16i32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: sub a2, a0, a1
+; CHECK-RV64VC-NEXT: sltu a3, a0, a2
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a2, a2, a3
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v28, (zero), v16
+; CHECK-RV64VC-NEXT: bltu a0, a1, .LBB791_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a0, a1
+; CHECK-RV64VC-NEXT: .LBB791_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64VC-NEXT: vmv8r.v v8, v24
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv16i32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i32> @llvm.vp.gather.nxv16i32.nxv16p0(<vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 16 x i32> %x
+}
+
+
+define <vscale x 16 x i32> @test_nontemporal_vp_gather_nxv16i32_S1(<vscale x 16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv16i32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: sub a2, a0, a1
+; CHECK-RV64V-NEXT: sltu a3, a0, a2
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a2, a3, a2
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v28, (zero), v16
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB792_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a0, a1
+; CHECK-RV64V-NEXT: .LBB792_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64V-NEXT: vmv8r.v v8, v24
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv16i32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv16i32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: sub a2, a0, a1
+; CHECK-RV64VC-NEXT: sltu a3, a0, a2
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a2, a2, a3
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v28, (zero), v16
+; CHECK-RV64VC-NEXT: bltu a0, a1, .LBB792_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a0, a1
+; CHECK-RV64VC-NEXT: .LBB792_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64VC-NEXT: vmv8r.v v8, v24
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv16i32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i32> @llvm.vp.gather.nxv16i32.nxv16p0(<vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 16 x i32> %x
+}
+
+
+define <vscale x 16 x i32> @test_nontemporal_vp_gather_nxv16i32_ALL(<vscale x 16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv16i32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: sub a2, a0, a1
+; CHECK-RV64V-NEXT: sltu a3, a0, a2
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a2, a3, a2
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v28, (zero), v16
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB793_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a0, a1
+; CHECK-RV64V-NEXT: .LBB793_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64V-NEXT: vmv8r.v v8, v24
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv16i32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv16i32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: sub a2, a0, a1
+; CHECK-RV64VC-NEXT: sltu a3, a0, a2
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a2, a2, a3
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v28, (zero), v16
+; CHECK-RV64VC-NEXT: bltu a0, a1, .LBB793_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a0, a1
+; CHECK-RV64VC-NEXT: .LBB793_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64VC-NEXT: vmv8r.v v8, v24
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv16i32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i32> @llvm.vp.gather.nxv16i32.nxv16p0(<vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 16 x i32> %x
+}
+
+define <vscale x 16 x i32> @test_nontemporal_vp_gather_nxv16i32_DEFAULT(<vscale x 16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv16i32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: sub a2, a0, a1
+; CHECK-RV64V-NEXT: sltu a3, a0, a2
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a2, a3, a2
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v28, (zero), v16
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB794_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a0, a1
+; CHECK-RV64V-NEXT: .LBB794_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64V-NEXT: vmv8r.v v8, v24
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv16i32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv16i32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: sub a2, a0, a1
+; CHECK-RV64VC-NEXT: sltu a3, a0, a2
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a2, a2, a3
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v28, (zero), v16
+; CHECK-RV64VC-NEXT: bltu a0, a1, .LBB794_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a0, a1
+; CHECK-RV64VC-NEXT: .LBB794_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64VC-NEXT: vmv8r.v v8, v24
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv16i32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i32> @llvm.vp.gather.nxv16i32.nxv16p0(<vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 16 x i32> %x
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv16i32_P1(<vscale x 16 x i32> %val, <vscale x 16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv16i32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: bltu a1, a0, .LBB795_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a2, a0
+; CHECK-RV64V-NEXT: .LBB795_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: sub a0, a1, a0
+; CHECK-RV64V-NEXT: sltu a1, a1, a0
+; CHECK-RV64V-NEXT: addi a1, a1, -1
+; CHECK-RV64V-NEXT: and a0, a1, a0
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v12, (zero), v24
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv16i32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv16i32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: bltu a1, a0, .LBB795_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a2, a0
+; CHECK-RV64VC-NEXT: .LBB795_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: sub a0, a1, a0
+; CHECK-RV64VC-NEXT: sltu a1, a1, a0
+; CHECK-RV64VC-NEXT: addi a1, a1, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a1
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v12, (zero), v24
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv16i32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv16i32.nxv16p0(<vscale x 16 x i32> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv16i32_PALL(<vscale x 16 x i32> %val, <vscale x 16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv16i32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: bltu a1, a0, .LBB796_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a2, a0
+; CHECK-RV64V-NEXT: .LBB796_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: sub a0, a1, a0
+; CHECK-RV64V-NEXT: sltu a1, a1, a0
+; CHECK-RV64V-NEXT: addi a1, a1, -1
+; CHECK-RV64V-NEXT: and a0, a1, a0
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v12, (zero), v24
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv16i32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv16i32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: bltu a1, a0, .LBB796_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a2, a0
+; CHECK-RV64VC-NEXT: .LBB796_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: sub a0, a1, a0
+; CHECK-RV64VC-NEXT: sltu a1, a1, a0
+; CHECK-RV64VC-NEXT: addi a1, a1, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a1
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v12, (zero), v24
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv16i32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv16i32.nxv16p0(<vscale x 16 x i32> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv16i32_S1(<vscale x 16 x i32> %val, <vscale x 16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv16i32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: bltu a1, a0, .LBB797_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a2, a0
+; CHECK-RV64V-NEXT: .LBB797_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: sub a0, a1, a0
+; CHECK-RV64V-NEXT: sltu a1, a1, a0
+; CHECK-RV64V-NEXT: addi a1, a1, -1
+; CHECK-RV64V-NEXT: and a0, a1, a0
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v12, (zero), v24
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv16i32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv16i32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: bltu a1, a0, .LBB797_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a2, a0
+; CHECK-RV64VC-NEXT: .LBB797_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: sub a0, a1, a0
+; CHECK-RV64VC-NEXT: sltu a1, a1, a0
+; CHECK-RV64VC-NEXT: addi a1, a1, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a1
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v12, (zero), v24
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv16i32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv16i32.nxv16p0(<vscale x 16 x i32> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv16i32_ALL(<vscale x 16 x i32> %val, <vscale x 16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv16i32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: bltu a1, a0, .LBB798_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a2, a0
+; CHECK-RV64V-NEXT: .LBB798_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: sub a0, a1, a0
+; CHECK-RV64V-NEXT: sltu a1, a1, a0
+; CHECK-RV64V-NEXT: addi a1, a1, -1
+; CHECK-RV64V-NEXT: and a0, a1, a0
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v12, (zero), v24
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv16i32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv16i32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: bltu a1, a0, .LBB798_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a2, a0
+; CHECK-RV64VC-NEXT: .LBB798_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: sub a0, a1, a0
+; CHECK-RV64VC-NEXT: sltu a1, a1, a0
+; CHECK-RV64VC-NEXT: addi a1, a1, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a1
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v12, (zero), v24
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv16i32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv16i32.nxv16p0(<vscale x 16 x i32> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_scatter_nxv16i32_DEFAULT(<vscale x 16 x i32> %val, <vscale x 16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv16i32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: bltu a1, a0, .LBB799_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a2, a0
+; CHECK-RV64V-NEXT: .LBB799_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: sub a0, a1, a0
+; CHECK-RV64V-NEXT: sltu a1, a1, a0
+; CHECK-RV64V-NEXT: addi a1, a1, -1
+; CHECK-RV64V-NEXT: and a0, a1, a0
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v12, (zero), v24
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv16i32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv16i32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: bltu a1, a0, .LBB799_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a2, a0
+; CHECK-RV64VC-NEXT: .LBB799_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: sub a0, a1, a0
+; CHECK-RV64VC-NEXT: sltu a1, a1, a0
+; CHECK-RV64VC-NEXT: addi a1, a1, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a1
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v12, (zero), v24
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv16i32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv16i32.nxv16p0(<vscale x 16 x i32> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 16 x i32> @test_nontemporal_vp_strided.load_nxv16i32_P1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv16i32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv16i32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv16i32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv16i32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i32> @llvm.experimental.vp.strided.load.nxv16i32.i64(ptr %p, i64 %stride, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 16 x i32> %x
+}
+
+
+define <vscale x 16 x i32> @test_nontemporal_vp_strided.load_nxv16i32_PALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv16i32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv16i32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv16i32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv16i32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i32> @llvm.experimental.vp.strided.load.nxv16i32.i64(ptr %p, i64 %stride, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 16 x i32> %x
+}
+
+
+define <vscale x 16 x i32> @test_nontemporal_vp_strided.load_nxv16i32_S1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv16i32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv16i32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv16i32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv16i32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i32> @llvm.experimental.vp.strided.load.nxv16i32.i64(ptr %p, i64 %stride, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 16 x i32> %x
+}
+
+
+define <vscale x 16 x i32> @test_nontemporal_vp_strided.load_nxv16i32_ALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv16i32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv16i32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv16i32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv16i32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i32> @llvm.experimental.vp.strided.load.nxv16i32.i64(ptr %p, i64 %stride, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 16 x i32> %x
+}
+
+define <vscale x 16 x i32> @test_nontemporal_vp_strided.load_nxv16i32_DEFAULT(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv16i32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv16i32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv16i32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv16i32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x i32> @llvm.experimental.vp.strided.load.nxv16i32.i64(ptr %p, i64 %stride, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 16 x i32> %x
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv16i32_P1(<vscale x 16 x i32> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv16i32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv16i32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv16i32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv16i32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv16i32.i64(<vscale x 16 x i32> %val, ptr %p, i64 %stride, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv16i32_PALL(<vscale x 16 x i32> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv16i32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv16i32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv16i32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv16i32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv16i32.i64(<vscale x 16 x i32> %val, ptr %p, i64 %stride, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv16i32_S1(<vscale x 16 x i32> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv16i32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv16i32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv16i32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv16i32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv16i32.i64(<vscale x 16 x i32> %val, ptr %p, i64 %stride, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv16i32_ALL(<vscale x 16 x i32> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv16i32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv16i32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv16i32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv16i32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv16i32.i64(<vscale x 16 x i32> %val, ptr %p, i64 %stride, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_strided.store_nxv16i32_DEFAULT(<vscale x 16 x i32> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv16i32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv16i32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv16i32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv16i32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv16i32.i64(<vscale x 16 x i32> %val, ptr %p, i64 %stride, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 16 x float> @test_nontemporal_vp_load_nxv16f32_P1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv16f32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv16f32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv16f32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv16f32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x float> @llvm.vp.load.nxv16f32.p0(ptr %p, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 16 x float> %x
+}
+
+
+define <vscale x 16 x float> @test_nontemporal_vp_load_nxv16f32_PALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv16f32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv16f32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv16f32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv16f32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x float> @llvm.vp.load.nxv16f32.p0(ptr %p, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 16 x float> %x
+}
+
+
+define <vscale x 16 x float> @test_nontemporal_vp_load_nxv16f32_S1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv16f32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv16f32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv16f32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv16f32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x float> @llvm.vp.load.nxv16f32.p0(ptr %p, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 16 x float> %x
+}
+
+
+define <vscale x 16 x float> @test_nontemporal_vp_load_nxv16f32_ALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv16f32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv16f32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv16f32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv16f32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x float> @llvm.vp.load.nxv16f32.p0(ptr %p, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 16 x float> %x
+}
+
+define <vscale x 16 x float> @test_nontemporal_vp_load_nxv16f32_DEFAULT(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv16f32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv16f32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv16f32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv16f32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x float> @llvm.vp.load.nxv16f32.p0(ptr %p, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 16 x float> %x
+}
+
+
+define void @test_nontemporal_vp_store_nxv16f32_P1(<vscale x 16 x float> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv16f32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv16f32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv16f32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv16f32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv16f32.p0(<vscale x 16 x float> %val, ptr %p, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv16f32_PALL(<vscale x 16 x float> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv16f32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv16f32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv16f32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv16f32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv16f32.p0(<vscale x 16 x float> %val, ptr %p, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv16f32_S1(<vscale x 16 x float> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv16f32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv16f32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv16f32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv16f32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv16f32.p0(<vscale x 16 x float> %val, ptr %p, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv16f32_ALL(<vscale x 16 x float> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv16f32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv16f32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv16f32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv16f32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv16f32.p0(<vscale x 16 x float> %val, ptr %p, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_store_nxv16f32_DEFAULT(<vscale x 16 x float> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv16f32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv16f32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv16f32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv16f32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv16f32.p0(<vscale x 16 x float> %val, ptr %p, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 16 x float> @test_nontemporal_vp_gather_nxv16f32_P1(<vscale x 16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv16f32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: sub a2, a0, a1
+; CHECK-RV64V-NEXT: sltu a3, a0, a2
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a2, a3, a2
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v28, (zero), v16
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB820_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a0, a1
+; CHECK-RV64V-NEXT: .LBB820_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64V-NEXT: vmv8r.v v8, v24
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv16f32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv16f32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: sub a2, a0, a1
+; CHECK-RV64VC-NEXT: sltu a3, a0, a2
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a2, a2, a3
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v28, (zero), v16
+; CHECK-RV64VC-NEXT: bltu a0, a1, .LBB820_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a0, a1
+; CHECK-RV64VC-NEXT: .LBB820_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64VC-NEXT: vmv8r.v v8, v24
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv16f32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x float> @llvm.vp.gather.nxv16f32.nxv16p0(<vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 16 x float> %x
+}
+
+
+define <vscale x 16 x float> @test_nontemporal_vp_gather_nxv16f32_PALL(<vscale x 16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv16f32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: sub a2, a0, a1
+; CHECK-RV64V-NEXT: sltu a3, a0, a2
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a2, a3, a2
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v28, (zero), v16
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB821_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a0, a1
+; CHECK-RV64V-NEXT: .LBB821_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64V-NEXT: vmv8r.v v8, v24
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv16f32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv16f32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: sub a2, a0, a1
+; CHECK-RV64VC-NEXT: sltu a3, a0, a2
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a2, a2, a3
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v28, (zero), v16
+; CHECK-RV64VC-NEXT: bltu a0, a1, .LBB821_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a0, a1
+; CHECK-RV64VC-NEXT: .LBB821_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64VC-NEXT: vmv8r.v v8, v24
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv16f32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x float> @llvm.vp.gather.nxv16f32.nxv16p0(<vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 16 x float> %x
+}
+
+
+define <vscale x 16 x float> @test_nontemporal_vp_gather_nxv16f32_S1(<vscale x 16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv16f32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: sub a2, a0, a1
+; CHECK-RV64V-NEXT: sltu a3, a0, a2
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a2, a3, a2
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v28, (zero), v16
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB822_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a0, a1
+; CHECK-RV64V-NEXT: .LBB822_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64V-NEXT: vmv8r.v v8, v24
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv16f32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv16f32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: sub a2, a0, a1
+; CHECK-RV64VC-NEXT: sltu a3, a0, a2
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a2, a2, a3
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v28, (zero), v16
+; CHECK-RV64VC-NEXT: bltu a0, a1, .LBB822_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a0, a1
+; CHECK-RV64VC-NEXT: .LBB822_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64VC-NEXT: vmv8r.v v8, v24
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv16f32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x float> @llvm.vp.gather.nxv16f32.nxv16p0(<vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 16 x float> %x
+}
+
+
+define <vscale x 16 x float> @test_nontemporal_vp_gather_nxv16f32_ALL(<vscale x 16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv16f32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: sub a2, a0, a1
+; CHECK-RV64V-NEXT: sltu a3, a0, a2
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a2, a3, a2
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v28, (zero), v16
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB823_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a0, a1
+; CHECK-RV64V-NEXT: .LBB823_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64V-NEXT: vmv8r.v v8, v24
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv16f32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv16f32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: sub a2, a0, a1
+; CHECK-RV64VC-NEXT: sltu a3, a0, a2
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a2, a2, a3
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v28, (zero), v16
+; CHECK-RV64VC-NEXT: bltu a0, a1, .LBB823_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a0, a1
+; CHECK-RV64VC-NEXT: .LBB823_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64VC-NEXT: vmv8r.v v8, v24
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv16f32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x float> @llvm.vp.gather.nxv16f32.nxv16p0(<vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 16 x float> %x
+}
+
+define <vscale x 16 x float> @test_nontemporal_vp_gather_nxv16f32_DEFAULT(<vscale x 16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv16f32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: sub a2, a0, a1
+; CHECK-RV64V-NEXT: sltu a3, a0, a2
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a2, a3, a2
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v28, (zero), v16
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB824_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a0, a1
+; CHECK-RV64V-NEXT: .LBB824_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64V-NEXT: vmv8r.v v8, v24
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv16f32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv16f32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: sub a2, a0, a1
+; CHECK-RV64VC-NEXT: sltu a3, a0, a2
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a2, a2, a3
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v28, (zero), v16
+; CHECK-RV64VC-NEXT: bltu a0, a1, .LBB824_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a0, a1
+; CHECK-RV64VC-NEXT: .LBB824_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64VC-NEXT: vmv8r.v v8, v24
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv16f32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x float> @llvm.vp.gather.nxv16f32.nxv16p0(<vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 16 x float> %x
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv16f32_P1(<vscale x 16 x float> %val, <vscale x 16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv16f32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: bltu a1, a0, .LBB825_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a2, a0
+; CHECK-RV64V-NEXT: .LBB825_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: sub a0, a1, a0
+; CHECK-RV64V-NEXT: sltu a1, a1, a0
+; CHECK-RV64V-NEXT: addi a1, a1, -1
+; CHECK-RV64V-NEXT: and a0, a1, a0
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v12, (zero), v24
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv16f32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv16f32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: bltu a1, a0, .LBB825_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a2, a0
+; CHECK-RV64VC-NEXT: .LBB825_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: sub a0, a1, a0
+; CHECK-RV64VC-NEXT: sltu a1, a1, a0
+; CHECK-RV64VC-NEXT: addi a1, a1, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a1
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v12, (zero), v24
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv16f32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv16f32.nxv16p0(<vscale x 16 x float> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv16f32_PALL(<vscale x 16 x float> %val, <vscale x 16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv16f32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: bltu a1, a0, .LBB826_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a2, a0
+; CHECK-RV64V-NEXT: .LBB826_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: sub a0, a1, a0
+; CHECK-RV64V-NEXT: sltu a1, a1, a0
+; CHECK-RV64V-NEXT: addi a1, a1, -1
+; CHECK-RV64V-NEXT: and a0, a1, a0
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v12, (zero), v24
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv16f32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv16f32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: bltu a1, a0, .LBB826_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a2, a0
+; CHECK-RV64VC-NEXT: .LBB826_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: sub a0, a1, a0
+; CHECK-RV64VC-NEXT: sltu a1, a1, a0
+; CHECK-RV64VC-NEXT: addi a1, a1, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a1
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v12, (zero), v24
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv16f32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv16f32.nxv16p0(<vscale x 16 x float> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv16f32_S1(<vscale x 16 x float> %val, <vscale x 16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv16f32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: bltu a1, a0, .LBB827_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a2, a0
+; CHECK-RV64V-NEXT: .LBB827_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: sub a0, a1, a0
+; CHECK-RV64V-NEXT: sltu a1, a1, a0
+; CHECK-RV64V-NEXT: addi a1, a1, -1
+; CHECK-RV64V-NEXT: and a0, a1, a0
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v12, (zero), v24
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv16f32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv16f32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: bltu a1, a0, .LBB827_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a2, a0
+; CHECK-RV64VC-NEXT: .LBB827_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: sub a0, a1, a0
+; CHECK-RV64VC-NEXT: sltu a1, a1, a0
+; CHECK-RV64VC-NEXT: addi a1, a1, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a1
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v12, (zero), v24
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv16f32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv16f32.nxv16p0(<vscale x 16 x float> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv16f32_ALL(<vscale x 16 x float> %val, <vscale x 16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv16f32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: bltu a1, a0, .LBB828_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a2, a0
+; CHECK-RV64V-NEXT: .LBB828_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: sub a0, a1, a0
+; CHECK-RV64V-NEXT: sltu a1, a1, a0
+; CHECK-RV64V-NEXT: addi a1, a1, -1
+; CHECK-RV64V-NEXT: and a0, a1, a0
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v12, (zero), v24
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv16f32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv16f32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: bltu a1, a0, .LBB828_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a2, a0
+; CHECK-RV64VC-NEXT: .LBB828_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: sub a0, a1, a0
+; CHECK-RV64VC-NEXT: sltu a1, a1, a0
+; CHECK-RV64VC-NEXT: addi a1, a1, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a1
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v12, (zero), v24
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv16f32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv16f32.nxv16p0(<vscale x 16 x float> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_scatter_nxv16f32_DEFAULT(<vscale x 16 x float> %val, <vscale x 16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv16f32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: bltu a1, a0, .LBB829_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a2, a0
+; CHECK-RV64V-NEXT: .LBB829_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: sub a0, a1, a0
+; CHECK-RV64V-NEXT: sltu a1, a1, a0
+; CHECK-RV64V-NEXT: addi a1, a1, -1
+; CHECK-RV64V-NEXT: and a0, a1, a0
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v12, (zero), v24
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv16f32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv16f32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: bltu a1, a0, .LBB829_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a2, a0
+; CHECK-RV64VC-NEXT: .LBB829_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: sub a0, a1, a0
+; CHECK-RV64VC-NEXT: sltu a1, a1, a0
+; CHECK-RV64VC-NEXT: addi a1, a1, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a1
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v12, (zero), v24
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv16f32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv16f32.nxv16p0(<vscale x 16 x float> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 16 x float> @test_nontemporal_vp_strided.load_nxv16f32_P1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv16f32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv16f32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv16f32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv16f32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x float> @llvm.experimental.vp.strided.load.nxv16f32.i64(ptr %p, i64 %stride, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 16 x float> %x
+}
+
+
+define <vscale x 16 x float> @test_nontemporal_vp_strided.load_nxv16f32_PALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv16f32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv16f32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv16f32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv16f32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x float> @llvm.experimental.vp.strided.load.nxv16f32.i64(ptr %p, i64 %stride, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 16 x float> %x
+}
+
+
+define <vscale x 16 x float> @test_nontemporal_vp_strided.load_nxv16f32_S1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv16f32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv16f32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv16f32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv16f32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x float> @llvm.experimental.vp.strided.load.nxv16f32.i64(ptr %p, i64 %stride, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 16 x float> %x
+}
+
+
+define <vscale x 16 x float> @test_nontemporal_vp_strided.load_nxv16f32_ALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv16f32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv16f32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv16f32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv16f32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x float> @llvm.experimental.vp.strided.load.nxv16f32.i64(ptr %p, i64 %stride, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 16 x float> %x
+}
+
+define <vscale x 16 x float> @test_nontemporal_vp_strided.load_nxv16f32_DEFAULT(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv16f32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv16f32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv16f32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv16f32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 16 x float> @llvm.experimental.vp.strided.load.nxv16f32.i64(ptr %p, i64 %stride, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 16 x float> %x
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv16f32_P1(<vscale x 16 x float> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv16f32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv16f32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv16f32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv16f32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv16f32.i64(<vscale x 16 x float> %val, ptr %p, i64 %stride, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv16f32_PALL(<vscale x 16 x float> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv16f32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv16f32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv16f32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv16f32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv16f32.i64(<vscale x 16 x float> %val, ptr %p, i64 %stride, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv16f32_S1(<vscale x 16 x float> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv16f32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv16f32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv16f32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv16f32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv16f32.i64(<vscale x 16 x float> %val, ptr %p, i64 %stride, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv16f32_ALL(<vscale x 16 x float> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv16f32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv16f32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv16f32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv16f32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv16f32.i64(<vscale x 16 x float> %val, ptr %p, i64 %stride, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_strided.store_nxv16f32_DEFAULT(<vscale x 16 x float> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv16f32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv16f32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv16f32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv16f32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv16f32.i64(<vscale x 16 x float> %val, ptr %p, i64 %stride, <vscale x 16 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 32 x i8> @test_nontemporal_vp_load_nxv32i8_P1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv32i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv32i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv32i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv32i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 32 x i8> @llvm.vp.load.nxv32i8.p0(ptr %p, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 32 x i8> %x
+}
+
+
+define <vscale x 32 x i8> @test_nontemporal_vp_load_nxv32i8_PALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv32i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv32i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv32i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv32i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 32 x i8> @llvm.vp.load.nxv32i8.p0(ptr %p, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 32 x i8> %x
+}
+
+
+define <vscale x 32 x i8> @test_nontemporal_vp_load_nxv32i8_S1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv32i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv32i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv32i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv32i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 32 x i8> @llvm.vp.load.nxv32i8.p0(ptr %p, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 32 x i8> %x
+}
+
+
+define <vscale x 32 x i8> @test_nontemporal_vp_load_nxv32i8_ALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv32i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv32i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv32i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv32i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 32 x i8> @llvm.vp.load.nxv32i8.p0(ptr %p, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 32 x i8> %x
+}
+
+define <vscale x 32 x i8> @test_nontemporal_vp_load_nxv32i8_DEFAULT(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv32i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv32i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv32i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv32i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 32 x i8> @llvm.vp.load.nxv32i8.p0(ptr %p, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 32 x i8> %x
+}
+
+
+define void @test_nontemporal_vp_store_nxv32i8_P1(<vscale x 32 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv32i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vse8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv32i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vse8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv32i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv32i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv32i8.p0(<vscale x 32 x i8> %val, ptr %p, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv32i8_PALL(<vscale x 32 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv32i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vse8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv32i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vse8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv32i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv32i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv32i8.p0(<vscale x 32 x i8> %val, ptr %p, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv32i8_S1(<vscale x 32 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv32i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vse8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv32i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vse8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv32i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv32i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv32i8.p0(<vscale x 32 x i8> %val, ptr %p, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv32i8_ALL(<vscale x 32 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv32i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv32i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv32i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv32i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv32i8.p0(<vscale x 32 x i8> %val, ptr %p, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_store_nxv32i8_DEFAULT(<vscale x 32 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv32i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv32i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv32i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv32i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv32i8.p0(<vscale x 32 x i8> %val, ptr %p, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 32 x i8> @test_nontemporal_vp_gather_nxv32i8_P1(<vscale x 32 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv32i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: addi sp, sp, -16
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 3
+; CHECK-RV64V-NEXT: sub sp, sp, a1
+; CHECK-RV64V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV64V-NEXT: addi a1, sp, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64V-NEXT: slli a4, a1, 3
+; CHECK-RV64V-NEXT: slli a3, a1, 1
+; CHECK-RV64V-NEXT: add a0, a0, a4
+; CHECK-RV64V-NEXT: sub a4, a2, a3
+; CHECK-RV64V-NEXT: vl8re64.v v0, (a0)
+; CHECK-RV64V-NEXT: sltu a0, a2, a4
+; CHECK-RV64V-NEXT: addi a0, a0, -1
+; CHECK-RV64V-NEXT: and a0, a0, a4
+; CHECK-RV64V-NEXT: sub a4, a0, a1
+; CHECK-RV64V-NEXT: sltu a5, a0, a4
+; CHECK-RV64V-NEXT: addi a5, a5, -1
+; CHECK-RV64V-NEXT: and a4, a5, a4
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB850_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a0, a1
+; CHECK-RV64V-NEXT: .LBB850_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a4, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v27, (zero), v0
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v26, (zero), v8
+; CHECK-RV64V-NEXT: bltu a2, a3, .LBB850_4
+; CHECK-RV64V-NEXT: # %bb.3:
+; CHECK-RV64V-NEXT: mv a2, a3
+; CHECK-RV64V-NEXT: .LBB850_4:
+; CHECK-RV64V-NEXT: sub a0, a2, a1
+; CHECK-RV64V-NEXT: sltu a3, a2, a0
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a0, a3, a0
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v25, (zero), v16
+; CHECK-RV64V-NEXT: bltu a2, a1, .LBB850_6
+; CHECK-RV64V-NEXT: # %bb.5:
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: .LBB850_6:
+; CHECK-RV64V-NEXT: addi a0, sp, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64V-NEXT: vmv4r.v v8, v24
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: add sp, sp, a0
+; CHECK-RV64V-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV64V-NEXT: addi sp, sp, 16
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv32i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: csrr a1, vlenb
+; CHECK-RV32V-NEXT: slli a1, a1, 1
+; CHECK-RV32V-NEXT: sub a2, a0, a1
+; CHECK-RV32V-NEXT: sltu a3, a0, a2
+; CHECK-RV32V-NEXT: addi a3, a3, -1
+; CHECK-RV32V-NEXT: and a2, a3, a2
+; CHECK-RV32V-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vluxei32.v v26, (zero), v16
+; CHECK-RV32V-NEXT: bltu a0, a1, .LBB850_2
+; CHECK-RV32V-NEXT: # %bb.1:
+; CHECK-RV32V-NEXT: mv a0, a1
+; CHECK-RV32V-NEXT: .LBB850_2:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vluxei32.v v24, (zero), v8
+; CHECK-RV32V-NEXT: vmv4r.v v8, v24
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv32i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: addi sp, sp, -16
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 3
+; CHECK-RV64VC-NEXT: sub sp, sp, a1
+; CHECK-RV64VC-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV64VC-NEXT: addi a1, sp, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64VC-NEXT: slli a4, a1, 3
+; CHECK-RV64VC-NEXT: slli a3, a1, 1
+; CHECK-RV64VC-NEXT: add a0, a0, a4
+; CHECK-RV64VC-NEXT: sub a4, a2, a3
+; CHECK-RV64VC-NEXT: vl8re64.v v0, (a0)
+; CHECK-RV64VC-NEXT: sltu a0, a2, a4
+; CHECK-RV64VC-NEXT: addi a0, a0, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a4
+; CHECK-RV64VC-NEXT: sub a4, a0, a1
+; CHECK-RV64VC-NEXT: sltu a5, a0, a4
+; CHECK-RV64VC-NEXT: addi a5, a5, -1
+; CHECK-RV64VC-NEXT: and a4, a4, a5
+; CHECK-RV64VC-NEXT: bltu a0, a1, .LBB850_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a0, a1
+; CHECK-RV64VC-NEXT: .LBB850_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a4, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v27, (zero), v0
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v26, (zero), v8
+; CHECK-RV64VC-NEXT: bltu a2, a3, .LBB850_4
+; CHECK-RV64VC-NEXT: # %bb.3:
+; CHECK-RV64VC-NEXT: mv a2, a3
+; CHECK-RV64VC-NEXT: .LBB850_4:
+; CHECK-RV64VC-NEXT: sub a0, a2, a1
+; CHECK-RV64VC-NEXT: sltu a3, a2, a0
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a3
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v25, (zero), v16
+; CHECK-RV64VC-NEXT: bltu a2, a1, .LBB850_6
+; CHECK-RV64VC-NEXT: # %bb.5:
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: .LBB850_6:
+; CHECK-RV64VC-NEXT: addi a0, sp, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64VC-NEXT: vmv4r.v v8, v24
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: add sp, sp, a0
+; CHECK-RV64VC-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV64VC-NEXT: addi sp, sp, 16
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv32i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: csrr a1, vlenb
+; CHECK-RV32VC-NEXT: slli a1, a1, 1
+; CHECK-RV32VC-NEXT: sub a2, a0, a1
+; CHECK-RV32VC-NEXT: sltu a3, a0, a2
+; CHECK-RV32VC-NEXT: addi a3, a3, -1
+; CHECK-RV32VC-NEXT: and a2, a2, a3
+; CHECK-RV32VC-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vluxei32.v v26, (zero), v16
+; CHECK-RV32VC-NEXT: bltu a0, a1, .LBB850_2
+; CHECK-RV32VC-NEXT: # %bb.1:
+; CHECK-RV32VC-NEXT: mv a0, a1
+; CHECK-RV32VC-NEXT: .LBB850_2:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vluxei32.v v24, (zero), v8
+; CHECK-RV32VC-NEXT: vmv4r.v v8, v24
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 32 x i8> @llvm.vp.gather.nxv32i8.nxv32p0(<vscale x 32 x ptr> %ptrs, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 32 x i8> %x
+}
+
+
+define <vscale x 32 x i8> @test_nontemporal_vp_gather_nxv32i8_PALL(<vscale x 32 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv32i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: addi sp, sp, -16
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 3
+; CHECK-RV64V-NEXT: sub sp, sp, a1
+; CHECK-RV64V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV64V-NEXT: addi a1, sp, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64V-NEXT: slli a4, a1, 3
+; CHECK-RV64V-NEXT: slli a3, a1, 1
+; CHECK-RV64V-NEXT: add a0, a0, a4
+; CHECK-RV64V-NEXT: sub a4, a2, a3
+; CHECK-RV64V-NEXT: vl8re64.v v0, (a0)
+; CHECK-RV64V-NEXT: sltu a0, a2, a4
+; CHECK-RV64V-NEXT: addi a0, a0, -1
+; CHECK-RV64V-NEXT: and a0, a0, a4
+; CHECK-RV64V-NEXT: sub a4, a0, a1
+; CHECK-RV64V-NEXT: sltu a5, a0, a4
+; CHECK-RV64V-NEXT: addi a5, a5, -1
+; CHECK-RV64V-NEXT: and a4, a5, a4
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB851_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a0, a1
+; CHECK-RV64V-NEXT: .LBB851_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a4, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v27, (zero), v0
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v26, (zero), v8
+; CHECK-RV64V-NEXT: bltu a2, a3, .LBB851_4
+; CHECK-RV64V-NEXT: # %bb.3:
+; CHECK-RV64V-NEXT: mv a2, a3
+; CHECK-RV64V-NEXT: .LBB851_4:
+; CHECK-RV64V-NEXT: sub a0, a2, a1
+; CHECK-RV64V-NEXT: sltu a3, a2, a0
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a0, a3, a0
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v25, (zero), v16
+; CHECK-RV64V-NEXT: bltu a2, a1, .LBB851_6
+; CHECK-RV64V-NEXT: # %bb.5:
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: .LBB851_6:
+; CHECK-RV64V-NEXT: addi a0, sp, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64V-NEXT: vmv4r.v v8, v24
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: add sp, sp, a0
+; CHECK-RV64V-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV64V-NEXT: addi sp, sp, 16
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv32i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: csrr a1, vlenb
+; CHECK-RV32V-NEXT: slli a1, a1, 1
+; CHECK-RV32V-NEXT: sub a2, a0, a1
+; CHECK-RV32V-NEXT: sltu a3, a0, a2
+; CHECK-RV32V-NEXT: addi a3, a3, -1
+; CHECK-RV32V-NEXT: and a2, a3, a2
+; CHECK-RV32V-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vluxei32.v v26, (zero), v16
+; CHECK-RV32V-NEXT: bltu a0, a1, .LBB851_2
+; CHECK-RV32V-NEXT: # %bb.1:
+; CHECK-RV32V-NEXT: mv a0, a1
+; CHECK-RV32V-NEXT: .LBB851_2:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vluxei32.v v24, (zero), v8
+; CHECK-RV32V-NEXT: vmv4r.v v8, v24
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv32i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: addi sp, sp, -16
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 3
+; CHECK-RV64VC-NEXT: sub sp, sp, a1
+; CHECK-RV64VC-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV64VC-NEXT: addi a1, sp, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64VC-NEXT: slli a4, a1, 3
+; CHECK-RV64VC-NEXT: slli a3, a1, 1
+; CHECK-RV64VC-NEXT: add a0, a0, a4
+; CHECK-RV64VC-NEXT: sub a4, a2, a3
+; CHECK-RV64VC-NEXT: vl8re64.v v0, (a0)
+; CHECK-RV64VC-NEXT: sltu a0, a2, a4
+; CHECK-RV64VC-NEXT: addi a0, a0, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a4
+; CHECK-RV64VC-NEXT: sub a4, a0, a1
+; CHECK-RV64VC-NEXT: sltu a5, a0, a4
+; CHECK-RV64VC-NEXT: addi a5, a5, -1
+; CHECK-RV64VC-NEXT: and a4, a4, a5
+; CHECK-RV64VC-NEXT: bltu a0, a1, .LBB851_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a0, a1
+; CHECK-RV64VC-NEXT: .LBB851_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a4, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v27, (zero), v0
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v26, (zero), v8
+; CHECK-RV64VC-NEXT: bltu a2, a3, .LBB851_4
+; CHECK-RV64VC-NEXT: # %bb.3:
+; CHECK-RV64VC-NEXT: mv a2, a3
+; CHECK-RV64VC-NEXT: .LBB851_4:
+; CHECK-RV64VC-NEXT: sub a0, a2, a1
+; CHECK-RV64VC-NEXT: sltu a3, a2, a0
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a3
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v25, (zero), v16
+; CHECK-RV64VC-NEXT: bltu a2, a1, .LBB851_6
+; CHECK-RV64VC-NEXT: # %bb.5:
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: .LBB851_6:
+; CHECK-RV64VC-NEXT: addi a0, sp, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64VC-NEXT: vmv4r.v v8, v24
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: add sp, sp, a0
+; CHECK-RV64VC-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV64VC-NEXT: addi sp, sp, 16
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv32i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: csrr a1, vlenb
+; CHECK-RV32VC-NEXT: slli a1, a1, 1
+; CHECK-RV32VC-NEXT: sub a2, a0, a1
+; CHECK-RV32VC-NEXT: sltu a3, a0, a2
+; CHECK-RV32VC-NEXT: addi a3, a3, -1
+; CHECK-RV32VC-NEXT: and a2, a2, a3
+; CHECK-RV32VC-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vluxei32.v v26, (zero), v16
+; CHECK-RV32VC-NEXT: bltu a0, a1, .LBB851_2
+; CHECK-RV32VC-NEXT: # %bb.1:
+; CHECK-RV32VC-NEXT: mv a0, a1
+; CHECK-RV32VC-NEXT: .LBB851_2:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vluxei32.v v24, (zero), v8
+; CHECK-RV32VC-NEXT: vmv4r.v v8, v24
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 32 x i8> @llvm.vp.gather.nxv32i8.nxv32p0(<vscale x 32 x ptr> %ptrs, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 32 x i8> %x
+}
+
+
+define <vscale x 32 x i8> @test_nontemporal_vp_gather_nxv32i8_S1(<vscale x 32 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv32i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: addi sp, sp, -16
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 3
+; CHECK-RV64V-NEXT: sub sp, sp, a1
+; CHECK-RV64V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV64V-NEXT: addi a1, sp, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64V-NEXT: slli a4, a1, 3
+; CHECK-RV64V-NEXT: slli a3, a1, 1
+; CHECK-RV64V-NEXT: add a0, a0, a4
+; CHECK-RV64V-NEXT: sub a4, a2, a3
+; CHECK-RV64V-NEXT: vl8re64.v v0, (a0)
+; CHECK-RV64V-NEXT: sltu a0, a2, a4
+; CHECK-RV64V-NEXT: addi a0, a0, -1
+; CHECK-RV64V-NEXT: and a0, a0, a4
+; CHECK-RV64V-NEXT: sub a4, a0, a1
+; CHECK-RV64V-NEXT: sltu a5, a0, a4
+; CHECK-RV64V-NEXT: addi a5, a5, -1
+; CHECK-RV64V-NEXT: and a4, a5, a4
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB852_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a0, a1
+; CHECK-RV64V-NEXT: .LBB852_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a4, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v27, (zero), v0
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v26, (zero), v8
+; CHECK-RV64V-NEXT: bltu a2, a3, .LBB852_4
+; CHECK-RV64V-NEXT: # %bb.3:
+; CHECK-RV64V-NEXT: mv a2, a3
+; CHECK-RV64V-NEXT: .LBB852_4:
+; CHECK-RV64V-NEXT: sub a0, a2, a1
+; CHECK-RV64V-NEXT: sltu a3, a2, a0
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a0, a3, a0
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v25, (zero), v16
+; CHECK-RV64V-NEXT: bltu a2, a1, .LBB852_6
+; CHECK-RV64V-NEXT: # %bb.5:
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: .LBB852_6:
+; CHECK-RV64V-NEXT: addi a0, sp, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64V-NEXT: vmv4r.v v8, v24
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: add sp, sp, a0
+; CHECK-RV64V-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV64V-NEXT: addi sp, sp, 16
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv32i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: csrr a1, vlenb
+; CHECK-RV32V-NEXT: slli a1, a1, 1
+; CHECK-RV32V-NEXT: sub a2, a0, a1
+; CHECK-RV32V-NEXT: sltu a3, a0, a2
+; CHECK-RV32V-NEXT: addi a3, a3, -1
+; CHECK-RV32V-NEXT: and a2, a3, a2
+; CHECK-RV32V-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vluxei32.v v26, (zero), v16
+; CHECK-RV32V-NEXT: bltu a0, a1, .LBB852_2
+; CHECK-RV32V-NEXT: # %bb.1:
+; CHECK-RV32V-NEXT: mv a0, a1
+; CHECK-RV32V-NEXT: .LBB852_2:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vluxei32.v v24, (zero), v8
+; CHECK-RV32V-NEXT: vmv4r.v v8, v24
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv32i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: addi sp, sp, -16
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 3
+; CHECK-RV64VC-NEXT: sub sp, sp, a1
+; CHECK-RV64VC-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV64VC-NEXT: addi a1, sp, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64VC-NEXT: slli a4, a1, 3
+; CHECK-RV64VC-NEXT: slli a3, a1, 1
+; CHECK-RV64VC-NEXT: add a0, a0, a4
+; CHECK-RV64VC-NEXT: sub a4, a2, a3
+; CHECK-RV64VC-NEXT: vl8re64.v v0, (a0)
+; CHECK-RV64VC-NEXT: sltu a0, a2, a4
+; CHECK-RV64VC-NEXT: addi a0, a0, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a4
+; CHECK-RV64VC-NEXT: sub a4, a0, a1
+; CHECK-RV64VC-NEXT: sltu a5, a0, a4
+; CHECK-RV64VC-NEXT: addi a5, a5, -1
+; CHECK-RV64VC-NEXT: and a4, a4, a5
+; CHECK-RV64VC-NEXT: bltu a0, a1, .LBB852_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a0, a1
+; CHECK-RV64VC-NEXT: .LBB852_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a4, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v27, (zero), v0
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v26, (zero), v8
+; CHECK-RV64VC-NEXT: bltu a2, a3, .LBB852_4
+; CHECK-RV64VC-NEXT: # %bb.3:
+; CHECK-RV64VC-NEXT: mv a2, a3
+; CHECK-RV64VC-NEXT: .LBB852_4:
+; CHECK-RV64VC-NEXT: sub a0, a2, a1
+; CHECK-RV64VC-NEXT: sltu a3, a2, a0
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a3
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v25, (zero), v16
+; CHECK-RV64VC-NEXT: bltu a2, a1, .LBB852_6
+; CHECK-RV64VC-NEXT: # %bb.5:
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: .LBB852_6:
+; CHECK-RV64VC-NEXT: addi a0, sp, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64VC-NEXT: vmv4r.v v8, v24
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: add sp, sp, a0
+; CHECK-RV64VC-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV64VC-NEXT: addi sp, sp, 16
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv32i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: csrr a1, vlenb
+; CHECK-RV32VC-NEXT: slli a1, a1, 1
+; CHECK-RV32VC-NEXT: sub a2, a0, a1
+; CHECK-RV32VC-NEXT: sltu a3, a0, a2
+; CHECK-RV32VC-NEXT: addi a3, a3, -1
+; CHECK-RV32VC-NEXT: and a2, a2, a3
+; CHECK-RV32VC-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vluxei32.v v26, (zero), v16
+; CHECK-RV32VC-NEXT: bltu a0, a1, .LBB852_2
+; CHECK-RV32VC-NEXT: # %bb.1:
+; CHECK-RV32VC-NEXT: mv a0, a1
+; CHECK-RV32VC-NEXT: .LBB852_2:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vluxei32.v v24, (zero), v8
+; CHECK-RV32VC-NEXT: vmv4r.v v8, v24
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 32 x i8> @llvm.vp.gather.nxv32i8.nxv32p0(<vscale x 32 x ptr> %ptrs, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 32 x i8> %x
+}
+
+
+define <vscale x 32 x i8> @test_nontemporal_vp_gather_nxv32i8_ALL(<vscale x 32 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv32i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: addi sp, sp, -16
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 3
+; CHECK-RV64V-NEXT: sub sp, sp, a1
+; CHECK-RV64V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV64V-NEXT: addi a1, sp, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64V-NEXT: slli a4, a1, 3
+; CHECK-RV64V-NEXT: slli a3, a1, 1
+; CHECK-RV64V-NEXT: add a0, a0, a4
+; CHECK-RV64V-NEXT: sub a4, a2, a3
+; CHECK-RV64V-NEXT: vl8re64.v v0, (a0)
+; CHECK-RV64V-NEXT: sltu a0, a2, a4
+; CHECK-RV64V-NEXT: addi a0, a0, -1
+; CHECK-RV64V-NEXT: and a0, a0, a4
+; CHECK-RV64V-NEXT: sub a4, a0, a1
+; CHECK-RV64V-NEXT: sltu a5, a0, a4
+; CHECK-RV64V-NEXT: addi a5, a5, -1
+; CHECK-RV64V-NEXT: and a4, a5, a4
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB853_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a0, a1
+; CHECK-RV64V-NEXT: .LBB853_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a4, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v27, (zero), v0
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v26, (zero), v8
+; CHECK-RV64V-NEXT: bltu a2, a3, .LBB853_4
+; CHECK-RV64V-NEXT: # %bb.3:
+; CHECK-RV64V-NEXT: mv a2, a3
+; CHECK-RV64V-NEXT: .LBB853_4:
+; CHECK-RV64V-NEXT: sub a0, a2, a1
+; CHECK-RV64V-NEXT: sltu a3, a2, a0
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a0, a3, a0
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v25, (zero), v16
+; CHECK-RV64V-NEXT: bltu a2, a1, .LBB853_6
+; CHECK-RV64V-NEXT: # %bb.5:
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: .LBB853_6:
+; CHECK-RV64V-NEXT: addi a0, sp, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64V-NEXT: vmv4r.v v8, v24
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: add sp, sp, a0
+; CHECK-RV64V-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV64V-NEXT: addi sp, sp, 16
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv32i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: csrr a1, vlenb
+; CHECK-RV32V-NEXT: slli a1, a1, 1
+; CHECK-RV32V-NEXT: sub a2, a0, a1
+; CHECK-RV32V-NEXT: sltu a3, a0, a2
+; CHECK-RV32V-NEXT: addi a3, a3, -1
+; CHECK-RV32V-NEXT: and a2, a3, a2
+; CHECK-RV32V-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v26, (zero), v16
+; CHECK-RV32V-NEXT: bltu a0, a1, .LBB853_2
+; CHECK-RV32V-NEXT: # %bb.1:
+; CHECK-RV32V-NEXT: mv a0, a1
+; CHECK-RV32V-NEXT: .LBB853_2:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v24, (zero), v8
+; CHECK-RV32V-NEXT: vmv4r.v v8, v24
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv32i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: addi sp, sp, -16
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 3
+; CHECK-RV64VC-NEXT: sub sp, sp, a1
+; CHECK-RV64VC-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV64VC-NEXT: addi a1, sp, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64VC-NEXT: slli a4, a1, 3
+; CHECK-RV64VC-NEXT: slli a3, a1, 1
+; CHECK-RV64VC-NEXT: add a0, a0, a4
+; CHECK-RV64VC-NEXT: sub a4, a2, a3
+; CHECK-RV64VC-NEXT: vl8re64.v v0, (a0)
+; CHECK-RV64VC-NEXT: sltu a0, a2, a4
+; CHECK-RV64VC-NEXT: addi a0, a0, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a4
+; CHECK-RV64VC-NEXT: sub a4, a0, a1
+; CHECK-RV64VC-NEXT: sltu a5, a0, a4
+; CHECK-RV64VC-NEXT: addi a5, a5, -1
+; CHECK-RV64VC-NEXT: and a4, a4, a5
+; CHECK-RV64VC-NEXT: bltu a0, a1, .LBB853_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a0, a1
+; CHECK-RV64VC-NEXT: .LBB853_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a4, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v27, (zero), v0
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v26, (zero), v8
+; CHECK-RV64VC-NEXT: bltu a2, a3, .LBB853_4
+; CHECK-RV64VC-NEXT: # %bb.3:
+; CHECK-RV64VC-NEXT: mv a2, a3
+; CHECK-RV64VC-NEXT: .LBB853_4:
+; CHECK-RV64VC-NEXT: sub a0, a2, a1
+; CHECK-RV64VC-NEXT: sltu a3, a2, a0
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a3
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v25, (zero), v16
+; CHECK-RV64VC-NEXT: bltu a2, a1, .LBB853_6
+; CHECK-RV64VC-NEXT: # %bb.5:
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: .LBB853_6:
+; CHECK-RV64VC-NEXT: addi a0, sp, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64VC-NEXT: vmv4r.v v8, v24
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: add sp, sp, a0
+; CHECK-RV64VC-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV64VC-NEXT: addi sp, sp, 16
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv32i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: csrr a1, vlenb
+; CHECK-RV32VC-NEXT: slli a1, a1, 1
+; CHECK-RV32VC-NEXT: sub a2, a0, a1
+; CHECK-RV32VC-NEXT: sltu a3, a0, a2
+; CHECK-RV32VC-NEXT: addi a3, a3, -1
+; CHECK-RV32VC-NEXT: and a2, a2, a3
+; CHECK-RV32VC-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v26, (zero), v16
+; CHECK-RV32VC-NEXT: bltu a0, a1, .LBB853_2
+; CHECK-RV32VC-NEXT: # %bb.1:
+; CHECK-RV32VC-NEXT: mv a0, a1
+; CHECK-RV32VC-NEXT: .LBB853_2:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v24, (zero), v8
+; CHECK-RV32VC-NEXT: vmv4r.v v8, v24
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 32 x i8> @llvm.vp.gather.nxv32i8.nxv32p0(<vscale x 32 x ptr> %ptrs, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 32 x i8> %x
+}
+
+define <vscale x 32 x i8> @test_nontemporal_vp_gather_nxv32i8_DEFAULT(<vscale x 32 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv32i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: addi sp, sp, -16
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 3
+; CHECK-RV64V-NEXT: sub sp, sp, a1
+; CHECK-RV64V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV64V-NEXT: addi a1, sp, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64V-NEXT: slli a4, a1, 3
+; CHECK-RV64V-NEXT: slli a3, a1, 1
+; CHECK-RV64V-NEXT: add a0, a0, a4
+; CHECK-RV64V-NEXT: sub a4, a2, a3
+; CHECK-RV64V-NEXT: vl8re64.v v0, (a0)
+; CHECK-RV64V-NEXT: sltu a0, a2, a4
+; CHECK-RV64V-NEXT: addi a0, a0, -1
+; CHECK-RV64V-NEXT: and a0, a0, a4
+; CHECK-RV64V-NEXT: sub a4, a0, a1
+; CHECK-RV64V-NEXT: sltu a5, a0, a4
+; CHECK-RV64V-NEXT: addi a5, a5, -1
+; CHECK-RV64V-NEXT: and a4, a5, a4
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB854_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a0, a1
+; CHECK-RV64V-NEXT: .LBB854_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a4, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v27, (zero), v0
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v26, (zero), v8
+; CHECK-RV64V-NEXT: bltu a2, a3, .LBB854_4
+; CHECK-RV64V-NEXT: # %bb.3:
+; CHECK-RV64V-NEXT: mv a2, a3
+; CHECK-RV64V-NEXT: .LBB854_4:
+; CHECK-RV64V-NEXT: sub a0, a2, a1
+; CHECK-RV64V-NEXT: sltu a3, a2, a0
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a0, a3, a0
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v25, (zero), v16
+; CHECK-RV64V-NEXT: bltu a2, a1, .LBB854_6
+; CHECK-RV64V-NEXT: # %bb.5:
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: .LBB854_6:
+; CHECK-RV64V-NEXT: addi a0, sp, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64V-NEXT: vmv4r.v v8, v24
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: add sp, sp, a0
+; CHECK-RV64V-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV64V-NEXT: addi sp, sp, 16
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv32i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: csrr a1, vlenb
+; CHECK-RV32V-NEXT: slli a1, a1, 1
+; CHECK-RV32V-NEXT: sub a2, a0, a1
+; CHECK-RV32V-NEXT: sltu a3, a0, a2
+; CHECK-RV32V-NEXT: addi a3, a3, -1
+; CHECK-RV32V-NEXT: and a2, a3, a2
+; CHECK-RV32V-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v26, (zero), v16
+; CHECK-RV32V-NEXT: bltu a0, a1, .LBB854_2
+; CHECK-RV32V-NEXT: # %bb.1:
+; CHECK-RV32V-NEXT: mv a0, a1
+; CHECK-RV32V-NEXT: .LBB854_2:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v24, (zero), v8
+; CHECK-RV32V-NEXT: vmv4r.v v8, v24
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv32i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: addi sp, sp, -16
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 3
+; CHECK-RV64VC-NEXT: sub sp, sp, a1
+; CHECK-RV64VC-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV64VC-NEXT: addi a1, sp, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64VC-NEXT: slli a4, a1, 3
+; CHECK-RV64VC-NEXT: slli a3, a1, 1
+; CHECK-RV64VC-NEXT: add a0, a0, a4
+; CHECK-RV64VC-NEXT: sub a4, a2, a3
+; CHECK-RV64VC-NEXT: vl8re64.v v0, (a0)
+; CHECK-RV64VC-NEXT: sltu a0, a2, a4
+; CHECK-RV64VC-NEXT: addi a0, a0, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a4
+; CHECK-RV64VC-NEXT: sub a4, a0, a1
+; CHECK-RV64VC-NEXT: sltu a5, a0, a4
+; CHECK-RV64VC-NEXT: addi a5, a5, -1
+; CHECK-RV64VC-NEXT: and a4, a4, a5
+; CHECK-RV64VC-NEXT: bltu a0, a1, .LBB854_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a0, a1
+; CHECK-RV64VC-NEXT: .LBB854_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a4, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v27, (zero), v0
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v26, (zero), v8
+; CHECK-RV64VC-NEXT: bltu a2, a3, .LBB854_4
+; CHECK-RV64VC-NEXT: # %bb.3:
+; CHECK-RV64VC-NEXT: mv a2, a3
+; CHECK-RV64VC-NEXT: .LBB854_4:
+; CHECK-RV64VC-NEXT: sub a0, a2, a1
+; CHECK-RV64VC-NEXT: sltu a3, a2, a0
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a3
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v25, (zero), v16
+; CHECK-RV64VC-NEXT: bltu a2, a1, .LBB854_6
+; CHECK-RV64VC-NEXT: # %bb.5:
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: .LBB854_6:
+; CHECK-RV64VC-NEXT: addi a0, sp, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64VC-NEXT: vmv4r.v v8, v24
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: add sp, sp, a0
+; CHECK-RV64VC-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV64VC-NEXT: addi sp, sp, 16
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv32i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: csrr a1, vlenb
+; CHECK-RV32VC-NEXT: slli a1, a1, 1
+; CHECK-RV32VC-NEXT: sub a2, a0, a1
+; CHECK-RV32VC-NEXT: sltu a3, a0, a2
+; CHECK-RV32VC-NEXT: addi a3, a3, -1
+; CHECK-RV32VC-NEXT: and a2, a2, a3
+; CHECK-RV32VC-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v26, (zero), v16
+; CHECK-RV32VC-NEXT: bltu a0, a1, .LBB854_2
+; CHECK-RV32VC-NEXT: # %bb.1:
+; CHECK-RV32VC-NEXT: mv a0, a1
+; CHECK-RV32VC-NEXT: .LBB854_2:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v24, (zero), v8
+; CHECK-RV32VC-NEXT: vmv4r.v v8, v24
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 32 x i8> @llvm.vp.gather.nxv32i8.nxv32p0(<vscale x 32 x ptr> %ptrs, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 32 x i8> %x
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv32i8_P1(<vscale x 32 x i8> %val, <vscale x 32 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv32i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: addi sp, sp, -16
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 2
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: slli a1, a1, 2
+; CHECK-RV64V-NEXT: add a1, a1, a2
+; CHECK-RV64V-NEXT: sub sp, sp, a1
+; CHECK-RV64V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x14, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 20 * vlenb
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 3
+; CHECK-RV64V-NEXT: add a1, sp, a1
+; CHECK-RV64V-NEXT: addi a1, a1, 16
+; CHECK-RV64V-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 4
+; CHECK-RV64V-NEXT: add a1, sp, a1
+; CHECK-RV64V-NEXT: addi a1, a1, 16
+; CHECK-RV64V-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a5, a1, 4
+; CHECK-RV64V-NEXT: slli a2, a1, 1
+; CHECK-RV64V-NEXT: slli a6, a1, 3
+; CHECK-RV64V-NEXT: mv a4, a3
+; CHECK-RV64V-NEXT: bltu a3, a2, .LBB855_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a4, a2
+; CHECK-RV64V-NEXT: .LBB855_2:
+; CHECK-RV64V-NEXT: vl8re64.v v0, (a0)
+; CHECK-RV64V-NEXT: add a5, a0, a5
+; CHECK-RV64V-NEXT: add a6, a0, a6
+; CHECK-RV64V-NEXT: mv a0, a4
+; CHECK-RV64V-NEXT: bltu a4, a1, .LBB855_4
+; CHECK-RV64V-NEXT: # %bb.3:
+; CHECK-RV64V-NEXT: mv a0, a1
+; CHECK-RV64V-NEXT: .LBB855_4:
+; CHECK-RV64V-NEXT: vl8re64.v v8, (a5)
+; CHECK-RV64V-NEXT: addi a5, sp, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a5) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: vl8re64.v v24, (a6)
+; CHECK-RV64V-NEXT: csrr a5, vlenb
+; CHECK-RV64V-NEXT: slli a5, a5, 4
+; CHECK-RV64V-NEXT: add a5, sp, a5
+; CHECK-RV64V-NEXT: addi a5, a5, 16
+; CHECK-RV64V-NEXT: vl4r.v v16, (a5) # vscale x 32-byte Folded Reload
+; CHECK-RV64V-NEXT: csrr a5, vlenb
+; CHECK-RV64V-NEXT: slli a5, a5, 3
+; CHECK-RV64V-NEXT: add a5, sp, a5
+; CHECK-RV64V-NEXT: addi a5, a5, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a5) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v16, (zero), v8
+; CHECK-RV64V-NEXT: sub a0, a4, a1
+; CHECK-RV64V-NEXT: sub a2, a3, a2
+; CHECK-RV64V-NEXT: sltu a4, a4, a0
+; CHECK-RV64V-NEXT: sltu a3, a3, a2
+; CHECK-RV64V-NEXT: addi a4, a4, -1
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a4, a4, a0
+; CHECK-RV64V-NEXT: and a0, a3, a2
+; CHECK-RV64V-NEXT: vsetvli zero, a4, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v17, (zero), v0
+; CHECK-RV64V-NEXT: mv a2, a0
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB855_6
+; CHECK-RV64V-NEXT: # %bb.5:
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: .LBB855_6:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v18, (zero), v24
+; CHECK-RV64V-NEXT: sub a1, a0, a1
+; CHECK-RV64V-NEXT: sltu a0, a0, a1
+; CHECK-RV64V-NEXT: addi a0, a0, -1
+; CHECK-RV64V-NEXT: and a0, a0, a1
+; CHECK-RV64V-NEXT: addi a1, sp, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v19, (zero), v8
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 2
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 2
+; CHECK-RV64V-NEXT: add a0, a0, a1
+; CHECK-RV64V-NEXT: add sp, sp, a0
+; CHECK-RV64V-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV64V-NEXT: addi sp, sp, 16
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv32i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vl8re32.v v24, (a0)
+; CHECK-RV32V-NEXT: csrr a0, vlenb
+; CHECK-RV32V-NEXT: slli a0, a0, 1
+; CHECK-RV32V-NEXT: mv a2, a1
+; CHECK-RV32V-NEXT: bltu a1, a0, .LBB855_2
+; CHECK-RV32V-NEXT: # %bb.1:
+; CHECK-RV32V-NEXT: mv a2, a0
+; CHECK-RV32V-NEXT: .LBB855_2:
+; CHECK-RV32V-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: sub a0, a1, a0
+; CHECK-RV32V-NEXT: sltu a1, a1, a0
+; CHECK-RV32V-NEXT: addi a1, a1, -1
+; CHECK-RV32V-NEXT: and a0, a1, a0
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsoxei32.v v10, (zero), v24
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv32i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: addi sp, sp, -16
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 2
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: slli a1, a1, 2
+; CHECK-RV64VC-NEXT: add a1, a1, a2
+; CHECK-RV64VC-NEXT: sub sp, sp, a1
+; CHECK-RV64VC-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x14, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 20 * vlenb
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 3
+; CHECK-RV64VC-NEXT: add a1, a1, sp
+; CHECK-RV64VC-NEXT: addi a1, a1, 16
+; CHECK-RV64VC-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 4
+; CHECK-RV64VC-NEXT: add a1, a1, sp
+; CHECK-RV64VC-NEXT: addi a1, a1, 16
+; CHECK-RV64VC-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a6, a1, 4
+; CHECK-RV64VC-NEXT: slli a2, a1, 1
+; CHECK-RV64VC-NEXT: slli a5, a1, 3
+; CHECK-RV64VC-NEXT: mv a4, a3
+; CHECK-RV64VC-NEXT: bltu a3, a2, .LBB855_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a4, a2
+; CHECK-RV64VC-NEXT: .LBB855_2:
+; CHECK-RV64VC-NEXT: vl8re64.v v0, (a0)
+; CHECK-RV64VC-NEXT: add a6, a6, a0
+; CHECK-RV64VC-NEXT: add a5, a5, a0
+; CHECK-RV64VC-NEXT: mv a0, a4
+; CHECK-RV64VC-NEXT: bltu a4, a1, .LBB855_4
+; CHECK-RV64VC-NEXT: # %bb.3:
+; CHECK-RV64VC-NEXT: mv a0, a1
+; CHECK-RV64VC-NEXT: .LBB855_4:
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (a6)
+; CHECK-RV64VC-NEXT: addi a6, sp, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a6) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: vl8re64.v v24, (a5)
+; CHECK-RV64VC-NEXT: csrr a5, vlenb
+; CHECK-RV64VC-NEXT: slli a5, a5, 4
+; CHECK-RV64VC-NEXT: add a5, a5, sp
+; CHECK-RV64VC-NEXT: addi a5, a5, 16
+; CHECK-RV64VC-NEXT: vl4r.v v16, (a5) # vscale x 32-byte Folded Reload
+; CHECK-RV64VC-NEXT: csrr a5, vlenb
+; CHECK-RV64VC-NEXT: slli a5, a5, 3
+; CHECK-RV64VC-NEXT: add a5, a5, sp
+; CHECK-RV64VC-NEXT: addi a5, a5, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a5) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v16, (zero), v8
+; CHECK-RV64VC-NEXT: sub a0, a4, a1
+; CHECK-RV64VC-NEXT: sub a2, a3, a2
+; CHECK-RV64VC-NEXT: sltu a4, a4, a0
+; CHECK-RV64VC-NEXT: sltu a3, a3, a2
+; CHECK-RV64VC-NEXT: addi a4, a4, -1
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a4, a4, a0
+; CHECK-RV64VC-NEXT: and a0, a3, a2
+; CHECK-RV64VC-NEXT: vsetvli zero, a4, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v17, (zero), v0
+; CHECK-RV64VC-NEXT: mv a2, a0
+; CHECK-RV64VC-NEXT: bltu a0, a1, .LBB855_6
+; CHECK-RV64VC-NEXT: # %bb.5:
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: .LBB855_6:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v18, (zero), v24
+; CHECK-RV64VC-NEXT: sub a1, a0, a1
+; CHECK-RV64VC-NEXT: sltu a0, a0, a1
+; CHECK-RV64VC-NEXT: addi a0, a0, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a1
+; CHECK-RV64VC-NEXT: addi a1, sp, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v19, (zero), v8
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 2
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 2
+; CHECK-RV64VC-NEXT: add a0, a0, a1
+; CHECK-RV64VC-NEXT: add sp, sp, a0
+; CHECK-RV64VC-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV64VC-NEXT: addi sp, sp, 16
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv32i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vl8re32.v v24, (a0)
+; CHECK-RV32VC-NEXT: csrr a0, vlenb
+; CHECK-RV32VC-NEXT: slli a0, a0, 1
+; CHECK-RV32VC-NEXT: mv a2, a1
+; CHECK-RV32VC-NEXT: bltu a1, a0, .LBB855_2
+; CHECK-RV32VC-NEXT: # %bb.1:
+; CHECK-RV32VC-NEXT: mv a2, a0
+; CHECK-RV32VC-NEXT: .LBB855_2:
+; CHECK-RV32VC-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: sub a0, a1, a0
+; CHECK-RV32VC-NEXT: sltu a1, a1, a0
+; CHECK-RV32VC-NEXT: addi a1, a1, -1
+; CHECK-RV32VC-NEXT: and a0, a0, a1
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsoxei32.v v10, (zero), v24
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv32i8.nxv32p0(<vscale x 32 x i8> %val, <vscale x 32 x ptr> %ptrs, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv32i8_PALL(<vscale x 32 x i8> %val, <vscale x 32 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv32i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: addi sp, sp, -16
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 2
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: slli a1, a1, 2
+; CHECK-RV64V-NEXT: add a1, a1, a2
+; CHECK-RV64V-NEXT: sub sp, sp, a1
+; CHECK-RV64V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x14, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 20 * vlenb
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 3
+; CHECK-RV64V-NEXT: add a1, sp, a1
+; CHECK-RV64V-NEXT: addi a1, a1, 16
+; CHECK-RV64V-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 4
+; CHECK-RV64V-NEXT: add a1, sp, a1
+; CHECK-RV64V-NEXT: addi a1, a1, 16
+; CHECK-RV64V-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a5, a1, 4
+; CHECK-RV64V-NEXT: slli a2, a1, 1
+; CHECK-RV64V-NEXT: slli a6, a1, 3
+; CHECK-RV64V-NEXT: mv a4, a3
+; CHECK-RV64V-NEXT: bltu a3, a2, .LBB856_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a4, a2
+; CHECK-RV64V-NEXT: .LBB856_2:
+; CHECK-RV64V-NEXT: vl8re64.v v0, (a0)
+; CHECK-RV64V-NEXT: add a5, a0, a5
+; CHECK-RV64V-NEXT: add a6, a0, a6
+; CHECK-RV64V-NEXT: mv a0, a4
+; CHECK-RV64V-NEXT: bltu a4, a1, .LBB856_4
+; CHECK-RV64V-NEXT: # %bb.3:
+; CHECK-RV64V-NEXT: mv a0, a1
+; CHECK-RV64V-NEXT: .LBB856_4:
+; CHECK-RV64V-NEXT: vl8re64.v v8, (a5)
+; CHECK-RV64V-NEXT: addi a5, sp, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a5) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: vl8re64.v v24, (a6)
+; CHECK-RV64V-NEXT: csrr a5, vlenb
+; CHECK-RV64V-NEXT: slli a5, a5, 4
+; CHECK-RV64V-NEXT: add a5, sp, a5
+; CHECK-RV64V-NEXT: addi a5, a5, 16
+; CHECK-RV64V-NEXT: vl4r.v v16, (a5) # vscale x 32-byte Folded Reload
+; CHECK-RV64V-NEXT: csrr a5, vlenb
+; CHECK-RV64V-NEXT: slli a5, a5, 3
+; CHECK-RV64V-NEXT: add a5, sp, a5
+; CHECK-RV64V-NEXT: addi a5, a5, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a5) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v16, (zero), v8
+; CHECK-RV64V-NEXT: sub a0, a4, a1
+; CHECK-RV64V-NEXT: sub a2, a3, a2
+; CHECK-RV64V-NEXT: sltu a4, a4, a0
+; CHECK-RV64V-NEXT: sltu a3, a3, a2
+; CHECK-RV64V-NEXT: addi a4, a4, -1
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a4, a4, a0
+; CHECK-RV64V-NEXT: and a0, a3, a2
+; CHECK-RV64V-NEXT: vsetvli zero, a4, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v17, (zero), v0
+; CHECK-RV64V-NEXT: mv a2, a0
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB856_6
+; CHECK-RV64V-NEXT: # %bb.5:
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: .LBB856_6:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v18, (zero), v24
+; CHECK-RV64V-NEXT: sub a1, a0, a1
+; CHECK-RV64V-NEXT: sltu a0, a0, a1
+; CHECK-RV64V-NEXT: addi a0, a0, -1
+; CHECK-RV64V-NEXT: and a0, a0, a1
+; CHECK-RV64V-NEXT: addi a1, sp, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v19, (zero), v8
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 2
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 2
+; CHECK-RV64V-NEXT: add a0, a0, a1
+; CHECK-RV64V-NEXT: add sp, sp, a0
+; CHECK-RV64V-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV64V-NEXT: addi sp, sp, 16
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv32i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vl8re32.v v24, (a0)
+; CHECK-RV32V-NEXT: csrr a0, vlenb
+; CHECK-RV32V-NEXT: slli a0, a0, 1
+; CHECK-RV32V-NEXT: mv a2, a1
+; CHECK-RV32V-NEXT: bltu a1, a0, .LBB856_2
+; CHECK-RV32V-NEXT: # %bb.1:
+; CHECK-RV32V-NEXT: mv a2, a0
+; CHECK-RV32V-NEXT: .LBB856_2:
+; CHECK-RV32V-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: sub a0, a1, a0
+; CHECK-RV32V-NEXT: sltu a1, a1, a0
+; CHECK-RV32V-NEXT: addi a1, a1, -1
+; CHECK-RV32V-NEXT: and a0, a1, a0
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsoxei32.v v10, (zero), v24
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv32i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: addi sp, sp, -16
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 2
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: slli a1, a1, 2
+; CHECK-RV64VC-NEXT: add a1, a1, a2
+; CHECK-RV64VC-NEXT: sub sp, sp, a1
+; CHECK-RV64VC-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x14, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 20 * vlenb
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 3
+; CHECK-RV64VC-NEXT: add a1, a1, sp
+; CHECK-RV64VC-NEXT: addi a1, a1, 16
+; CHECK-RV64VC-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 4
+; CHECK-RV64VC-NEXT: add a1, a1, sp
+; CHECK-RV64VC-NEXT: addi a1, a1, 16
+; CHECK-RV64VC-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a6, a1, 4
+; CHECK-RV64VC-NEXT: slli a2, a1, 1
+; CHECK-RV64VC-NEXT: slli a5, a1, 3
+; CHECK-RV64VC-NEXT: mv a4, a3
+; CHECK-RV64VC-NEXT: bltu a3, a2, .LBB856_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a4, a2
+; CHECK-RV64VC-NEXT: .LBB856_2:
+; CHECK-RV64VC-NEXT: vl8re64.v v0, (a0)
+; CHECK-RV64VC-NEXT: add a6, a6, a0
+; CHECK-RV64VC-NEXT: add a5, a5, a0
+; CHECK-RV64VC-NEXT: mv a0, a4
+; CHECK-RV64VC-NEXT: bltu a4, a1, .LBB856_4
+; CHECK-RV64VC-NEXT: # %bb.3:
+; CHECK-RV64VC-NEXT: mv a0, a1
+; CHECK-RV64VC-NEXT: .LBB856_4:
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (a6)
+; CHECK-RV64VC-NEXT: addi a6, sp, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a6) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: vl8re64.v v24, (a5)
+; CHECK-RV64VC-NEXT: csrr a5, vlenb
+; CHECK-RV64VC-NEXT: slli a5, a5, 4
+; CHECK-RV64VC-NEXT: add a5, a5, sp
+; CHECK-RV64VC-NEXT: addi a5, a5, 16
+; CHECK-RV64VC-NEXT: vl4r.v v16, (a5) # vscale x 32-byte Folded Reload
+; CHECK-RV64VC-NEXT: csrr a5, vlenb
+; CHECK-RV64VC-NEXT: slli a5, a5, 3
+; CHECK-RV64VC-NEXT: add a5, a5, sp
+; CHECK-RV64VC-NEXT: addi a5, a5, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a5) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v16, (zero), v8
+; CHECK-RV64VC-NEXT: sub a0, a4, a1
+; CHECK-RV64VC-NEXT: sub a2, a3, a2
+; CHECK-RV64VC-NEXT: sltu a4, a4, a0
+; CHECK-RV64VC-NEXT: sltu a3, a3, a2
+; CHECK-RV64VC-NEXT: addi a4, a4, -1
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a4, a4, a0
+; CHECK-RV64VC-NEXT: and a0, a3, a2
+; CHECK-RV64VC-NEXT: vsetvli zero, a4, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v17, (zero), v0
+; CHECK-RV64VC-NEXT: mv a2, a0
+; CHECK-RV64VC-NEXT: bltu a0, a1, .LBB856_6
+; CHECK-RV64VC-NEXT: # %bb.5:
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: .LBB856_6:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v18, (zero), v24
+; CHECK-RV64VC-NEXT: sub a1, a0, a1
+; CHECK-RV64VC-NEXT: sltu a0, a0, a1
+; CHECK-RV64VC-NEXT: addi a0, a0, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a1
+; CHECK-RV64VC-NEXT: addi a1, sp, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v19, (zero), v8
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 2
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 2
+; CHECK-RV64VC-NEXT: add a0, a0, a1
+; CHECK-RV64VC-NEXT: add sp, sp, a0
+; CHECK-RV64VC-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV64VC-NEXT: addi sp, sp, 16
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv32i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vl8re32.v v24, (a0)
+; CHECK-RV32VC-NEXT: csrr a0, vlenb
+; CHECK-RV32VC-NEXT: slli a0, a0, 1
+; CHECK-RV32VC-NEXT: mv a2, a1
+; CHECK-RV32VC-NEXT: bltu a1, a0, .LBB856_2
+; CHECK-RV32VC-NEXT: # %bb.1:
+; CHECK-RV32VC-NEXT: mv a2, a0
+; CHECK-RV32VC-NEXT: .LBB856_2:
+; CHECK-RV32VC-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: sub a0, a1, a0
+; CHECK-RV32VC-NEXT: sltu a1, a1, a0
+; CHECK-RV32VC-NEXT: addi a1, a1, -1
+; CHECK-RV32VC-NEXT: and a0, a0, a1
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsoxei32.v v10, (zero), v24
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv32i8.nxv32p0(<vscale x 32 x i8> %val, <vscale x 32 x ptr> %ptrs, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv32i8_S1(<vscale x 32 x i8> %val, <vscale x 32 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv32i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: addi sp, sp, -16
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 2
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: slli a1, a1, 2
+; CHECK-RV64V-NEXT: add a1, a1, a2
+; CHECK-RV64V-NEXT: sub sp, sp, a1
+; CHECK-RV64V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x14, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 20 * vlenb
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 3
+; CHECK-RV64V-NEXT: add a1, sp, a1
+; CHECK-RV64V-NEXT: addi a1, a1, 16
+; CHECK-RV64V-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 4
+; CHECK-RV64V-NEXT: add a1, sp, a1
+; CHECK-RV64V-NEXT: addi a1, a1, 16
+; CHECK-RV64V-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a5, a1, 4
+; CHECK-RV64V-NEXT: slli a2, a1, 1
+; CHECK-RV64V-NEXT: slli a6, a1, 3
+; CHECK-RV64V-NEXT: mv a4, a3
+; CHECK-RV64V-NEXT: bltu a3, a2, .LBB857_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a4, a2
+; CHECK-RV64V-NEXT: .LBB857_2:
+; CHECK-RV64V-NEXT: vl8re64.v v0, (a0)
+; CHECK-RV64V-NEXT: add a5, a0, a5
+; CHECK-RV64V-NEXT: add a6, a0, a6
+; CHECK-RV64V-NEXT: mv a0, a4
+; CHECK-RV64V-NEXT: bltu a4, a1, .LBB857_4
+; CHECK-RV64V-NEXT: # %bb.3:
+; CHECK-RV64V-NEXT: mv a0, a1
+; CHECK-RV64V-NEXT: .LBB857_4:
+; CHECK-RV64V-NEXT: vl8re64.v v8, (a5)
+; CHECK-RV64V-NEXT: addi a5, sp, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a5) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: vl8re64.v v24, (a6)
+; CHECK-RV64V-NEXT: csrr a5, vlenb
+; CHECK-RV64V-NEXT: slli a5, a5, 4
+; CHECK-RV64V-NEXT: add a5, sp, a5
+; CHECK-RV64V-NEXT: addi a5, a5, 16
+; CHECK-RV64V-NEXT: vl4r.v v16, (a5) # vscale x 32-byte Folded Reload
+; CHECK-RV64V-NEXT: csrr a5, vlenb
+; CHECK-RV64V-NEXT: slli a5, a5, 3
+; CHECK-RV64V-NEXT: add a5, sp, a5
+; CHECK-RV64V-NEXT: addi a5, a5, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a5) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v16, (zero), v8
+; CHECK-RV64V-NEXT: sub a0, a4, a1
+; CHECK-RV64V-NEXT: sub a2, a3, a2
+; CHECK-RV64V-NEXT: sltu a4, a4, a0
+; CHECK-RV64V-NEXT: sltu a3, a3, a2
+; CHECK-RV64V-NEXT: addi a4, a4, -1
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a4, a4, a0
+; CHECK-RV64V-NEXT: and a0, a3, a2
+; CHECK-RV64V-NEXT: vsetvli zero, a4, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v17, (zero), v0
+; CHECK-RV64V-NEXT: mv a2, a0
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB857_6
+; CHECK-RV64V-NEXT: # %bb.5:
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: .LBB857_6:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v18, (zero), v24
+; CHECK-RV64V-NEXT: sub a1, a0, a1
+; CHECK-RV64V-NEXT: sltu a0, a0, a1
+; CHECK-RV64V-NEXT: addi a0, a0, -1
+; CHECK-RV64V-NEXT: and a0, a0, a1
+; CHECK-RV64V-NEXT: addi a1, sp, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v19, (zero), v8
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 2
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 2
+; CHECK-RV64V-NEXT: add a0, a0, a1
+; CHECK-RV64V-NEXT: add sp, sp, a0
+; CHECK-RV64V-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV64V-NEXT: addi sp, sp, 16
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv32i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vl8re32.v v24, (a0)
+; CHECK-RV32V-NEXT: csrr a0, vlenb
+; CHECK-RV32V-NEXT: slli a0, a0, 1
+; CHECK-RV32V-NEXT: mv a2, a1
+; CHECK-RV32V-NEXT: bltu a1, a0, .LBB857_2
+; CHECK-RV32V-NEXT: # %bb.1:
+; CHECK-RV32V-NEXT: mv a2, a0
+; CHECK-RV32V-NEXT: .LBB857_2:
+; CHECK-RV32V-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: sub a0, a1, a0
+; CHECK-RV32V-NEXT: sltu a1, a1, a0
+; CHECK-RV32V-NEXT: addi a1, a1, -1
+; CHECK-RV32V-NEXT: and a0, a1, a0
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsoxei32.v v10, (zero), v24
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv32i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: addi sp, sp, -16
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 2
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: slli a1, a1, 2
+; CHECK-RV64VC-NEXT: add a1, a1, a2
+; CHECK-RV64VC-NEXT: sub sp, sp, a1
+; CHECK-RV64VC-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x14, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 20 * vlenb
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 3
+; CHECK-RV64VC-NEXT: add a1, a1, sp
+; CHECK-RV64VC-NEXT: addi a1, a1, 16
+; CHECK-RV64VC-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 4
+; CHECK-RV64VC-NEXT: add a1, a1, sp
+; CHECK-RV64VC-NEXT: addi a1, a1, 16
+; CHECK-RV64VC-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a6, a1, 4
+; CHECK-RV64VC-NEXT: slli a2, a1, 1
+; CHECK-RV64VC-NEXT: slli a5, a1, 3
+; CHECK-RV64VC-NEXT: mv a4, a3
+; CHECK-RV64VC-NEXT: bltu a3, a2, .LBB857_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a4, a2
+; CHECK-RV64VC-NEXT: .LBB857_2:
+; CHECK-RV64VC-NEXT: vl8re64.v v0, (a0)
+; CHECK-RV64VC-NEXT: add a6, a6, a0
+; CHECK-RV64VC-NEXT: add a5, a5, a0
+; CHECK-RV64VC-NEXT: mv a0, a4
+; CHECK-RV64VC-NEXT: bltu a4, a1, .LBB857_4
+; CHECK-RV64VC-NEXT: # %bb.3:
+; CHECK-RV64VC-NEXT: mv a0, a1
+; CHECK-RV64VC-NEXT: .LBB857_4:
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (a6)
+; CHECK-RV64VC-NEXT: addi a6, sp, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a6) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: vl8re64.v v24, (a5)
+; CHECK-RV64VC-NEXT: csrr a5, vlenb
+; CHECK-RV64VC-NEXT: slli a5, a5, 4
+; CHECK-RV64VC-NEXT: add a5, a5, sp
+; CHECK-RV64VC-NEXT: addi a5, a5, 16
+; CHECK-RV64VC-NEXT: vl4r.v v16, (a5) # vscale x 32-byte Folded Reload
+; CHECK-RV64VC-NEXT: csrr a5, vlenb
+; CHECK-RV64VC-NEXT: slli a5, a5, 3
+; CHECK-RV64VC-NEXT: add a5, a5, sp
+; CHECK-RV64VC-NEXT: addi a5, a5, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a5) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v16, (zero), v8
+; CHECK-RV64VC-NEXT: sub a0, a4, a1
+; CHECK-RV64VC-NEXT: sub a2, a3, a2
+; CHECK-RV64VC-NEXT: sltu a4, a4, a0
+; CHECK-RV64VC-NEXT: sltu a3, a3, a2
+; CHECK-RV64VC-NEXT: addi a4, a4, -1
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a4, a4, a0
+; CHECK-RV64VC-NEXT: and a0, a3, a2
+; CHECK-RV64VC-NEXT: vsetvli zero, a4, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v17, (zero), v0
+; CHECK-RV64VC-NEXT: mv a2, a0
+; CHECK-RV64VC-NEXT: bltu a0, a1, .LBB857_6
+; CHECK-RV64VC-NEXT: # %bb.5:
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: .LBB857_6:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v18, (zero), v24
+; CHECK-RV64VC-NEXT: sub a1, a0, a1
+; CHECK-RV64VC-NEXT: sltu a0, a0, a1
+; CHECK-RV64VC-NEXT: addi a0, a0, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a1
+; CHECK-RV64VC-NEXT: addi a1, sp, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v19, (zero), v8
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 2
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 2
+; CHECK-RV64VC-NEXT: add a0, a0, a1
+; CHECK-RV64VC-NEXT: add sp, sp, a0
+; CHECK-RV64VC-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV64VC-NEXT: addi sp, sp, 16
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv32i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vl8re32.v v24, (a0)
+; CHECK-RV32VC-NEXT: csrr a0, vlenb
+; CHECK-RV32VC-NEXT: slli a0, a0, 1
+; CHECK-RV32VC-NEXT: mv a2, a1
+; CHECK-RV32VC-NEXT: bltu a1, a0, .LBB857_2
+; CHECK-RV32VC-NEXT: # %bb.1:
+; CHECK-RV32VC-NEXT: mv a2, a0
+; CHECK-RV32VC-NEXT: .LBB857_2:
+; CHECK-RV32VC-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: sub a0, a1, a0
+; CHECK-RV32VC-NEXT: sltu a1, a1, a0
+; CHECK-RV32VC-NEXT: addi a1, a1, -1
+; CHECK-RV32VC-NEXT: and a0, a0, a1
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsoxei32.v v10, (zero), v24
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv32i8.nxv32p0(<vscale x 32 x i8> %val, <vscale x 32 x ptr> %ptrs, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv32i8_ALL(<vscale x 32 x i8> %val, <vscale x 32 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv32i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: addi sp, sp, -16
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 2
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: slli a1, a1, 2
+; CHECK-RV64V-NEXT: add a1, a1, a2
+; CHECK-RV64V-NEXT: sub sp, sp, a1
+; CHECK-RV64V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x14, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 20 * vlenb
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 3
+; CHECK-RV64V-NEXT: add a1, sp, a1
+; CHECK-RV64V-NEXT: addi a1, a1, 16
+; CHECK-RV64V-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 4
+; CHECK-RV64V-NEXT: add a1, sp, a1
+; CHECK-RV64V-NEXT: addi a1, a1, 16
+; CHECK-RV64V-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a5, a1, 4
+; CHECK-RV64V-NEXT: slli a2, a1, 1
+; CHECK-RV64V-NEXT: slli a6, a1, 3
+; CHECK-RV64V-NEXT: mv a4, a3
+; CHECK-RV64V-NEXT: bltu a3, a2, .LBB858_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a4, a2
+; CHECK-RV64V-NEXT: .LBB858_2:
+; CHECK-RV64V-NEXT: vl8re64.v v0, (a0)
+; CHECK-RV64V-NEXT: add a5, a0, a5
+; CHECK-RV64V-NEXT: add a6, a0, a6
+; CHECK-RV64V-NEXT: mv a0, a4
+; CHECK-RV64V-NEXT: bltu a4, a1, .LBB858_4
+; CHECK-RV64V-NEXT: # %bb.3:
+; CHECK-RV64V-NEXT: mv a0, a1
+; CHECK-RV64V-NEXT: .LBB858_4:
+; CHECK-RV64V-NEXT: vl8re64.v v8, (a5)
+; CHECK-RV64V-NEXT: addi a5, sp, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a5) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: vl8re64.v v24, (a6)
+; CHECK-RV64V-NEXT: csrr a5, vlenb
+; CHECK-RV64V-NEXT: slli a5, a5, 4
+; CHECK-RV64V-NEXT: add a5, sp, a5
+; CHECK-RV64V-NEXT: addi a5, a5, 16
+; CHECK-RV64V-NEXT: vl4r.v v16, (a5) # vscale x 32-byte Folded Reload
+; CHECK-RV64V-NEXT: csrr a5, vlenb
+; CHECK-RV64V-NEXT: slli a5, a5, 3
+; CHECK-RV64V-NEXT: add a5, sp, a5
+; CHECK-RV64V-NEXT: addi a5, a5, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a5) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v16, (zero), v8
+; CHECK-RV64V-NEXT: sub a0, a4, a1
+; CHECK-RV64V-NEXT: sub a2, a3, a2
+; CHECK-RV64V-NEXT: sltu a4, a4, a0
+; CHECK-RV64V-NEXT: sltu a3, a3, a2
+; CHECK-RV64V-NEXT: addi a4, a4, -1
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a4, a4, a0
+; CHECK-RV64V-NEXT: and a0, a3, a2
+; CHECK-RV64V-NEXT: vsetvli zero, a4, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v17, (zero), v0
+; CHECK-RV64V-NEXT: mv a2, a0
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB858_6
+; CHECK-RV64V-NEXT: # %bb.5:
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: .LBB858_6:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v18, (zero), v24
+; CHECK-RV64V-NEXT: sub a1, a0, a1
+; CHECK-RV64V-NEXT: sltu a0, a0, a1
+; CHECK-RV64V-NEXT: addi a0, a0, -1
+; CHECK-RV64V-NEXT: and a0, a0, a1
+; CHECK-RV64V-NEXT: addi a1, sp, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v19, (zero), v8
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 2
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 2
+; CHECK-RV64V-NEXT: add a0, a0, a1
+; CHECK-RV64V-NEXT: add sp, sp, a0
+; CHECK-RV64V-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV64V-NEXT: addi sp, sp, 16
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv32i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vl8re32.v v24, (a0)
+; CHECK-RV32V-NEXT: csrr a0, vlenb
+; CHECK-RV32V-NEXT: slli a0, a0, 1
+; CHECK-RV32V-NEXT: mv a2, a1
+; CHECK-RV32V-NEXT: bltu a1, a0, .LBB858_2
+; CHECK-RV32V-NEXT: # %bb.1:
+; CHECK-RV32V-NEXT: mv a2, a0
+; CHECK-RV32V-NEXT: .LBB858_2:
+; CHECK-RV32V-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: sub a0, a1, a0
+; CHECK-RV32V-NEXT: sltu a1, a1, a0
+; CHECK-RV32V-NEXT: addi a1, a1, -1
+; CHECK-RV32V-NEXT: and a0, a1, a0
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v10, (zero), v24
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv32i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: addi sp, sp, -16
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 2
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: slli a1, a1, 2
+; CHECK-RV64VC-NEXT: add a1, a1, a2
+; CHECK-RV64VC-NEXT: sub sp, sp, a1
+; CHECK-RV64VC-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x14, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 20 * vlenb
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 3
+; CHECK-RV64VC-NEXT: add a1, a1, sp
+; CHECK-RV64VC-NEXT: addi a1, a1, 16
+; CHECK-RV64VC-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 4
+; CHECK-RV64VC-NEXT: add a1, a1, sp
+; CHECK-RV64VC-NEXT: addi a1, a1, 16
+; CHECK-RV64VC-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a6, a1, 4
+; CHECK-RV64VC-NEXT: slli a2, a1, 1
+; CHECK-RV64VC-NEXT: slli a5, a1, 3
+; CHECK-RV64VC-NEXT: mv a4, a3
+; CHECK-RV64VC-NEXT: bltu a3, a2, .LBB858_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a4, a2
+; CHECK-RV64VC-NEXT: .LBB858_2:
+; CHECK-RV64VC-NEXT: vl8re64.v v0, (a0)
+; CHECK-RV64VC-NEXT: add a6, a6, a0
+; CHECK-RV64VC-NEXT: add a5, a5, a0
+; CHECK-RV64VC-NEXT: mv a0, a4
+; CHECK-RV64VC-NEXT: bltu a4, a1, .LBB858_4
+; CHECK-RV64VC-NEXT: # %bb.3:
+; CHECK-RV64VC-NEXT: mv a0, a1
+; CHECK-RV64VC-NEXT: .LBB858_4:
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (a6)
+; CHECK-RV64VC-NEXT: addi a6, sp, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a6) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: vl8re64.v v24, (a5)
+; CHECK-RV64VC-NEXT: csrr a5, vlenb
+; CHECK-RV64VC-NEXT: slli a5, a5, 4
+; CHECK-RV64VC-NEXT: add a5, a5, sp
+; CHECK-RV64VC-NEXT: addi a5, a5, 16
+; CHECK-RV64VC-NEXT: vl4r.v v16, (a5) # vscale x 32-byte Folded Reload
+; CHECK-RV64VC-NEXT: csrr a5, vlenb
+; CHECK-RV64VC-NEXT: slli a5, a5, 3
+; CHECK-RV64VC-NEXT: add a5, a5, sp
+; CHECK-RV64VC-NEXT: addi a5, a5, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a5) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v16, (zero), v8
+; CHECK-RV64VC-NEXT: sub a0, a4, a1
+; CHECK-RV64VC-NEXT: sub a2, a3, a2
+; CHECK-RV64VC-NEXT: sltu a4, a4, a0
+; CHECK-RV64VC-NEXT: sltu a3, a3, a2
+; CHECK-RV64VC-NEXT: addi a4, a4, -1
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a4, a4, a0
+; CHECK-RV64VC-NEXT: and a0, a3, a2
+; CHECK-RV64VC-NEXT: vsetvli zero, a4, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v17, (zero), v0
+; CHECK-RV64VC-NEXT: mv a2, a0
+; CHECK-RV64VC-NEXT: bltu a0, a1, .LBB858_6
+; CHECK-RV64VC-NEXT: # %bb.5:
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: .LBB858_6:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v18, (zero), v24
+; CHECK-RV64VC-NEXT: sub a1, a0, a1
+; CHECK-RV64VC-NEXT: sltu a0, a0, a1
+; CHECK-RV64VC-NEXT: addi a0, a0, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a1
+; CHECK-RV64VC-NEXT: addi a1, sp, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v19, (zero), v8
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 2
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 2
+; CHECK-RV64VC-NEXT: add a0, a0, a1
+; CHECK-RV64VC-NEXT: add sp, sp, a0
+; CHECK-RV64VC-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV64VC-NEXT: addi sp, sp, 16
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv32i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vl8re32.v v24, (a0)
+; CHECK-RV32VC-NEXT: csrr a0, vlenb
+; CHECK-RV32VC-NEXT: slli a0, a0, 1
+; CHECK-RV32VC-NEXT: mv a2, a1
+; CHECK-RV32VC-NEXT: bltu a1, a0, .LBB858_2
+; CHECK-RV32VC-NEXT: # %bb.1:
+; CHECK-RV32VC-NEXT: mv a2, a0
+; CHECK-RV32VC-NEXT: .LBB858_2:
+; CHECK-RV32VC-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: sub a0, a1, a0
+; CHECK-RV32VC-NEXT: sltu a1, a1, a0
+; CHECK-RV32VC-NEXT: addi a1, a1, -1
+; CHECK-RV32VC-NEXT: and a0, a0, a1
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v10, (zero), v24
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv32i8.nxv32p0(<vscale x 32 x i8> %val, <vscale x 32 x ptr> %ptrs, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_scatter_nxv32i8_DEFAULT(<vscale x 32 x i8> %val, <vscale x 32 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv32i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: addi sp, sp, -16
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 2
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: slli a1, a1, 2
+; CHECK-RV64V-NEXT: add a1, a1, a2
+; CHECK-RV64V-NEXT: sub sp, sp, a1
+; CHECK-RV64V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x14, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 20 * vlenb
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 3
+; CHECK-RV64V-NEXT: add a1, sp, a1
+; CHECK-RV64V-NEXT: addi a1, a1, 16
+; CHECK-RV64V-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 4
+; CHECK-RV64V-NEXT: add a1, sp, a1
+; CHECK-RV64V-NEXT: addi a1, a1, 16
+; CHECK-RV64V-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a5, a1, 4
+; CHECK-RV64V-NEXT: slli a2, a1, 1
+; CHECK-RV64V-NEXT: slli a6, a1, 3
+; CHECK-RV64V-NEXT: mv a4, a3
+; CHECK-RV64V-NEXT: bltu a3, a2, .LBB859_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a4, a2
+; CHECK-RV64V-NEXT: .LBB859_2:
+; CHECK-RV64V-NEXT: vl8re64.v v0, (a0)
+; CHECK-RV64V-NEXT: add a5, a0, a5
+; CHECK-RV64V-NEXT: add a6, a0, a6
+; CHECK-RV64V-NEXT: mv a0, a4
+; CHECK-RV64V-NEXT: bltu a4, a1, .LBB859_4
+; CHECK-RV64V-NEXT: # %bb.3:
+; CHECK-RV64V-NEXT: mv a0, a1
+; CHECK-RV64V-NEXT: .LBB859_4:
+; CHECK-RV64V-NEXT: vl8re64.v v8, (a5)
+; CHECK-RV64V-NEXT: addi a5, sp, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a5) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: vl8re64.v v24, (a6)
+; CHECK-RV64V-NEXT: csrr a5, vlenb
+; CHECK-RV64V-NEXT: slli a5, a5, 4
+; CHECK-RV64V-NEXT: add a5, sp, a5
+; CHECK-RV64V-NEXT: addi a5, a5, 16
+; CHECK-RV64V-NEXT: vl4r.v v16, (a5) # vscale x 32-byte Folded Reload
+; CHECK-RV64V-NEXT: csrr a5, vlenb
+; CHECK-RV64V-NEXT: slli a5, a5, 3
+; CHECK-RV64V-NEXT: add a5, sp, a5
+; CHECK-RV64V-NEXT: addi a5, a5, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a5) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v16, (zero), v8
+; CHECK-RV64V-NEXT: sub a0, a4, a1
+; CHECK-RV64V-NEXT: sub a2, a3, a2
+; CHECK-RV64V-NEXT: sltu a4, a4, a0
+; CHECK-RV64V-NEXT: sltu a3, a3, a2
+; CHECK-RV64V-NEXT: addi a4, a4, -1
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a4, a4, a0
+; CHECK-RV64V-NEXT: and a0, a3, a2
+; CHECK-RV64V-NEXT: vsetvli zero, a4, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v17, (zero), v0
+; CHECK-RV64V-NEXT: mv a2, a0
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB859_6
+; CHECK-RV64V-NEXT: # %bb.5:
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: .LBB859_6:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v18, (zero), v24
+; CHECK-RV64V-NEXT: sub a1, a0, a1
+; CHECK-RV64V-NEXT: sltu a0, a0, a1
+; CHECK-RV64V-NEXT: addi a0, a0, -1
+; CHECK-RV64V-NEXT: and a0, a0, a1
+; CHECK-RV64V-NEXT: addi a1, sp, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v19, (zero), v8
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 2
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 2
+; CHECK-RV64V-NEXT: add a0, a0, a1
+; CHECK-RV64V-NEXT: add sp, sp, a0
+; CHECK-RV64V-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV64V-NEXT: addi sp, sp, 16
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv32i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vl8re32.v v24, (a0)
+; CHECK-RV32V-NEXT: csrr a0, vlenb
+; CHECK-RV32V-NEXT: slli a0, a0, 1
+; CHECK-RV32V-NEXT: mv a2, a1
+; CHECK-RV32V-NEXT: bltu a1, a0, .LBB859_2
+; CHECK-RV32V-NEXT: # %bb.1:
+; CHECK-RV32V-NEXT: mv a2, a0
+; CHECK-RV32V-NEXT: .LBB859_2:
+; CHECK-RV32V-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: sub a0, a1, a0
+; CHECK-RV32V-NEXT: sltu a1, a1, a0
+; CHECK-RV32V-NEXT: addi a1, a1, -1
+; CHECK-RV32V-NEXT: and a0, a1, a0
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v10, (zero), v24
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv32i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: addi sp, sp, -16
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 2
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: slli a1, a1, 2
+; CHECK-RV64VC-NEXT: add a1, a1, a2
+; CHECK-RV64VC-NEXT: sub sp, sp, a1
+; CHECK-RV64VC-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x14, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 20 * vlenb
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 3
+; CHECK-RV64VC-NEXT: add a1, a1, sp
+; CHECK-RV64VC-NEXT: addi a1, a1, 16
+; CHECK-RV64VC-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 4
+; CHECK-RV64VC-NEXT: add a1, a1, sp
+; CHECK-RV64VC-NEXT: addi a1, a1, 16
+; CHECK-RV64VC-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a6, a1, 4
+; CHECK-RV64VC-NEXT: slli a2, a1, 1
+; CHECK-RV64VC-NEXT: slli a5, a1, 3
+; CHECK-RV64VC-NEXT: mv a4, a3
+; CHECK-RV64VC-NEXT: bltu a3, a2, .LBB859_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a4, a2
+; CHECK-RV64VC-NEXT: .LBB859_2:
+; CHECK-RV64VC-NEXT: vl8re64.v v0, (a0)
+; CHECK-RV64VC-NEXT: add a6, a6, a0
+; CHECK-RV64VC-NEXT: add a5, a5, a0
+; CHECK-RV64VC-NEXT: mv a0, a4
+; CHECK-RV64VC-NEXT: bltu a4, a1, .LBB859_4
+; CHECK-RV64VC-NEXT: # %bb.3:
+; CHECK-RV64VC-NEXT: mv a0, a1
+; CHECK-RV64VC-NEXT: .LBB859_4:
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (a6)
+; CHECK-RV64VC-NEXT: addi a6, sp, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a6) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: vl8re64.v v24, (a5)
+; CHECK-RV64VC-NEXT: csrr a5, vlenb
+; CHECK-RV64VC-NEXT: slli a5, a5, 4
+; CHECK-RV64VC-NEXT: add a5, a5, sp
+; CHECK-RV64VC-NEXT: addi a5, a5, 16
+; CHECK-RV64VC-NEXT: vl4r.v v16, (a5) # vscale x 32-byte Folded Reload
+; CHECK-RV64VC-NEXT: csrr a5, vlenb
+; CHECK-RV64VC-NEXT: slli a5, a5, 3
+; CHECK-RV64VC-NEXT: add a5, a5, sp
+; CHECK-RV64VC-NEXT: addi a5, a5, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a5) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v16, (zero), v8
+; CHECK-RV64VC-NEXT: sub a0, a4, a1
+; CHECK-RV64VC-NEXT: sub a2, a3, a2
+; CHECK-RV64VC-NEXT: sltu a4, a4, a0
+; CHECK-RV64VC-NEXT: sltu a3, a3, a2
+; CHECK-RV64VC-NEXT: addi a4, a4, -1
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a4, a4, a0
+; CHECK-RV64VC-NEXT: and a0, a3, a2
+; CHECK-RV64VC-NEXT: vsetvli zero, a4, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v17, (zero), v0
+; CHECK-RV64VC-NEXT: mv a2, a0
+; CHECK-RV64VC-NEXT: bltu a0, a1, .LBB859_6
+; CHECK-RV64VC-NEXT: # %bb.5:
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: .LBB859_6:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v18, (zero), v24
+; CHECK-RV64VC-NEXT: sub a1, a0, a1
+; CHECK-RV64VC-NEXT: sltu a0, a0, a1
+; CHECK-RV64VC-NEXT: addi a0, a0, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a1
+; CHECK-RV64VC-NEXT: addi a1, sp, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v19, (zero), v8
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 2
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 2
+; CHECK-RV64VC-NEXT: add a0, a0, a1
+; CHECK-RV64VC-NEXT: add sp, sp, a0
+; CHECK-RV64VC-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV64VC-NEXT: addi sp, sp, 16
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv32i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vl8re32.v v24, (a0)
+; CHECK-RV32VC-NEXT: csrr a0, vlenb
+; CHECK-RV32VC-NEXT: slli a0, a0, 1
+; CHECK-RV32VC-NEXT: mv a2, a1
+; CHECK-RV32VC-NEXT: bltu a1, a0, .LBB859_2
+; CHECK-RV32VC-NEXT: # %bb.1:
+; CHECK-RV32VC-NEXT: mv a2, a0
+; CHECK-RV32VC-NEXT: .LBB859_2:
+; CHECK-RV32VC-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: sub a0, a1, a0
+; CHECK-RV32VC-NEXT: sltu a1, a1, a0
+; CHECK-RV32VC-NEXT: addi a1, a1, -1
+; CHECK-RV32VC-NEXT: and a0, a0, a1
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v10, (zero), v24
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv32i8.nxv32p0(<vscale x 32 x i8> %val, <vscale x 32 x ptr> %ptrs, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 32 x i8> @test_nontemporal_vp_strided.load_nxv32i8_P1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv32i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv32i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv32i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv32i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 32 x i8> @llvm.experimental.vp.strided.load.nxv32i8.i64(ptr %p, i64 %stride, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 32 x i8> %x
+}
+
+
+define <vscale x 32 x i8> @test_nontemporal_vp_strided.load_nxv32i8_PALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv32i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv32i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv32i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv32i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 32 x i8> @llvm.experimental.vp.strided.load.nxv32i8.i64(ptr %p, i64 %stride, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 32 x i8> %x
+}
+
+
+define <vscale x 32 x i8> @test_nontemporal_vp_strided.load_nxv32i8_S1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv32i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv32i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv32i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv32i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 32 x i8> @llvm.experimental.vp.strided.load.nxv32i8.i64(ptr %p, i64 %stride, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 32 x i8> %x
+}
+
+
+define <vscale x 32 x i8> @test_nontemporal_vp_strided.load_nxv32i8_ALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv32i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv32i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv32i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv32i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 32 x i8> @llvm.experimental.vp.strided.load.nxv32i8.i64(ptr %p, i64 %stride, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 32 x i8> %x
+}
+
+define <vscale x 32 x i8> @test_nontemporal_vp_strided.load_nxv32i8_DEFAULT(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv32i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv32i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv32i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv32i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 32 x i8> @llvm.experimental.vp.strided.load.nxv32i8.i64(ptr %p, i64 %stride, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 32 x i8> %x
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv32i8_P1(<vscale x 32 x i8> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv32i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv32i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv32i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv32i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv32i8.i64(<vscale x 32 x i8> %val, ptr %p, i64 %stride, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv32i8_PALL(<vscale x 32 x i8> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv32i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv32i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv32i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv32i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv32i8.i64(<vscale x 32 x i8> %val, ptr %p, i64 %stride, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv32i8_S1(<vscale x 32 x i8> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv32i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv32i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv32i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv32i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv32i8.i64(<vscale x 32 x i8> %val, ptr %p, i64 %stride, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv32i8_ALL(<vscale x 32 x i8> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv32i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv32i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv32i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv32i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv32i8.i64(<vscale x 32 x i8> %val, ptr %p, i64 %stride, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_strided.store_nxv32i8_DEFAULT(<vscale x 32 x i8> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv32i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m4, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv32i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv32i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m4, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv32i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv32i8.i64(<vscale x 32 x i8> %val, ptr %p, i64 %stride, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 32 x i16> @test_nontemporal_vp_load_nxv32i16_P1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv32i16_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vle16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv32i16_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vle16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv32i16_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv32i16_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 32 x i16> @llvm.vp.load.nxv32i16.p0(ptr %p, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 32 x i16> %x
+}
+
+
+define <vscale x 32 x i16> @test_nontemporal_vp_load_nxv32i16_PALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv32i16_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vle16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv32i16_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vle16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv32i16_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv32i16_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 32 x i16> @llvm.vp.load.nxv32i16.p0(ptr %p, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 32 x i16> %x
+}
+
+
+define <vscale x 32 x i16> @test_nontemporal_vp_load_nxv32i16_S1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv32i16_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vle16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv32i16_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vle16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv32i16_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv32i16_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 32 x i16> @llvm.vp.load.nxv32i16.p0(ptr %p, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 32 x i16> %x
+}
+
+
+define <vscale x 32 x i16> @test_nontemporal_vp_load_nxv32i16_ALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv32i16_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv32i16_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv32i16_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv32i16_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 32 x i16> @llvm.vp.load.nxv32i16.p0(ptr %p, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 32 x i16> %x
+}
+
+define <vscale x 32 x i16> @test_nontemporal_vp_load_nxv32i16_DEFAULT(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv32i16_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv32i16_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv32i16_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv32i16_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 32 x i16> @llvm.vp.load.nxv32i16.p0(ptr %p, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 32 x i16> %x
+}
+
+
+define void @test_nontemporal_vp_store_nxv32i16_P1(<vscale x 32 x i16> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv32i16_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vse16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv32i16_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vse16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv32i16_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv32i16_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv32i16.p0(<vscale x 32 x i16> %val, ptr %p, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv32i16_PALL(<vscale x 32 x i16> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv32i16_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vse16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv32i16_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vse16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv32i16_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv32i16_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv32i16.p0(<vscale x 32 x i16> %val, ptr %p, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv32i16_S1(<vscale x 32 x i16> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv32i16_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vse16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv32i16_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vse16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv32i16_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv32i16_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv32i16.p0(<vscale x 32 x i16> %val, ptr %p, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv32i16_ALL(<vscale x 32 x i16> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv32i16_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv32i16_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv32i16_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv32i16_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv32i16.p0(<vscale x 32 x i16> %val, ptr %p, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_store_nxv32i16_DEFAULT(<vscale x 32 x i16> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv32i16_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv32i16_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv32i16_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv32i16_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv32i16.p0(<vscale x 32 x i16> %val, ptr %p, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 32 x i16> @test_nontemporal_vp_gather_nxv32i16_P1(<vscale x 32 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv32i16_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: addi sp, sp, -16
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 3
+; CHECK-RV64V-NEXT: sub sp, sp, a1
+; CHECK-RV64V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV64V-NEXT: addi a1, sp, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64V-NEXT: slli a4, a1, 3
+; CHECK-RV64V-NEXT: slli a3, a1, 1
+; CHECK-RV64V-NEXT: add a0, a0, a4
+; CHECK-RV64V-NEXT: sub a4, a2, a3
+; CHECK-RV64V-NEXT: vl8re64.v v0, (a0)
+; CHECK-RV64V-NEXT: sltu a0, a2, a4
+; CHECK-RV64V-NEXT: addi a0, a0, -1
+; CHECK-RV64V-NEXT: and a0, a0, a4
+; CHECK-RV64V-NEXT: sub a4, a0, a1
+; CHECK-RV64V-NEXT: sltu a5, a0, a4
+; CHECK-RV64V-NEXT: addi a5, a5, -1
+; CHECK-RV64V-NEXT: and a4, a5, a4
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB880_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a0, a1
+; CHECK-RV64V-NEXT: .LBB880_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a4, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v30, (zero), v0
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v28, (zero), v8
+; CHECK-RV64V-NEXT: bltu a2, a3, .LBB880_4
+; CHECK-RV64V-NEXT: # %bb.3:
+; CHECK-RV64V-NEXT: mv a2, a3
+; CHECK-RV64V-NEXT: .LBB880_4:
+; CHECK-RV64V-NEXT: sub a0, a2, a1
+; CHECK-RV64V-NEXT: sltu a3, a2, a0
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a0, a3, a0
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v26, (zero), v16
+; CHECK-RV64V-NEXT: bltu a2, a1, .LBB880_6
+; CHECK-RV64V-NEXT: # %bb.5:
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: .LBB880_6:
+; CHECK-RV64V-NEXT: addi a0, sp, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64V-NEXT: vmv8r.v v8, v24
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: add sp, sp, a0
+; CHECK-RV64V-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV64V-NEXT: addi sp, sp, 16
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv32i16_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: csrr a1, vlenb
+; CHECK-RV32V-NEXT: slli a1, a1, 1
+; CHECK-RV32V-NEXT: sub a2, a0, a1
+; CHECK-RV32V-NEXT: sltu a3, a0, a2
+; CHECK-RV32V-NEXT: addi a3, a3, -1
+; CHECK-RV32V-NEXT: and a2, a3, a2
+; CHECK-RV32V-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vluxei32.v v28, (zero), v16
+; CHECK-RV32V-NEXT: bltu a0, a1, .LBB880_2
+; CHECK-RV32V-NEXT: # %bb.1:
+; CHECK-RV32V-NEXT: mv a0, a1
+; CHECK-RV32V-NEXT: .LBB880_2:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vluxei32.v v24, (zero), v8
+; CHECK-RV32V-NEXT: vmv8r.v v8, v24
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv32i16_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: addi sp, sp, -16
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 3
+; CHECK-RV64VC-NEXT: sub sp, sp, a1
+; CHECK-RV64VC-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV64VC-NEXT: addi a1, sp, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64VC-NEXT: slli a4, a1, 3
+; CHECK-RV64VC-NEXT: slli a3, a1, 1
+; CHECK-RV64VC-NEXT: add a0, a0, a4
+; CHECK-RV64VC-NEXT: sub a4, a2, a3
+; CHECK-RV64VC-NEXT: vl8re64.v v0, (a0)
+; CHECK-RV64VC-NEXT: sltu a0, a2, a4
+; CHECK-RV64VC-NEXT: addi a0, a0, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a4
+; CHECK-RV64VC-NEXT: sub a4, a0, a1
+; CHECK-RV64VC-NEXT: sltu a5, a0, a4
+; CHECK-RV64VC-NEXT: addi a5, a5, -1
+; CHECK-RV64VC-NEXT: and a4, a4, a5
+; CHECK-RV64VC-NEXT: bltu a0, a1, .LBB880_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a0, a1
+; CHECK-RV64VC-NEXT: .LBB880_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a4, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v30, (zero), v0
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v28, (zero), v8
+; CHECK-RV64VC-NEXT: bltu a2, a3, .LBB880_4
+; CHECK-RV64VC-NEXT: # %bb.3:
+; CHECK-RV64VC-NEXT: mv a2, a3
+; CHECK-RV64VC-NEXT: .LBB880_4:
+; CHECK-RV64VC-NEXT: sub a0, a2, a1
+; CHECK-RV64VC-NEXT: sltu a3, a2, a0
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a3
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v26, (zero), v16
+; CHECK-RV64VC-NEXT: bltu a2, a1, .LBB880_6
+; CHECK-RV64VC-NEXT: # %bb.5:
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: .LBB880_6:
+; CHECK-RV64VC-NEXT: addi a0, sp, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64VC-NEXT: vmv8r.v v8, v24
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: add sp, sp, a0
+; CHECK-RV64VC-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV64VC-NEXT: addi sp, sp, 16
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv32i16_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: csrr a1, vlenb
+; CHECK-RV32VC-NEXT: slli a1, a1, 1
+; CHECK-RV32VC-NEXT: sub a2, a0, a1
+; CHECK-RV32VC-NEXT: sltu a3, a0, a2
+; CHECK-RV32VC-NEXT: addi a3, a3, -1
+; CHECK-RV32VC-NEXT: and a2, a2, a3
+; CHECK-RV32VC-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vluxei32.v v28, (zero), v16
+; CHECK-RV32VC-NEXT: bltu a0, a1, .LBB880_2
+; CHECK-RV32VC-NEXT: # %bb.1:
+; CHECK-RV32VC-NEXT: mv a0, a1
+; CHECK-RV32VC-NEXT: .LBB880_2:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vluxei32.v v24, (zero), v8
+; CHECK-RV32VC-NEXT: vmv8r.v v8, v24
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 32 x i16> @llvm.vp.gather.nxv32i16.nxv32p0(<vscale x 32 x ptr> %ptrs, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 32 x i16> %x
+}
+
+
+define <vscale x 32 x i16> @test_nontemporal_vp_gather_nxv32i16_PALL(<vscale x 32 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv32i16_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: addi sp, sp, -16
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 3
+; CHECK-RV64V-NEXT: sub sp, sp, a1
+; CHECK-RV64V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV64V-NEXT: addi a1, sp, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64V-NEXT: slli a4, a1, 3
+; CHECK-RV64V-NEXT: slli a3, a1, 1
+; CHECK-RV64V-NEXT: add a0, a0, a4
+; CHECK-RV64V-NEXT: sub a4, a2, a3
+; CHECK-RV64V-NEXT: vl8re64.v v0, (a0)
+; CHECK-RV64V-NEXT: sltu a0, a2, a4
+; CHECK-RV64V-NEXT: addi a0, a0, -1
+; CHECK-RV64V-NEXT: and a0, a0, a4
+; CHECK-RV64V-NEXT: sub a4, a0, a1
+; CHECK-RV64V-NEXT: sltu a5, a0, a4
+; CHECK-RV64V-NEXT: addi a5, a5, -1
+; CHECK-RV64V-NEXT: and a4, a5, a4
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB881_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a0, a1
+; CHECK-RV64V-NEXT: .LBB881_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a4, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v30, (zero), v0
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v28, (zero), v8
+; CHECK-RV64V-NEXT: bltu a2, a3, .LBB881_4
+; CHECK-RV64V-NEXT: # %bb.3:
+; CHECK-RV64V-NEXT: mv a2, a3
+; CHECK-RV64V-NEXT: .LBB881_4:
+; CHECK-RV64V-NEXT: sub a0, a2, a1
+; CHECK-RV64V-NEXT: sltu a3, a2, a0
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a0, a3, a0
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v26, (zero), v16
+; CHECK-RV64V-NEXT: bltu a2, a1, .LBB881_6
+; CHECK-RV64V-NEXT: # %bb.5:
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: .LBB881_6:
+; CHECK-RV64V-NEXT: addi a0, sp, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64V-NEXT: vmv8r.v v8, v24
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: add sp, sp, a0
+; CHECK-RV64V-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV64V-NEXT: addi sp, sp, 16
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv32i16_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: csrr a1, vlenb
+; CHECK-RV32V-NEXT: slli a1, a1, 1
+; CHECK-RV32V-NEXT: sub a2, a0, a1
+; CHECK-RV32V-NEXT: sltu a3, a0, a2
+; CHECK-RV32V-NEXT: addi a3, a3, -1
+; CHECK-RV32V-NEXT: and a2, a3, a2
+; CHECK-RV32V-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vluxei32.v v28, (zero), v16
+; CHECK-RV32V-NEXT: bltu a0, a1, .LBB881_2
+; CHECK-RV32V-NEXT: # %bb.1:
+; CHECK-RV32V-NEXT: mv a0, a1
+; CHECK-RV32V-NEXT: .LBB881_2:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vluxei32.v v24, (zero), v8
+; CHECK-RV32V-NEXT: vmv8r.v v8, v24
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv32i16_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: addi sp, sp, -16
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 3
+; CHECK-RV64VC-NEXT: sub sp, sp, a1
+; CHECK-RV64VC-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV64VC-NEXT: addi a1, sp, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64VC-NEXT: slli a4, a1, 3
+; CHECK-RV64VC-NEXT: slli a3, a1, 1
+; CHECK-RV64VC-NEXT: add a0, a0, a4
+; CHECK-RV64VC-NEXT: sub a4, a2, a3
+; CHECK-RV64VC-NEXT: vl8re64.v v0, (a0)
+; CHECK-RV64VC-NEXT: sltu a0, a2, a4
+; CHECK-RV64VC-NEXT: addi a0, a0, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a4
+; CHECK-RV64VC-NEXT: sub a4, a0, a1
+; CHECK-RV64VC-NEXT: sltu a5, a0, a4
+; CHECK-RV64VC-NEXT: addi a5, a5, -1
+; CHECK-RV64VC-NEXT: and a4, a4, a5
+; CHECK-RV64VC-NEXT: bltu a0, a1, .LBB881_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a0, a1
+; CHECK-RV64VC-NEXT: .LBB881_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a4, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v30, (zero), v0
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v28, (zero), v8
+; CHECK-RV64VC-NEXT: bltu a2, a3, .LBB881_4
+; CHECK-RV64VC-NEXT: # %bb.3:
+; CHECK-RV64VC-NEXT: mv a2, a3
+; CHECK-RV64VC-NEXT: .LBB881_4:
+; CHECK-RV64VC-NEXT: sub a0, a2, a1
+; CHECK-RV64VC-NEXT: sltu a3, a2, a0
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a3
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v26, (zero), v16
+; CHECK-RV64VC-NEXT: bltu a2, a1, .LBB881_6
+; CHECK-RV64VC-NEXT: # %bb.5:
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: .LBB881_6:
+; CHECK-RV64VC-NEXT: addi a0, sp, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64VC-NEXT: vmv8r.v v8, v24
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: add sp, sp, a0
+; CHECK-RV64VC-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV64VC-NEXT: addi sp, sp, 16
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv32i16_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: csrr a1, vlenb
+; CHECK-RV32VC-NEXT: slli a1, a1, 1
+; CHECK-RV32VC-NEXT: sub a2, a0, a1
+; CHECK-RV32VC-NEXT: sltu a3, a0, a2
+; CHECK-RV32VC-NEXT: addi a3, a3, -1
+; CHECK-RV32VC-NEXT: and a2, a2, a3
+; CHECK-RV32VC-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vluxei32.v v28, (zero), v16
+; CHECK-RV32VC-NEXT: bltu a0, a1, .LBB881_2
+; CHECK-RV32VC-NEXT: # %bb.1:
+; CHECK-RV32VC-NEXT: mv a0, a1
+; CHECK-RV32VC-NEXT: .LBB881_2:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vluxei32.v v24, (zero), v8
+; CHECK-RV32VC-NEXT: vmv8r.v v8, v24
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 32 x i16> @llvm.vp.gather.nxv32i16.nxv32p0(<vscale x 32 x ptr> %ptrs, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 32 x i16> %x
+}
+
+
+define <vscale x 32 x i16> @test_nontemporal_vp_gather_nxv32i16_S1(<vscale x 32 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv32i16_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: addi sp, sp, -16
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 3
+; CHECK-RV64V-NEXT: sub sp, sp, a1
+; CHECK-RV64V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV64V-NEXT: addi a1, sp, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64V-NEXT: slli a4, a1, 3
+; CHECK-RV64V-NEXT: slli a3, a1, 1
+; CHECK-RV64V-NEXT: add a0, a0, a4
+; CHECK-RV64V-NEXT: sub a4, a2, a3
+; CHECK-RV64V-NEXT: vl8re64.v v0, (a0)
+; CHECK-RV64V-NEXT: sltu a0, a2, a4
+; CHECK-RV64V-NEXT: addi a0, a0, -1
+; CHECK-RV64V-NEXT: and a0, a0, a4
+; CHECK-RV64V-NEXT: sub a4, a0, a1
+; CHECK-RV64V-NEXT: sltu a5, a0, a4
+; CHECK-RV64V-NEXT: addi a5, a5, -1
+; CHECK-RV64V-NEXT: and a4, a5, a4
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB882_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a0, a1
+; CHECK-RV64V-NEXT: .LBB882_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a4, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v30, (zero), v0
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v28, (zero), v8
+; CHECK-RV64V-NEXT: bltu a2, a3, .LBB882_4
+; CHECK-RV64V-NEXT: # %bb.3:
+; CHECK-RV64V-NEXT: mv a2, a3
+; CHECK-RV64V-NEXT: .LBB882_4:
+; CHECK-RV64V-NEXT: sub a0, a2, a1
+; CHECK-RV64V-NEXT: sltu a3, a2, a0
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a0, a3, a0
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v26, (zero), v16
+; CHECK-RV64V-NEXT: bltu a2, a1, .LBB882_6
+; CHECK-RV64V-NEXT: # %bb.5:
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: .LBB882_6:
+; CHECK-RV64V-NEXT: addi a0, sp, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64V-NEXT: vmv8r.v v8, v24
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: add sp, sp, a0
+; CHECK-RV64V-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV64V-NEXT: addi sp, sp, 16
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv32i16_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: csrr a1, vlenb
+; CHECK-RV32V-NEXT: slli a1, a1, 1
+; CHECK-RV32V-NEXT: sub a2, a0, a1
+; CHECK-RV32V-NEXT: sltu a3, a0, a2
+; CHECK-RV32V-NEXT: addi a3, a3, -1
+; CHECK-RV32V-NEXT: and a2, a3, a2
+; CHECK-RV32V-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vluxei32.v v28, (zero), v16
+; CHECK-RV32V-NEXT: bltu a0, a1, .LBB882_2
+; CHECK-RV32V-NEXT: # %bb.1:
+; CHECK-RV32V-NEXT: mv a0, a1
+; CHECK-RV32V-NEXT: .LBB882_2:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vluxei32.v v24, (zero), v8
+; CHECK-RV32V-NEXT: vmv8r.v v8, v24
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv32i16_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: addi sp, sp, -16
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 3
+; CHECK-RV64VC-NEXT: sub sp, sp, a1
+; CHECK-RV64VC-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV64VC-NEXT: addi a1, sp, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64VC-NEXT: slli a4, a1, 3
+; CHECK-RV64VC-NEXT: slli a3, a1, 1
+; CHECK-RV64VC-NEXT: add a0, a0, a4
+; CHECK-RV64VC-NEXT: sub a4, a2, a3
+; CHECK-RV64VC-NEXT: vl8re64.v v0, (a0)
+; CHECK-RV64VC-NEXT: sltu a0, a2, a4
+; CHECK-RV64VC-NEXT: addi a0, a0, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a4
+; CHECK-RV64VC-NEXT: sub a4, a0, a1
+; CHECK-RV64VC-NEXT: sltu a5, a0, a4
+; CHECK-RV64VC-NEXT: addi a5, a5, -1
+; CHECK-RV64VC-NEXT: and a4, a4, a5
+; CHECK-RV64VC-NEXT: bltu a0, a1, .LBB882_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a0, a1
+; CHECK-RV64VC-NEXT: .LBB882_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a4, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v30, (zero), v0
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v28, (zero), v8
+; CHECK-RV64VC-NEXT: bltu a2, a3, .LBB882_4
+; CHECK-RV64VC-NEXT: # %bb.3:
+; CHECK-RV64VC-NEXT: mv a2, a3
+; CHECK-RV64VC-NEXT: .LBB882_4:
+; CHECK-RV64VC-NEXT: sub a0, a2, a1
+; CHECK-RV64VC-NEXT: sltu a3, a2, a0
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a3
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v26, (zero), v16
+; CHECK-RV64VC-NEXT: bltu a2, a1, .LBB882_6
+; CHECK-RV64VC-NEXT: # %bb.5:
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: .LBB882_6:
+; CHECK-RV64VC-NEXT: addi a0, sp, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64VC-NEXT: vmv8r.v v8, v24
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: add sp, sp, a0
+; CHECK-RV64VC-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV64VC-NEXT: addi sp, sp, 16
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv32i16_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: csrr a1, vlenb
+; CHECK-RV32VC-NEXT: slli a1, a1, 1
+; CHECK-RV32VC-NEXT: sub a2, a0, a1
+; CHECK-RV32VC-NEXT: sltu a3, a0, a2
+; CHECK-RV32VC-NEXT: addi a3, a3, -1
+; CHECK-RV32VC-NEXT: and a2, a2, a3
+; CHECK-RV32VC-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vluxei32.v v28, (zero), v16
+; CHECK-RV32VC-NEXT: bltu a0, a1, .LBB882_2
+; CHECK-RV32VC-NEXT: # %bb.1:
+; CHECK-RV32VC-NEXT: mv a0, a1
+; CHECK-RV32VC-NEXT: .LBB882_2:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vluxei32.v v24, (zero), v8
+; CHECK-RV32VC-NEXT: vmv8r.v v8, v24
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 32 x i16> @llvm.vp.gather.nxv32i16.nxv32p0(<vscale x 32 x ptr> %ptrs, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 32 x i16> %x
+}
+
+
+define <vscale x 32 x i16> @test_nontemporal_vp_gather_nxv32i16_ALL(<vscale x 32 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv32i16_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: addi sp, sp, -16
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 3
+; CHECK-RV64V-NEXT: sub sp, sp, a1
+; CHECK-RV64V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV64V-NEXT: addi a1, sp, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64V-NEXT: slli a4, a1, 3
+; CHECK-RV64V-NEXT: slli a3, a1, 1
+; CHECK-RV64V-NEXT: add a0, a0, a4
+; CHECK-RV64V-NEXT: sub a4, a2, a3
+; CHECK-RV64V-NEXT: vl8re64.v v0, (a0)
+; CHECK-RV64V-NEXT: sltu a0, a2, a4
+; CHECK-RV64V-NEXT: addi a0, a0, -1
+; CHECK-RV64V-NEXT: and a0, a0, a4
+; CHECK-RV64V-NEXT: sub a4, a0, a1
+; CHECK-RV64V-NEXT: sltu a5, a0, a4
+; CHECK-RV64V-NEXT: addi a5, a5, -1
+; CHECK-RV64V-NEXT: and a4, a5, a4
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB883_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a0, a1
+; CHECK-RV64V-NEXT: .LBB883_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a4, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v30, (zero), v0
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v28, (zero), v8
+; CHECK-RV64V-NEXT: bltu a2, a3, .LBB883_4
+; CHECK-RV64V-NEXT: # %bb.3:
+; CHECK-RV64V-NEXT: mv a2, a3
+; CHECK-RV64V-NEXT: .LBB883_4:
+; CHECK-RV64V-NEXT: sub a0, a2, a1
+; CHECK-RV64V-NEXT: sltu a3, a2, a0
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a0, a3, a0
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v26, (zero), v16
+; CHECK-RV64V-NEXT: bltu a2, a1, .LBB883_6
+; CHECK-RV64V-NEXT: # %bb.5:
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: .LBB883_6:
+; CHECK-RV64V-NEXT: addi a0, sp, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64V-NEXT: vmv8r.v v8, v24
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: add sp, sp, a0
+; CHECK-RV64V-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV64V-NEXT: addi sp, sp, 16
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv32i16_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: csrr a1, vlenb
+; CHECK-RV32V-NEXT: slli a1, a1, 1
+; CHECK-RV32V-NEXT: sub a2, a0, a1
+; CHECK-RV32V-NEXT: sltu a3, a0, a2
+; CHECK-RV32V-NEXT: addi a3, a3, -1
+; CHECK-RV32V-NEXT: and a2, a3, a2
+; CHECK-RV32V-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v28, (zero), v16
+; CHECK-RV32V-NEXT: bltu a0, a1, .LBB883_2
+; CHECK-RV32V-NEXT: # %bb.1:
+; CHECK-RV32V-NEXT: mv a0, a1
+; CHECK-RV32V-NEXT: .LBB883_2:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v24, (zero), v8
+; CHECK-RV32V-NEXT: vmv8r.v v8, v24
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv32i16_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: addi sp, sp, -16
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 3
+; CHECK-RV64VC-NEXT: sub sp, sp, a1
+; CHECK-RV64VC-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV64VC-NEXT: addi a1, sp, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64VC-NEXT: slli a4, a1, 3
+; CHECK-RV64VC-NEXT: slli a3, a1, 1
+; CHECK-RV64VC-NEXT: add a0, a0, a4
+; CHECK-RV64VC-NEXT: sub a4, a2, a3
+; CHECK-RV64VC-NEXT: vl8re64.v v0, (a0)
+; CHECK-RV64VC-NEXT: sltu a0, a2, a4
+; CHECK-RV64VC-NEXT: addi a0, a0, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a4
+; CHECK-RV64VC-NEXT: sub a4, a0, a1
+; CHECK-RV64VC-NEXT: sltu a5, a0, a4
+; CHECK-RV64VC-NEXT: addi a5, a5, -1
+; CHECK-RV64VC-NEXT: and a4, a4, a5
+; CHECK-RV64VC-NEXT: bltu a0, a1, .LBB883_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a0, a1
+; CHECK-RV64VC-NEXT: .LBB883_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a4, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v30, (zero), v0
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v28, (zero), v8
+; CHECK-RV64VC-NEXT: bltu a2, a3, .LBB883_4
+; CHECK-RV64VC-NEXT: # %bb.3:
+; CHECK-RV64VC-NEXT: mv a2, a3
+; CHECK-RV64VC-NEXT: .LBB883_4:
+; CHECK-RV64VC-NEXT: sub a0, a2, a1
+; CHECK-RV64VC-NEXT: sltu a3, a2, a0
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a3
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v26, (zero), v16
+; CHECK-RV64VC-NEXT: bltu a2, a1, .LBB883_6
+; CHECK-RV64VC-NEXT: # %bb.5:
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: .LBB883_6:
+; CHECK-RV64VC-NEXT: addi a0, sp, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64VC-NEXT: vmv8r.v v8, v24
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: add sp, sp, a0
+; CHECK-RV64VC-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV64VC-NEXT: addi sp, sp, 16
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv32i16_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: csrr a1, vlenb
+; CHECK-RV32VC-NEXT: slli a1, a1, 1
+; CHECK-RV32VC-NEXT: sub a2, a0, a1
+; CHECK-RV32VC-NEXT: sltu a3, a0, a2
+; CHECK-RV32VC-NEXT: addi a3, a3, -1
+; CHECK-RV32VC-NEXT: and a2, a2, a3
+; CHECK-RV32VC-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v28, (zero), v16
+; CHECK-RV32VC-NEXT: bltu a0, a1, .LBB883_2
+; CHECK-RV32VC-NEXT: # %bb.1:
+; CHECK-RV32VC-NEXT: mv a0, a1
+; CHECK-RV32VC-NEXT: .LBB883_2:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v24, (zero), v8
+; CHECK-RV32VC-NEXT: vmv8r.v v8, v24
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 32 x i16> @llvm.vp.gather.nxv32i16.nxv32p0(<vscale x 32 x ptr> %ptrs, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 32 x i16> %x
+}
+
+define <vscale x 32 x i16> @test_nontemporal_vp_gather_nxv32i16_DEFAULT(<vscale x 32 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv32i16_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: addi sp, sp, -16
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 3
+; CHECK-RV64V-NEXT: sub sp, sp, a1
+; CHECK-RV64V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV64V-NEXT: addi a1, sp, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64V-NEXT: slli a4, a1, 3
+; CHECK-RV64V-NEXT: slli a3, a1, 1
+; CHECK-RV64V-NEXT: add a0, a0, a4
+; CHECK-RV64V-NEXT: sub a4, a2, a3
+; CHECK-RV64V-NEXT: vl8re64.v v0, (a0)
+; CHECK-RV64V-NEXT: sltu a0, a2, a4
+; CHECK-RV64V-NEXT: addi a0, a0, -1
+; CHECK-RV64V-NEXT: and a0, a0, a4
+; CHECK-RV64V-NEXT: sub a4, a0, a1
+; CHECK-RV64V-NEXT: sltu a5, a0, a4
+; CHECK-RV64V-NEXT: addi a5, a5, -1
+; CHECK-RV64V-NEXT: and a4, a5, a4
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB884_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a0, a1
+; CHECK-RV64V-NEXT: .LBB884_2:
+; CHECK-RV64V-NEXT: vsetvli zero, a4, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v30, (zero), v0
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v28, (zero), v8
+; CHECK-RV64V-NEXT: bltu a2, a3, .LBB884_4
+; CHECK-RV64V-NEXT: # %bb.3:
+; CHECK-RV64V-NEXT: mv a2, a3
+; CHECK-RV64V-NEXT: .LBB884_4:
+; CHECK-RV64V-NEXT: sub a0, a2, a1
+; CHECK-RV64V-NEXT: sltu a3, a2, a0
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a0, a3, a0
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v26, (zero), v16
+; CHECK-RV64V-NEXT: bltu a2, a1, .LBB884_6
+; CHECK-RV64V-NEXT: # %bb.5:
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: .LBB884_6:
+; CHECK-RV64V-NEXT: addi a0, sp, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64V-NEXT: vmv8r.v v8, v24
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: add sp, sp, a0
+; CHECK-RV64V-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV64V-NEXT: addi sp, sp, 16
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv32i16_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: csrr a1, vlenb
+; CHECK-RV32V-NEXT: slli a1, a1, 1
+; CHECK-RV32V-NEXT: sub a2, a0, a1
+; CHECK-RV32V-NEXT: sltu a3, a0, a2
+; CHECK-RV32V-NEXT: addi a3, a3, -1
+; CHECK-RV32V-NEXT: and a2, a3, a2
+; CHECK-RV32V-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v28, (zero), v16
+; CHECK-RV32V-NEXT: bltu a0, a1, .LBB884_2
+; CHECK-RV32V-NEXT: # %bb.1:
+; CHECK-RV32V-NEXT: mv a0, a1
+; CHECK-RV32V-NEXT: .LBB884_2:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v24, (zero), v8
+; CHECK-RV32V-NEXT: vmv8r.v v8, v24
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv32i16_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: addi sp, sp, -16
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 3
+; CHECK-RV64VC-NEXT: sub sp, sp, a1
+; CHECK-RV64VC-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV64VC-NEXT: addi a1, sp, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64VC-NEXT: slli a4, a1, 3
+; CHECK-RV64VC-NEXT: slli a3, a1, 1
+; CHECK-RV64VC-NEXT: add a0, a0, a4
+; CHECK-RV64VC-NEXT: sub a4, a2, a3
+; CHECK-RV64VC-NEXT: vl8re64.v v0, (a0)
+; CHECK-RV64VC-NEXT: sltu a0, a2, a4
+; CHECK-RV64VC-NEXT: addi a0, a0, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a4
+; CHECK-RV64VC-NEXT: sub a4, a0, a1
+; CHECK-RV64VC-NEXT: sltu a5, a0, a4
+; CHECK-RV64VC-NEXT: addi a5, a5, -1
+; CHECK-RV64VC-NEXT: and a4, a4, a5
+; CHECK-RV64VC-NEXT: bltu a0, a1, .LBB884_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a0, a1
+; CHECK-RV64VC-NEXT: .LBB884_2:
+; CHECK-RV64VC-NEXT: vsetvli zero, a4, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v30, (zero), v0
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v28, (zero), v8
+; CHECK-RV64VC-NEXT: bltu a2, a3, .LBB884_4
+; CHECK-RV64VC-NEXT: # %bb.3:
+; CHECK-RV64VC-NEXT: mv a2, a3
+; CHECK-RV64VC-NEXT: .LBB884_4:
+; CHECK-RV64VC-NEXT: sub a0, a2, a1
+; CHECK-RV64VC-NEXT: sltu a3, a2, a0
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a3
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v26, (zero), v16
+; CHECK-RV64VC-NEXT: bltu a2, a1, .LBB884_6
+; CHECK-RV64VC-NEXT: # %bb.5:
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: .LBB884_6:
+; CHECK-RV64VC-NEXT: addi a0, sp, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v24, (zero), v8
+; CHECK-RV64VC-NEXT: vmv8r.v v8, v24
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: add sp, sp, a0
+; CHECK-RV64VC-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV64VC-NEXT: addi sp, sp, 16
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv32i16_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: csrr a1, vlenb
+; CHECK-RV32VC-NEXT: slli a1, a1, 1
+; CHECK-RV32VC-NEXT: sub a2, a0, a1
+; CHECK-RV32VC-NEXT: sltu a3, a0, a2
+; CHECK-RV32VC-NEXT: addi a3, a3, -1
+; CHECK-RV32VC-NEXT: and a2, a2, a3
+; CHECK-RV32VC-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v28, (zero), v16
+; CHECK-RV32VC-NEXT: bltu a0, a1, .LBB884_2
+; CHECK-RV32VC-NEXT: # %bb.1:
+; CHECK-RV32VC-NEXT: mv a0, a1
+; CHECK-RV32VC-NEXT: .LBB884_2:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v24, (zero), v8
+; CHECK-RV32VC-NEXT: vmv8r.v v8, v24
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 32 x i16> @llvm.vp.gather.nxv32i16.nxv32p0(<vscale x 32 x ptr> %ptrs, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 32 x i16> %x
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv32i16_P1(<vscale x 32 x i16> %val, <vscale x 32 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv32i16_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a5, a1, 4
+; CHECK-RV64V-NEXT: slli a2, a1, 1
+; CHECK-RV64V-NEXT: slli a6, a1, 3
+; CHECK-RV64V-NEXT: mv a4, a3
+; CHECK-RV64V-NEXT: bltu a3, a2, .LBB885_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a4, a2
+; CHECK-RV64V-NEXT: .LBB885_2:
+; CHECK-RV64V-NEXT: vl8re64.v v0, (a0)
+; CHECK-RV64V-NEXT: add a5, a0, a5
+; CHECK-RV64V-NEXT: add a0, a0, a6
+; CHECK-RV64V-NEXT: mv a6, a4
+; CHECK-RV64V-NEXT: bltu a4, a1, .LBB885_4
+; CHECK-RV64V-NEXT: # %bb.3:
+; CHECK-RV64V-NEXT: mv a6, a1
+; CHECK-RV64V-NEXT: .LBB885_4:
+; CHECK-RV64V-NEXT: addi sp, sp, -16
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV64V-NEXT: csrr a7, vlenb
+; CHECK-RV64V-NEXT: slli a7, a7, 3
+; CHECK-RV64V-NEXT: sub sp, sp, a7
+; CHECK-RV64V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV64V-NEXT: vl8re64.v v24, (a5)
+; CHECK-RV64V-NEXT: addi a5, sp, 16
+; CHECK-RV64V-NEXT: vs8r.v v24, (a5) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64V-NEXT: vsetvli zero, a6, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: sub a0, a4, a1
+; CHECK-RV64V-NEXT: sub a2, a3, a2
+; CHECK-RV64V-NEXT: sltu a4, a4, a0
+; CHECK-RV64V-NEXT: sltu a3, a3, a2
+; CHECK-RV64V-NEXT: addi a4, a4, -1
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a4, a4, a0
+; CHECK-RV64V-NEXT: and a0, a3, a2
+; CHECK-RV64V-NEXT: vsetvli zero, a4, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v10, (zero), v0
+; CHECK-RV64V-NEXT: mv a2, a0
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB885_6
+; CHECK-RV64V-NEXT: # %bb.5:
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: .LBB885_6:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v12, (zero), v24
+; CHECK-RV64V-NEXT: sub a1, a0, a1
+; CHECK-RV64V-NEXT: sltu a0, a0, a1
+; CHECK-RV64V-NEXT: addi a0, a0, -1
+; CHECK-RV64V-NEXT: and a0, a0, a1
+; CHECK-RV64V-NEXT: addi a1, sp, 16
+; CHECK-RV64V-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v14, (zero), v16
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: add sp, sp, a0
+; CHECK-RV64V-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV64V-NEXT: addi sp, sp, 16
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv32i16_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vl8re32.v v24, (a0)
+; CHECK-RV32V-NEXT: csrr a0, vlenb
+; CHECK-RV32V-NEXT: slli a0, a0, 1
+; CHECK-RV32V-NEXT: mv a2, a1
+; CHECK-RV32V-NEXT: bltu a1, a0, .LBB885_2
+; CHECK-RV32V-NEXT: # %bb.1:
+; CHECK-RV32V-NEXT: mv a2, a0
+; CHECK-RV32V-NEXT: .LBB885_2:
+; CHECK-RV32V-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: sub a0, a1, a0
+; CHECK-RV32V-NEXT: sltu a1, a1, a0
+; CHECK-RV32V-NEXT: addi a1, a1, -1
+; CHECK-RV32V-NEXT: and a0, a1, a0
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsoxei32.v v12, (zero), v24
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv32i16_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a6, a1, 4
+; CHECK-RV64VC-NEXT: slli a2, a1, 1
+; CHECK-RV64VC-NEXT: slli a5, a1, 3
+; CHECK-RV64VC-NEXT: mv a4, a3
+; CHECK-RV64VC-NEXT: bltu a3, a2, .LBB885_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a4, a2
+; CHECK-RV64VC-NEXT: .LBB885_2:
+; CHECK-RV64VC-NEXT: vl8re64.v v0, (a0)
+; CHECK-RV64VC-NEXT: add a6, a6, a0
+; CHECK-RV64VC-NEXT: add a0, a0, a5
+; CHECK-RV64VC-NEXT: mv a5, a4
+; CHECK-RV64VC-NEXT: bltu a4, a1, .LBB885_4
+; CHECK-RV64VC-NEXT: # %bb.3:
+; CHECK-RV64VC-NEXT: mv a5, a1
+; CHECK-RV64VC-NEXT: .LBB885_4:
+; CHECK-RV64VC-NEXT: addi sp, sp, -16
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV64VC-NEXT: csrr a7, vlenb
+; CHECK-RV64VC-NEXT: slli a7, a7, 3
+; CHECK-RV64VC-NEXT: sub sp, sp, a7
+; CHECK-RV64VC-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV64VC-NEXT: vl8re64.v v24, (a6)
+; CHECK-RV64VC-NEXT: addi a6, sp, 16
+; CHECK-RV64VC-NEXT: vs8r.v v24, (a6) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64VC-NEXT: vsetvli zero, a5, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: sub a0, a4, a1
+; CHECK-RV64VC-NEXT: sub a2, a3, a2
+; CHECK-RV64VC-NEXT: sltu a4, a4, a0
+; CHECK-RV64VC-NEXT: sltu a3, a3, a2
+; CHECK-RV64VC-NEXT: addi a4, a4, -1
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a4, a4, a0
+; CHECK-RV64VC-NEXT: and a0, a3, a2
+; CHECK-RV64VC-NEXT: vsetvli zero, a4, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v10, (zero), v0
+; CHECK-RV64VC-NEXT: mv a2, a0
+; CHECK-RV64VC-NEXT: bltu a0, a1, .LBB885_6
+; CHECK-RV64VC-NEXT: # %bb.5:
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: .LBB885_6:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v12, (zero), v24
+; CHECK-RV64VC-NEXT: sub a1, a0, a1
+; CHECK-RV64VC-NEXT: sltu a0, a0, a1
+; CHECK-RV64VC-NEXT: addi a0, a0, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a1
+; CHECK-RV64VC-NEXT: addi a1, sp, 16
+; CHECK-RV64VC-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v14, (zero), v16
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: add sp, sp, a0
+; CHECK-RV64VC-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV64VC-NEXT: addi sp, sp, 16
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv32i16_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vl8re32.v v24, (a0)
+; CHECK-RV32VC-NEXT: csrr a0, vlenb
+; CHECK-RV32VC-NEXT: slli a0, a0, 1
+; CHECK-RV32VC-NEXT: mv a2, a1
+; CHECK-RV32VC-NEXT: bltu a1, a0, .LBB885_2
+; CHECK-RV32VC-NEXT: # %bb.1:
+; CHECK-RV32VC-NEXT: mv a2, a0
+; CHECK-RV32VC-NEXT: .LBB885_2:
+; CHECK-RV32VC-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: sub a0, a1, a0
+; CHECK-RV32VC-NEXT: sltu a1, a1, a0
+; CHECK-RV32VC-NEXT: addi a1, a1, -1
+; CHECK-RV32VC-NEXT: and a0, a0, a1
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsoxei32.v v12, (zero), v24
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv32i16.nxv32p0(<vscale x 32 x i16> %val, <vscale x 32 x ptr> %ptrs, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv32i16_PALL(<vscale x 32 x i16> %val, <vscale x 32 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv32i16_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a5, a1, 4
+; CHECK-RV64V-NEXT: slli a2, a1, 1
+; CHECK-RV64V-NEXT: slli a6, a1, 3
+; CHECK-RV64V-NEXT: mv a4, a3
+; CHECK-RV64V-NEXT: bltu a3, a2, .LBB886_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a4, a2
+; CHECK-RV64V-NEXT: .LBB886_2:
+; CHECK-RV64V-NEXT: vl8re64.v v0, (a0)
+; CHECK-RV64V-NEXT: add a5, a0, a5
+; CHECK-RV64V-NEXT: add a0, a0, a6
+; CHECK-RV64V-NEXT: mv a6, a4
+; CHECK-RV64V-NEXT: bltu a4, a1, .LBB886_4
+; CHECK-RV64V-NEXT: # %bb.3:
+; CHECK-RV64V-NEXT: mv a6, a1
+; CHECK-RV64V-NEXT: .LBB886_4:
+; CHECK-RV64V-NEXT: addi sp, sp, -16
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV64V-NEXT: csrr a7, vlenb
+; CHECK-RV64V-NEXT: slli a7, a7, 3
+; CHECK-RV64V-NEXT: sub sp, sp, a7
+; CHECK-RV64V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV64V-NEXT: vl8re64.v v24, (a5)
+; CHECK-RV64V-NEXT: addi a5, sp, 16
+; CHECK-RV64V-NEXT: vs8r.v v24, (a5) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64V-NEXT: vsetvli zero, a6, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: sub a0, a4, a1
+; CHECK-RV64V-NEXT: sub a2, a3, a2
+; CHECK-RV64V-NEXT: sltu a4, a4, a0
+; CHECK-RV64V-NEXT: sltu a3, a3, a2
+; CHECK-RV64V-NEXT: addi a4, a4, -1
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a4, a4, a0
+; CHECK-RV64V-NEXT: and a0, a3, a2
+; CHECK-RV64V-NEXT: vsetvli zero, a4, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v10, (zero), v0
+; CHECK-RV64V-NEXT: mv a2, a0
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB886_6
+; CHECK-RV64V-NEXT: # %bb.5:
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: .LBB886_6:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v12, (zero), v24
+; CHECK-RV64V-NEXT: sub a1, a0, a1
+; CHECK-RV64V-NEXT: sltu a0, a0, a1
+; CHECK-RV64V-NEXT: addi a0, a0, -1
+; CHECK-RV64V-NEXT: and a0, a0, a1
+; CHECK-RV64V-NEXT: addi a1, sp, 16
+; CHECK-RV64V-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v14, (zero), v16
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: add sp, sp, a0
+; CHECK-RV64V-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV64V-NEXT: addi sp, sp, 16
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv32i16_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vl8re32.v v24, (a0)
+; CHECK-RV32V-NEXT: csrr a0, vlenb
+; CHECK-RV32V-NEXT: slli a0, a0, 1
+; CHECK-RV32V-NEXT: mv a2, a1
+; CHECK-RV32V-NEXT: bltu a1, a0, .LBB886_2
+; CHECK-RV32V-NEXT: # %bb.1:
+; CHECK-RV32V-NEXT: mv a2, a0
+; CHECK-RV32V-NEXT: .LBB886_2:
+; CHECK-RV32V-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: sub a0, a1, a0
+; CHECK-RV32V-NEXT: sltu a1, a1, a0
+; CHECK-RV32V-NEXT: addi a1, a1, -1
+; CHECK-RV32V-NEXT: and a0, a1, a0
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsoxei32.v v12, (zero), v24
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv32i16_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a6, a1, 4
+; CHECK-RV64VC-NEXT: slli a2, a1, 1
+; CHECK-RV64VC-NEXT: slli a5, a1, 3
+; CHECK-RV64VC-NEXT: mv a4, a3
+; CHECK-RV64VC-NEXT: bltu a3, a2, .LBB886_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a4, a2
+; CHECK-RV64VC-NEXT: .LBB886_2:
+; CHECK-RV64VC-NEXT: vl8re64.v v0, (a0)
+; CHECK-RV64VC-NEXT: add a6, a6, a0
+; CHECK-RV64VC-NEXT: add a0, a0, a5
+; CHECK-RV64VC-NEXT: mv a5, a4
+; CHECK-RV64VC-NEXT: bltu a4, a1, .LBB886_4
+; CHECK-RV64VC-NEXT: # %bb.3:
+; CHECK-RV64VC-NEXT: mv a5, a1
+; CHECK-RV64VC-NEXT: .LBB886_4:
+; CHECK-RV64VC-NEXT: addi sp, sp, -16
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV64VC-NEXT: csrr a7, vlenb
+; CHECK-RV64VC-NEXT: slli a7, a7, 3
+; CHECK-RV64VC-NEXT: sub sp, sp, a7
+; CHECK-RV64VC-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV64VC-NEXT: vl8re64.v v24, (a6)
+; CHECK-RV64VC-NEXT: addi a6, sp, 16
+; CHECK-RV64VC-NEXT: vs8r.v v24, (a6) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64VC-NEXT: vsetvli zero, a5, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: sub a0, a4, a1
+; CHECK-RV64VC-NEXT: sub a2, a3, a2
+; CHECK-RV64VC-NEXT: sltu a4, a4, a0
+; CHECK-RV64VC-NEXT: sltu a3, a3, a2
+; CHECK-RV64VC-NEXT: addi a4, a4, -1
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a4, a4, a0
+; CHECK-RV64VC-NEXT: and a0, a3, a2
+; CHECK-RV64VC-NEXT: vsetvli zero, a4, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v10, (zero), v0
+; CHECK-RV64VC-NEXT: mv a2, a0
+; CHECK-RV64VC-NEXT: bltu a0, a1, .LBB886_6
+; CHECK-RV64VC-NEXT: # %bb.5:
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: .LBB886_6:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v12, (zero), v24
+; CHECK-RV64VC-NEXT: sub a1, a0, a1
+; CHECK-RV64VC-NEXT: sltu a0, a0, a1
+; CHECK-RV64VC-NEXT: addi a0, a0, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a1
+; CHECK-RV64VC-NEXT: addi a1, sp, 16
+; CHECK-RV64VC-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v14, (zero), v16
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: add sp, sp, a0
+; CHECK-RV64VC-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV64VC-NEXT: addi sp, sp, 16
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv32i16_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vl8re32.v v24, (a0)
+; CHECK-RV32VC-NEXT: csrr a0, vlenb
+; CHECK-RV32VC-NEXT: slli a0, a0, 1
+; CHECK-RV32VC-NEXT: mv a2, a1
+; CHECK-RV32VC-NEXT: bltu a1, a0, .LBB886_2
+; CHECK-RV32VC-NEXT: # %bb.1:
+; CHECK-RV32VC-NEXT: mv a2, a0
+; CHECK-RV32VC-NEXT: .LBB886_2:
+; CHECK-RV32VC-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: sub a0, a1, a0
+; CHECK-RV32VC-NEXT: sltu a1, a1, a0
+; CHECK-RV32VC-NEXT: addi a1, a1, -1
+; CHECK-RV32VC-NEXT: and a0, a0, a1
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsoxei32.v v12, (zero), v24
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv32i16.nxv32p0(<vscale x 32 x i16> %val, <vscale x 32 x ptr> %ptrs, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv32i16_S1(<vscale x 32 x i16> %val, <vscale x 32 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv32i16_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a5, a1, 4
+; CHECK-RV64V-NEXT: slli a2, a1, 1
+; CHECK-RV64V-NEXT: slli a6, a1, 3
+; CHECK-RV64V-NEXT: mv a4, a3
+; CHECK-RV64V-NEXT: bltu a3, a2, .LBB887_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a4, a2
+; CHECK-RV64V-NEXT: .LBB887_2:
+; CHECK-RV64V-NEXT: vl8re64.v v0, (a0)
+; CHECK-RV64V-NEXT: add a5, a0, a5
+; CHECK-RV64V-NEXT: add a0, a0, a6
+; CHECK-RV64V-NEXT: mv a6, a4
+; CHECK-RV64V-NEXT: bltu a4, a1, .LBB887_4
+; CHECK-RV64V-NEXT: # %bb.3:
+; CHECK-RV64V-NEXT: mv a6, a1
+; CHECK-RV64V-NEXT: .LBB887_4:
+; CHECK-RV64V-NEXT: addi sp, sp, -16
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV64V-NEXT: csrr a7, vlenb
+; CHECK-RV64V-NEXT: slli a7, a7, 3
+; CHECK-RV64V-NEXT: sub sp, sp, a7
+; CHECK-RV64V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV64V-NEXT: vl8re64.v v24, (a5)
+; CHECK-RV64V-NEXT: addi a5, sp, 16
+; CHECK-RV64V-NEXT: vs8r.v v24, (a5) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64V-NEXT: vsetvli zero, a6, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: sub a0, a4, a1
+; CHECK-RV64V-NEXT: sub a2, a3, a2
+; CHECK-RV64V-NEXT: sltu a4, a4, a0
+; CHECK-RV64V-NEXT: sltu a3, a3, a2
+; CHECK-RV64V-NEXT: addi a4, a4, -1
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a4, a4, a0
+; CHECK-RV64V-NEXT: and a0, a3, a2
+; CHECK-RV64V-NEXT: vsetvli zero, a4, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v10, (zero), v0
+; CHECK-RV64V-NEXT: mv a2, a0
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB887_6
+; CHECK-RV64V-NEXT: # %bb.5:
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: .LBB887_6:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v12, (zero), v24
+; CHECK-RV64V-NEXT: sub a1, a0, a1
+; CHECK-RV64V-NEXT: sltu a0, a0, a1
+; CHECK-RV64V-NEXT: addi a0, a0, -1
+; CHECK-RV64V-NEXT: and a0, a0, a1
+; CHECK-RV64V-NEXT: addi a1, sp, 16
+; CHECK-RV64V-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v14, (zero), v16
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: add sp, sp, a0
+; CHECK-RV64V-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV64V-NEXT: addi sp, sp, 16
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv32i16_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vl8re32.v v24, (a0)
+; CHECK-RV32V-NEXT: csrr a0, vlenb
+; CHECK-RV32V-NEXT: slli a0, a0, 1
+; CHECK-RV32V-NEXT: mv a2, a1
+; CHECK-RV32V-NEXT: bltu a1, a0, .LBB887_2
+; CHECK-RV32V-NEXT: # %bb.1:
+; CHECK-RV32V-NEXT: mv a2, a0
+; CHECK-RV32V-NEXT: .LBB887_2:
+; CHECK-RV32V-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: sub a0, a1, a0
+; CHECK-RV32V-NEXT: sltu a1, a1, a0
+; CHECK-RV32V-NEXT: addi a1, a1, -1
+; CHECK-RV32V-NEXT: and a0, a1, a0
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsoxei32.v v12, (zero), v24
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv32i16_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a6, a1, 4
+; CHECK-RV64VC-NEXT: slli a2, a1, 1
+; CHECK-RV64VC-NEXT: slli a5, a1, 3
+; CHECK-RV64VC-NEXT: mv a4, a3
+; CHECK-RV64VC-NEXT: bltu a3, a2, .LBB887_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a4, a2
+; CHECK-RV64VC-NEXT: .LBB887_2:
+; CHECK-RV64VC-NEXT: vl8re64.v v0, (a0)
+; CHECK-RV64VC-NEXT: add a6, a6, a0
+; CHECK-RV64VC-NEXT: add a0, a0, a5
+; CHECK-RV64VC-NEXT: mv a5, a4
+; CHECK-RV64VC-NEXT: bltu a4, a1, .LBB887_4
+; CHECK-RV64VC-NEXT: # %bb.3:
+; CHECK-RV64VC-NEXT: mv a5, a1
+; CHECK-RV64VC-NEXT: .LBB887_4:
+; CHECK-RV64VC-NEXT: addi sp, sp, -16
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV64VC-NEXT: csrr a7, vlenb
+; CHECK-RV64VC-NEXT: slli a7, a7, 3
+; CHECK-RV64VC-NEXT: sub sp, sp, a7
+; CHECK-RV64VC-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV64VC-NEXT: vl8re64.v v24, (a6)
+; CHECK-RV64VC-NEXT: addi a6, sp, 16
+; CHECK-RV64VC-NEXT: vs8r.v v24, (a6) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64VC-NEXT: vsetvli zero, a5, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: sub a0, a4, a1
+; CHECK-RV64VC-NEXT: sub a2, a3, a2
+; CHECK-RV64VC-NEXT: sltu a4, a4, a0
+; CHECK-RV64VC-NEXT: sltu a3, a3, a2
+; CHECK-RV64VC-NEXT: addi a4, a4, -1
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a4, a4, a0
+; CHECK-RV64VC-NEXT: and a0, a3, a2
+; CHECK-RV64VC-NEXT: vsetvli zero, a4, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v10, (zero), v0
+; CHECK-RV64VC-NEXT: mv a2, a0
+; CHECK-RV64VC-NEXT: bltu a0, a1, .LBB887_6
+; CHECK-RV64VC-NEXT: # %bb.5:
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: .LBB887_6:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v12, (zero), v24
+; CHECK-RV64VC-NEXT: sub a1, a0, a1
+; CHECK-RV64VC-NEXT: sltu a0, a0, a1
+; CHECK-RV64VC-NEXT: addi a0, a0, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a1
+; CHECK-RV64VC-NEXT: addi a1, sp, 16
+; CHECK-RV64VC-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v14, (zero), v16
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: add sp, sp, a0
+; CHECK-RV64VC-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV64VC-NEXT: addi sp, sp, 16
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv32i16_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vl8re32.v v24, (a0)
+; CHECK-RV32VC-NEXT: csrr a0, vlenb
+; CHECK-RV32VC-NEXT: slli a0, a0, 1
+; CHECK-RV32VC-NEXT: mv a2, a1
+; CHECK-RV32VC-NEXT: bltu a1, a0, .LBB887_2
+; CHECK-RV32VC-NEXT: # %bb.1:
+; CHECK-RV32VC-NEXT: mv a2, a0
+; CHECK-RV32VC-NEXT: .LBB887_2:
+; CHECK-RV32VC-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: sub a0, a1, a0
+; CHECK-RV32VC-NEXT: sltu a1, a1, a0
+; CHECK-RV32VC-NEXT: addi a1, a1, -1
+; CHECK-RV32VC-NEXT: and a0, a0, a1
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsoxei32.v v12, (zero), v24
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv32i16.nxv32p0(<vscale x 32 x i16> %val, <vscale x 32 x ptr> %ptrs, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv32i16_ALL(<vscale x 32 x i16> %val, <vscale x 32 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv32i16_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a5, a1, 4
+; CHECK-RV64V-NEXT: slli a2, a1, 1
+; CHECK-RV64V-NEXT: slli a6, a1, 3
+; CHECK-RV64V-NEXT: mv a4, a3
+; CHECK-RV64V-NEXT: bltu a3, a2, .LBB888_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a4, a2
+; CHECK-RV64V-NEXT: .LBB888_2:
+; CHECK-RV64V-NEXT: vl8re64.v v0, (a0)
+; CHECK-RV64V-NEXT: add a5, a0, a5
+; CHECK-RV64V-NEXT: add a0, a0, a6
+; CHECK-RV64V-NEXT: mv a6, a4
+; CHECK-RV64V-NEXT: bltu a4, a1, .LBB888_4
+; CHECK-RV64V-NEXT: # %bb.3:
+; CHECK-RV64V-NEXT: mv a6, a1
+; CHECK-RV64V-NEXT: .LBB888_4:
+; CHECK-RV64V-NEXT: addi sp, sp, -16
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV64V-NEXT: csrr a7, vlenb
+; CHECK-RV64V-NEXT: slli a7, a7, 3
+; CHECK-RV64V-NEXT: sub sp, sp, a7
+; CHECK-RV64V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV64V-NEXT: vl8re64.v v24, (a5)
+; CHECK-RV64V-NEXT: addi a5, sp, 16
+; CHECK-RV64V-NEXT: vs8r.v v24, (a5) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64V-NEXT: vsetvli zero, a6, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: sub a0, a4, a1
+; CHECK-RV64V-NEXT: sub a2, a3, a2
+; CHECK-RV64V-NEXT: sltu a4, a4, a0
+; CHECK-RV64V-NEXT: sltu a3, a3, a2
+; CHECK-RV64V-NEXT: addi a4, a4, -1
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a4, a4, a0
+; CHECK-RV64V-NEXT: and a0, a3, a2
+; CHECK-RV64V-NEXT: vsetvli zero, a4, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v10, (zero), v0
+; CHECK-RV64V-NEXT: mv a2, a0
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB888_6
+; CHECK-RV64V-NEXT: # %bb.5:
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: .LBB888_6:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v12, (zero), v24
+; CHECK-RV64V-NEXT: sub a1, a0, a1
+; CHECK-RV64V-NEXT: sltu a0, a0, a1
+; CHECK-RV64V-NEXT: addi a0, a0, -1
+; CHECK-RV64V-NEXT: and a0, a0, a1
+; CHECK-RV64V-NEXT: addi a1, sp, 16
+; CHECK-RV64V-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v14, (zero), v16
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: add sp, sp, a0
+; CHECK-RV64V-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV64V-NEXT: addi sp, sp, 16
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv32i16_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vl8re32.v v24, (a0)
+; CHECK-RV32V-NEXT: csrr a0, vlenb
+; CHECK-RV32V-NEXT: slli a0, a0, 1
+; CHECK-RV32V-NEXT: mv a2, a1
+; CHECK-RV32V-NEXT: bltu a1, a0, .LBB888_2
+; CHECK-RV32V-NEXT: # %bb.1:
+; CHECK-RV32V-NEXT: mv a2, a0
+; CHECK-RV32V-NEXT: .LBB888_2:
+; CHECK-RV32V-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: sub a0, a1, a0
+; CHECK-RV32V-NEXT: sltu a1, a1, a0
+; CHECK-RV32V-NEXT: addi a1, a1, -1
+; CHECK-RV32V-NEXT: and a0, a1, a0
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v12, (zero), v24
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv32i16_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a6, a1, 4
+; CHECK-RV64VC-NEXT: slli a2, a1, 1
+; CHECK-RV64VC-NEXT: slli a5, a1, 3
+; CHECK-RV64VC-NEXT: mv a4, a3
+; CHECK-RV64VC-NEXT: bltu a3, a2, .LBB888_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a4, a2
+; CHECK-RV64VC-NEXT: .LBB888_2:
+; CHECK-RV64VC-NEXT: vl8re64.v v0, (a0)
+; CHECK-RV64VC-NEXT: add a6, a6, a0
+; CHECK-RV64VC-NEXT: add a0, a0, a5
+; CHECK-RV64VC-NEXT: mv a5, a4
+; CHECK-RV64VC-NEXT: bltu a4, a1, .LBB888_4
+; CHECK-RV64VC-NEXT: # %bb.3:
+; CHECK-RV64VC-NEXT: mv a5, a1
+; CHECK-RV64VC-NEXT: .LBB888_4:
+; CHECK-RV64VC-NEXT: addi sp, sp, -16
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV64VC-NEXT: csrr a7, vlenb
+; CHECK-RV64VC-NEXT: slli a7, a7, 3
+; CHECK-RV64VC-NEXT: sub sp, sp, a7
+; CHECK-RV64VC-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV64VC-NEXT: vl8re64.v v24, (a6)
+; CHECK-RV64VC-NEXT: addi a6, sp, 16
+; CHECK-RV64VC-NEXT: vs8r.v v24, (a6) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64VC-NEXT: vsetvli zero, a5, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: sub a0, a4, a1
+; CHECK-RV64VC-NEXT: sub a2, a3, a2
+; CHECK-RV64VC-NEXT: sltu a4, a4, a0
+; CHECK-RV64VC-NEXT: sltu a3, a3, a2
+; CHECK-RV64VC-NEXT: addi a4, a4, -1
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a4, a4, a0
+; CHECK-RV64VC-NEXT: and a0, a3, a2
+; CHECK-RV64VC-NEXT: vsetvli zero, a4, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v10, (zero), v0
+; CHECK-RV64VC-NEXT: mv a2, a0
+; CHECK-RV64VC-NEXT: bltu a0, a1, .LBB888_6
+; CHECK-RV64VC-NEXT: # %bb.5:
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: .LBB888_6:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v12, (zero), v24
+; CHECK-RV64VC-NEXT: sub a1, a0, a1
+; CHECK-RV64VC-NEXT: sltu a0, a0, a1
+; CHECK-RV64VC-NEXT: addi a0, a0, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a1
+; CHECK-RV64VC-NEXT: addi a1, sp, 16
+; CHECK-RV64VC-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v14, (zero), v16
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: add sp, sp, a0
+; CHECK-RV64VC-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV64VC-NEXT: addi sp, sp, 16
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv32i16_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vl8re32.v v24, (a0)
+; CHECK-RV32VC-NEXT: csrr a0, vlenb
+; CHECK-RV32VC-NEXT: slli a0, a0, 1
+; CHECK-RV32VC-NEXT: mv a2, a1
+; CHECK-RV32VC-NEXT: bltu a1, a0, .LBB888_2
+; CHECK-RV32VC-NEXT: # %bb.1:
+; CHECK-RV32VC-NEXT: mv a2, a0
+; CHECK-RV32VC-NEXT: .LBB888_2:
+; CHECK-RV32VC-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: sub a0, a1, a0
+; CHECK-RV32VC-NEXT: sltu a1, a1, a0
+; CHECK-RV32VC-NEXT: addi a1, a1, -1
+; CHECK-RV32VC-NEXT: and a0, a0, a1
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v12, (zero), v24
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv32i16.nxv32p0(<vscale x 32 x i16> %val, <vscale x 32 x ptr> %ptrs, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_scatter_nxv32i16_DEFAULT(<vscale x 32 x i16> %val, <vscale x 32 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv32i16_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a5, a1, 4
+; CHECK-RV64V-NEXT: slli a2, a1, 1
+; CHECK-RV64V-NEXT: slli a6, a1, 3
+; CHECK-RV64V-NEXT: mv a4, a3
+; CHECK-RV64V-NEXT: bltu a3, a2, .LBB889_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a4, a2
+; CHECK-RV64V-NEXT: .LBB889_2:
+; CHECK-RV64V-NEXT: vl8re64.v v0, (a0)
+; CHECK-RV64V-NEXT: add a5, a0, a5
+; CHECK-RV64V-NEXT: add a0, a0, a6
+; CHECK-RV64V-NEXT: mv a6, a4
+; CHECK-RV64V-NEXT: bltu a4, a1, .LBB889_4
+; CHECK-RV64V-NEXT: # %bb.3:
+; CHECK-RV64V-NEXT: mv a6, a1
+; CHECK-RV64V-NEXT: .LBB889_4:
+; CHECK-RV64V-NEXT: addi sp, sp, -16
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV64V-NEXT: csrr a7, vlenb
+; CHECK-RV64V-NEXT: slli a7, a7, 3
+; CHECK-RV64V-NEXT: sub sp, sp, a7
+; CHECK-RV64V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV64V-NEXT: vl8re64.v v24, (a5)
+; CHECK-RV64V-NEXT: addi a5, sp, 16
+; CHECK-RV64V-NEXT: vs8r.v v24, (a5) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64V-NEXT: vsetvli zero, a6, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: sub a0, a4, a1
+; CHECK-RV64V-NEXT: sub a2, a3, a2
+; CHECK-RV64V-NEXT: sltu a4, a4, a0
+; CHECK-RV64V-NEXT: sltu a3, a3, a2
+; CHECK-RV64V-NEXT: addi a4, a4, -1
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a4, a4, a0
+; CHECK-RV64V-NEXT: and a0, a3, a2
+; CHECK-RV64V-NEXT: vsetvli zero, a4, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v10, (zero), v0
+; CHECK-RV64V-NEXT: mv a2, a0
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB889_6
+; CHECK-RV64V-NEXT: # %bb.5:
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: .LBB889_6:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v12, (zero), v24
+; CHECK-RV64V-NEXT: sub a1, a0, a1
+; CHECK-RV64V-NEXT: sltu a0, a0, a1
+; CHECK-RV64V-NEXT: addi a0, a0, -1
+; CHECK-RV64V-NEXT: and a0, a0, a1
+; CHECK-RV64V-NEXT: addi a1, sp, 16
+; CHECK-RV64V-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v14, (zero), v16
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: add sp, sp, a0
+; CHECK-RV64V-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV64V-NEXT: addi sp, sp, 16
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv32i16_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vl8re32.v v24, (a0)
+; CHECK-RV32V-NEXT: csrr a0, vlenb
+; CHECK-RV32V-NEXT: slli a0, a0, 1
+; CHECK-RV32V-NEXT: mv a2, a1
+; CHECK-RV32V-NEXT: bltu a1, a0, .LBB889_2
+; CHECK-RV32V-NEXT: # %bb.1:
+; CHECK-RV32V-NEXT: mv a2, a0
+; CHECK-RV32V-NEXT: .LBB889_2:
+; CHECK-RV32V-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: sub a0, a1, a0
+; CHECK-RV32V-NEXT: sltu a1, a1, a0
+; CHECK-RV32V-NEXT: addi a1, a1, -1
+; CHECK-RV32V-NEXT: and a0, a1, a0
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v12, (zero), v24
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv32i16_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a6, a1, 4
+; CHECK-RV64VC-NEXT: slli a2, a1, 1
+; CHECK-RV64VC-NEXT: slli a5, a1, 3
+; CHECK-RV64VC-NEXT: mv a4, a3
+; CHECK-RV64VC-NEXT: bltu a3, a2, .LBB889_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a4, a2
+; CHECK-RV64VC-NEXT: .LBB889_2:
+; CHECK-RV64VC-NEXT: vl8re64.v v0, (a0)
+; CHECK-RV64VC-NEXT: add a6, a6, a0
+; CHECK-RV64VC-NEXT: add a0, a0, a5
+; CHECK-RV64VC-NEXT: mv a5, a4
+; CHECK-RV64VC-NEXT: bltu a4, a1, .LBB889_4
+; CHECK-RV64VC-NEXT: # %bb.3:
+; CHECK-RV64VC-NEXT: mv a5, a1
+; CHECK-RV64VC-NEXT: .LBB889_4:
+; CHECK-RV64VC-NEXT: addi sp, sp, -16
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV64VC-NEXT: csrr a7, vlenb
+; CHECK-RV64VC-NEXT: slli a7, a7, 3
+; CHECK-RV64VC-NEXT: sub sp, sp, a7
+; CHECK-RV64VC-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV64VC-NEXT: vl8re64.v v24, (a6)
+; CHECK-RV64VC-NEXT: addi a6, sp, 16
+; CHECK-RV64VC-NEXT: vs8r.v v24, (a6) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: vl8re64.v v24, (a0)
+; CHECK-RV64VC-NEXT: vsetvli zero, a5, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: sub a0, a4, a1
+; CHECK-RV64VC-NEXT: sub a2, a3, a2
+; CHECK-RV64VC-NEXT: sltu a4, a4, a0
+; CHECK-RV64VC-NEXT: sltu a3, a3, a2
+; CHECK-RV64VC-NEXT: addi a4, a4, -1
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a4, a4, a0
+; CHECK-RV64VC-NEXT: and a0, a3, a2
+; CHECK-RV64VC-NEXT: vsetvli zero, a4, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v10, (zero), v0
+; CHECK-RV64VC-NEXT: mv a2, a0
+; CHECK-RV64VC-NEXT: bltu a0, a1, .LBB889_6
+; CHECK-RV64VC-NEXT: # %bb.5:
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: .LBB889_6:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v12, (zero), v24
+; CHECK-RV64VC-NEXT: sub a1, a0, a1
+; CHECK-RV64VC-NEXT: sltu a0, a0, a1
+; CHECK-RV64VC-NEXT: addi a0, a0, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a1
+; CHECK-RV64VC-NEXT: addi a1, sp, 16
+; CHECK-RV64VC-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v14, (zero), v16
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: add sp, sp, a0
+; CHECK-RV64VC-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV64VC-NEXT: addi sp, sp, 16
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv32i16_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vl8re32.v v24, (a0)
+; CHECK-RV32VC-NEXT: csrr a0, vlenb
+; CHECK-RV32VC-NEXT: slli a0, a0, 1
+; CHECK-RV32VC-NEXT: mv a2, a1
+; CHECK-RV32VC-NEXT: bltu a1, a0, .LBB889_2
+; CHECK-RV32VC-NEXT: # %bb.1:
+; CHECK-RV32VC-NEXT: mv a2, a0
+; CHECK-RV32VC-NEXT: .LBB889_2:
+; CHECK-RV32VC-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: sub a0, a1, a0
+; CHECK-RV32VC-NEXT: sltu a1, a1, a0
+; CHECK-RV32VC-NEXT: addi a1, a1, -1
+; CHECK-RV32VC-NEXT: and a0, a0, a1
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v12, (zero), v24
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv32i16.nxv32p0(<vscale x 32 x i16> %val, <vscale x 32 x ptr> %ptrs, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 32 x i16> @test_nontemporal_vp_strided.load_nxv32i16_P1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv32i16_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv32i16_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv32i16_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv32i16_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 32 x i16> @llvm.experimental.vp.strided.load.nxv32i16.i64(ptr %p, i64 %stride, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 32 x i16> %x
+}
+
+
+define <vscale x 32 x i16> @test_nontemporal_vp_strided.load_nxv32i16_PALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv32i16_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv32i16_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv32i16_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv32i16_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 32 x i16> @llvm.experimental.vp.strided.load.nxv32i16.i64(ptr %p, i64 %stride, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 32 x i16> %x
+}
+
+
+define <vscale x 32 x i16> @test_nontemporal_vp_strided.load_nxv32i16_S1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv32i16_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv32i16_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv32i16_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv32i16_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 32 x i16> @llvm.experimental.vp.strided.load.nxv32i16.i64(ptr %p, i64 %stride, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 32 x i16> %x
+}
+
+
+define <vscale x 32 x i16> @test_nontemporal_vp_strided.load_nxv32i16_ALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv32i16_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv32i16_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv32i16_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv32i16_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 32 x i16> @llvm.experimental.vp.strided.load.nxv32i16.i64(ptr %p, i64 %stride, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 32 x i16> %x
+}
+
+define <vscale x 32 x i16> @test_nontemporal_vp_strided.load_nxv32i16_DEFAULT(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv32i16_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv32i16_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv32i16_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv32i16_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 32 x i16> @llvm.experimental.vp.strided.load.nxv32i16.i64(ptr %p, i64 %stride, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 32 x i16> %x
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv32i16_P1(<vscale x 32 x i16> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv32i16_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv32i16_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv32i16_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv32i16_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv32i16.i64(<vscale x 32 x i16> %val, ptr %p, i64 %stride, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv32i16_PALL(<vscale x 32 x i16> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv32i16_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv32i16_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv32i16_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv32i16_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv32i16.i64(<vscale x 32 x i16> %val, ptr %p, i64 %stride, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv32i16_S1(<vscale x 32 x i16> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv32i16_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv32i16_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv32i16_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv32i16_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv32i16.i64(<vscale x 32 x i16> %val, ptr %p, i64 %stride, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv32i16_ALL(<vscale x 32 x i16> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv32i16_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv32i16_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv32i16_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv32i16_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv32i16.i64(<vscale x 32 x i16> %val, ptr %p, i64 %stride, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_strided.store_nxv32i16_DEFAULT(<vscale x 32 x i16> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv32i16_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv32i16_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv32i16_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv32i16_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv32i16.i64(<vscale x 32 x i16> %val, ptr %p, i64 %stride, <vscale x 32 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 64 x i8> @test_nontemporal_vp_load_nxv64i8_P1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv64i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv64i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv64i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv64i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 64 x i8> @llvm.vp.load.nxv64i8.p0(ptr %p, <vscale x 64 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 64 x i8> %x
+}
+
+
+define <vscale x 64 x i8> @test_nontemporal_vp_load_nxv64i8_PALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv64i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv64i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv64i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv64i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 64 x i8> @llvm.vp.load.nxv64i8.p0(ptr %p, <vscale x 64 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 64 x i8> %x
+}
+
+
+define <vscale x 64 x i8> @test_nontemporal_vp_load_nxv64i8_S1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv64i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv64i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv64i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv64i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 64 x i8> @llvm.vp.load.nxv64i8.p0(ptr %p, <vscale x 64 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 64 x i8> %x
+}
+
+
+define <vscale x 64 x i8> @test_nontemporal_vp_load_nxv64i8_ALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv64i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv64i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv64i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv64i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 64 x i8> @llvm.vp.load.nxv64i8.p0(ptr %p, <vscale x 64 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 64 x i8> %x
+}
+
+define <vscale x 64 x i8> @test_nontemporal_vp_load_nxv64i8_DEFAULT(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv64i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv64i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv64i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv64i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 64 x i8> @llvm.vp.load.nxv64i8.p0(ptr %p, <vscale x 64 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 64 x i8> %x
+}
+
+
+define void @test_nontemporal_vp_store_nxv64i8_P1(<vscale x 64 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv64i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vse8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv64i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vse8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv64i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv64i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv64i8.p0(<vscale x 64 x i8> %val, ptr %p, <vscale x 64 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv64i8_PALL(<vscale x 64 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv64i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vse8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv64i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vse8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv64i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv64i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv64i8.p0(<vscale x 64 x i8> %val, ptr %p, <vscale x 64 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv64i8_S1(<vscale x 64 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv64i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vse8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv64i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vse8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv64i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv64i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv64i8.p0(<vscale x 64 x i8> %val, ptr %p, <vscale x 64 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv64i8_ALL(<vscale x 64 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv64i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv64i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv64i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv64i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv64i8.p0(<vscale x 64 x i8> %val, ptr %p, <vscale x 64 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_store_nxv64i8_DEFAULT(<vscale x 64 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv64i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv64i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv64i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv64i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv64i8.p0(<vscale x 64 x i8> %val, ptr %p, <vscale x 64 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 64 x i8> @test_nontemporal_vp_gather_nxv64i8_P1(<vscale x 64 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv64i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: addi sp, sp, -48
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 48
+; CHECK-RV64V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-RV64V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-RV64V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; CHECK-RV64V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; CHECK-RV64V-NEXT: .cfi_offset ra, -8
+; CHECK-RV64V-NEXT: .cfi_offset s0, -16
+; CHECK-RV64V-NEXT: .cfi_offset s1, -24
+; CHECK-RV64V-NEXT: .cfi_offset s2, -32
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 4
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: slli a1, a1, 1
+; CHECK-RV64V-NEXT: add a1, a1, a2
+; CHECK-RV64V-NEXT: sub sp, sp, a1
+; CHECK-RV64V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb
+; CHECK-RV64V-NEXT: mv s0, a6
+; CHECK-RV64V-NEXT: mv s2, a0
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 2
+; CHECK-RV64V-NEXT: add a0, a0, a1
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: csrr s1, vlenb
+; CHECK-RV64V-NEXT: slli a0, s1, 3
+; CHECK-RV64V-NEXT: add a0, s2, a0
+; CHECK-RV64V-NEXT: vl8re64.v v16, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 1
+; CHECK-RV64V-NEXT: add a0, a0, a1
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: slli a0, s1, 4
+; CHECK-RV64V-NEXT: add a0, s2, a0
+; CHECK-RV64V-NEXT: vl8re64.v v16, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 4
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 5
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: li a1, 24
+; CHECK-RV64V-NEXT: mv a0, s1
+; CHECK-RV64V-NEXT: call __muldi3
+; CHECK-RV64V-NEXT: add a0, s2, a0
+; CHECK-RV64V-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: slli a0, s1, 5
+; CHECK-RV64V-NEXT: add a0, s2, a0
+; CHECK-RV64V-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64V-NEXT: addi a0, sp, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: li a1, 40
+; CHECK-RV64V-NEXT: mv a0, s1
+; CHECK-RV64V-NEXT: call __muldi3
+; CHECK-RV64V-NEXT: slli a2, s1, 2
+; CHECK-RV64V-NEXT: sub a1, s0, a2
+; CHECK-RV64V-NEXT: sltu a3, s0, a1
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a3, a3, a1
+; CHECK-RV64V-NEXT: slli a1, s1, 1
+; CHECK-RV64V-NEXT: sub a4, a3, a1
+; CHECK-RV64V-NEXT: sltu a5, a3, a4
+; CHECK-RV64V-NEXT: addi a5, a5, -1
+; CHECK-RV64V-NEXT: and a6, a5, a4
+; CHECK-RV64V-NEXT: sub a4, a6, s1
+; CHECK-RV64V-NEXT: mv a5, a6
+; CHECK-RV64V-NEXT: bltu a6, s1, .LBB910_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a5, s1
+; CHECK-RV64V-NEXT: .LBB910_2:
+; CHECK-RV64V-NEXT: sltu a7, a6, a4
+; CHECK-RV64V-NEXT: bltu a3, a1, .LBB910_4
+; CHECK-RV64V-NEXT: # %bb.3:
+; CHECK-RV64V-NEXT: mv a3, a1
+; CHECK-RV64V-NEXT: .LBB910_4:
+; CHECK-RV64V-NEXT: add a6, s2, a0
+; CHECK-RV64V-NEXT: addi a0, a7, -1
+; CHECK-RV64V-NEXT: sub a7, a3, s1
+; CHECK-RV64V-NEXT: sltu t0, a3, a7
+; CHECK-RV64V-NEXT: addi t0, t0, -1
+; CHECK-RV64V-NEXT: and a7, t0, a7
+; CHECK-RV64V-NEXT: bltu a3, s1, .LBB910_6
+; CHECK-RV64V-NEXT: # %bb.5:
+; CHECK-RV64V-NEXT: mv a3, s1
+; CHECK-RV64V-NEXT: .LBB910_6:
+; CHECK-RV64V-NEXT: vl8re64.v v16, (a6)
+; CHECK-RV64V-NEXT: addi a6, sp, 16
+; CHECK-RV64V-NEXT: vl8r.v v24, (a6) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a5, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v14, (zero), v24
+; CHECK-RV64V-NEXT: csrr a5, vlenb
+; CHECK-RV64V-NEXT: slli a5, a5, 3
+; CHECK-RV64V-NEXT: add a5, sp, a5
+; CHECK-RV64V-NEXT: addi a5, a5, 16
+; CHECK-RV64V-NEXT: vl8r.v v24, (a5) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a7, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v13, (zero), v24
+; CHECK-RV64V-NEXT: csrr a5, vlenb
+; CHECK-RV64V-NEXT: slli a5, a5, 4
+; CHECK-RV64V-NEXT: add a5, sp, a5
+; CHECK-RV64V-NEXT: addi a5, a5, 16
+; CHECK-RV64V-NEXT: vl8r.v v24, (a5) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v12, (zero), v24
+; CHECK-RV64V-NEXT: and a0, a0, a4
+; CHECK-RV64V-NEXT: bltu s0, a2, .LBB910_8
+; CHECK-RV64V-NEXT: # %bb.7:
+; CHECK-RV64V-NEXT: mv s0, a2
+; CHECK-RV64V-NEXT: .LBB910_8:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v15, (zero), v16
+; CHECK-RV64V-NEXT: vl8re64.v v16, (s2)
+; CHECK-RV64V-NEXT: sub a0, s0, a1
+; CHECK-RV64V-NEXT: sltu a2, s0, a0
+; CHECK-RV64V-NEXT: addi a2, a2, -1
+; CHECK-RV64V-NEXT: and a0, a2, a0
+; CHECK-RV64V-NEXT: sub a2, a0, s1
+; CHECK-RV64V-NEXT: sltu a3, a0, a2
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a2, a3, a2
+; CHECK-RV64V-NEXT: bltu a0, s1, .LBB910_10
+; CHECK-RV64V-NEXT: # %bb.9:
+; CHECK-RV64V-NEXT: mv a0, s1
+; CHECK-RV64V-NEXT: .LBB910_10:
+; CHECK-RV64V-NEXT: csrr a3, vlenb
+; CHECK-RV64V-NEXT: slli a3, a3, 3
+; CHECK-RV64V-NEXT: mv a4, a3
+; CHECK-RV64V-NEXT: slli a3, a3, 1
+; CHECK-RV64V-NEXT: add a3, a3, a4
+; CHECK-RV64V-NEXT: add a3, sp, a3
+; CHECK-RV64V-NEXT: addi a3, a3, 16
+; CHECK-RV64V-NEXT: vl8r.v v24, (a3) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v11, (zero), v24
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v10, (zero), v16
+; CHECK-RV64V-NEXT: bltu s0, a1, .LBB910_12
+; CHECK-RV64V-NEXT: # %bb.11:
+; CHECK-RV64V-NEXT: mv s0, a1
+; CHECK-RV64V-NEXT: .LBB910_12:
+; CHECK-RV64V-NEXT: sub a0, s0, s1
+; CHECK-RV64V-NEXT: sltu a1, s0, a0
+; CHECK-RV64V-NEXT: addi a1, a1, -1
+; CHECK-RV64V-NEXT: and a0, a1, a0
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 3
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: slli a1, a1, 2
+; CHECK-RV64V-NEXT: add a1, a1, a2
+; CHECK-RV64V-NEXT: add a1, sp, a1
+; CHECK-RV64V-NEXT: addi a1, a1, 16
+; CHECK-RV64V-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v9, (zero), v16
+; CHECK-RV64V-NEXT: bltu s0, s1, .LBB910_14
+; CHECK-RV64V-NEXT: # %bb.13:
+; CHECK-RV64V-NEXT: mv s0, s1
+; CHECK-RV64V-NEXT: .LBB910_14:
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 5
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, s0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 4
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 1
+; CHECK-RV64V-NEXT: add a0, a0, a1
+; CHECK-RV64V-NEXT: add sp, sp, a0
+; CHECK-RV64V-NEXT: .cfi_def_cfa sp, 48
+; CHECK-RV64V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-RV64V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-RV64V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; CHECK-RV64V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; CHECK-RV64V-NEXT: .cfi_restore ra
+; CHECK-RV64V-NEXT: .cfi_restore s0
+; CHECK-RV64V-NEXT: .cfi_restore s1
+; CHECK-RV64V-NEXT: .cfi_restore s2
+; CHECK-RV64V-NEXT: addi sp, sp, 48
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv64i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: addi sp, sp, -16
+; CHECK-RV32V-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV32V-NEXT: csrr a1, vlenb
+; CHECK-RV32V-NEXT: slli a1, a1, 3
+; CHECK-RV32V-NEXT: sub sp, sp, a1
+; CHECK-RV32V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV32V-NEXT: addi a1, sp, 16
+; CHECK-RV32V-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV32V-NEXT: csrr a4, vlenb
+; CHECK-RV32V-NEXT: vl8re32.v v8, (a0)
+; CHECK-RV32V-NEXT: slli a3, a4, 3
+; CHECK-RV32V-NEXT: slli a1, a4, 2
+; CHECK-RV32V-NEXT: add a0, a0, a3
+; CHECK-RV32V-NEXT: sub a3, a2, a1
+; CHECK-RV32V-NEXT: vl8re32.v v0, (a0)
+; CHECK-RV32V-NEXT: sltu a0, a2, a3
+; CHECK-RV32V-NEXT: addi a0, a0, -1
+; CHECK-RV32V-NEXT: and a3, a0, a3
+; CHECK-RV32V-NEXT: slli a0, a4, 1
+; CHECK-RV32V-NEXT: sub a4, a3, a0
+; CHECK-RV32V-NEXT: sltu a5, a3, a4
+; CHECK-RV32V-NEXT: addi a5, a5, -1
+; CHECK-RV32V-NEXT: and a4, a5, a4
+; CHECK-RV32V-NEXT: bltu a3, a0, .LBB910_2
+; CHECK-RV32V-NEXT: # %bb.1:
+; CHECK-RV32V-NEXT: mv a3, a0
+; CHECK-RV32V-NEXT: .LBB910_2:
+; CHECK-RV32V-NEXT: vsetvli zero, a4, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vluxei32.v v30, (zero), v0
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vluxei32.v v28, (zero), v8
+; CHECK-RV32V-NEXT: bltu a2, a1, .LBB910_4
+; CHECK-RV32V-NEXT: # %bb.3:
+; CHECK-RV32V-NEXT: mv a2, a1
+; CHECK-RV32V-NEXT: .LBB910_4:
+; CHECK-RV32V-NEXT: sub a1, a2, a0
+; CHECK-RV32V-NEXT: sltu a3, a2, a1
+; CHECK-RV32V-NEXT: addi a3, a3, -1
+; CHECK-RV32V-NEXT: and a1, a3, a1
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vluxei32.v v26, (zero), v16
+; CHECK-RV32V-NEXT: bltu a2, a0, .LBB910_6
+; CHECK-RV32V-NEXT: # %bb.5:
+; CHECK-RV32V-NEXT: mv a2, a0
+; CHECK-RV32V-NEXT: .LBB910_6:
+; CHECK-RV32V-NEXT: addi a0, sp, 16
+; CHECK-RV32V-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV32V-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vluxei32.v v24, (zero), v8
+; CHECK-RV32V-NEXT: vmv8r.v v8, v24
+; CHECK-RV32V-NEXT: csrr a0, vlenb
+; CHECK-RV32V-NEXT: slli a0, a0, 3
+; CHECK-RV32V-NEXT: add sp, sp, a0
+; CHECK-RV32V-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV32V-NEXT: addi sp, sp, 16
+; CHECK-RV32V-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv64i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: addi sp, sp, -48
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 48
+; CHECK-RV64VC-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-RV64VC-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-RV64VC-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; CHECK-RV64VC-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; CHECK-RV64VC-NEXT: .cfi_offset ra, -8
+; CHECK-RV64VC-NEXT: .cfi_offset s0, -16
+; CHECK-RV64VC-NEXT: .cfi_offset s1, -24
+; CHECK-RV64VC-NEXT: .cfi_offset s2, -32
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 4
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: slli a1, a1, 1
+; CHECK-RV64VC-NEXT: add a1, a1, a2
+; CHECK-RV64VC-NEXT: sub sp, sp, a1
+; CHECK-RV64VC-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb
+; CHECK-RV64VC-NEXT: mv s0, a6
+; CHECK-RV64VC-NEXT: mv s2, a0
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 2
+; CHECK-RV64VC-NEXT: add a0, a0, a1
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: csrr s1, vlenb
+; CHECK-RV64VC-NEXT: slli a0, s1, 3
+; CHECK-RV64VC-NEXT: add a0, a0, s2
+; CHECK-RV64VC-NEXT: vl8re64.v v16, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 1
+; CHECK-RV64VC-NEXT: add a0, a0, a1
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: slli a0, s1, 4
+; CHECK-RV64VC-NEXT: add a0, a0, s2
+; CHECK-RV64VC-NEXT: vl8re64.v v16, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 4
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 5
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: li a1, 24
+; CHECK-RV64VC-NEXT: mv a0, s1
+; CHECK-RV64VC-NEXT: call __muldi3
+; CHECK-RV64VC-NEXT: add a0, a0, s2
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: slli a0, s1, 5
+; CHECK-RV64VC-NEXT: add a0, a0, s2
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64VC-NEXT: addi a0, sp, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: li a1, 40
+; CHECK-RV64VC-NEXT: mv a0, s1
+; CHECK-RV64VC-NEXT: call __muldi3
+; CHECK-RV64VC-NEXT: slli a7, s1, 2
+; CHECK-RV64VC-NEXT: sub a1, s0, a7
+; CHECK-RV64VC-NEXT: sltu a2, s0, a1
+; CHECK-RV64VC-NEXT: addi a2, a2, -1
+; CHECK-RV64VC-NEXT: and a3, a2, a1
+; CHECK-RV64VC-NEXT: slli a1, s1, 1
+; CHECK-RV64VC-NEXT: sub a2, a3, a1
+; CHECK-RV64VC-NEXT: sltu a4, a3, a2
+; CHECK-RV64VC-NEXT: addi a4, a4, -1
+; CHECK-RV64VC-NEXT: and a2, a2, a4
+; CHECK-RV64VC-NEXT: sub t0, a2, s1
+; CHECK-RV64VC-NEXT: mv a5, a2
+; CHECK-RV64VC-NEXT: bltu a2, s1, .LBB910_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a5, s1
+; CHECK-RV64VC-NEXT: .LBB910_2:
+; CHECK-RV64VC-NEXT: sltu a6, a2, t0
+; CHECK-RV64VC-NEXT: bltu a3, a1, .LBB910_4
+; CHECK-RV64VC-NEXT: # %bb.3:
+; CHECK-RV64VC-NEXT: mv a3, a1
+; CHECK-RV64VC-NEXT: .LBB910_4:
+; CHECK-RV64VC-NEXT: add a0, a0, s2
+; CHECK-RV64VC-NEXT: addi a6, a6, -1
+; CHECK-RV64VC-NEXT: sub a2, a3, s1
+; CHECK-RV64VC-NEXT: sltu a4, a3, a2
+; CHECK-RV64VC-NEXT: addi a4, a4, -1
+; CHECK-RV64VC-NEXT: and a2, a2, a4
+; CHECK-RV64VC-NEXT: bltu a3, s1, .LBB910_6
+; CHECK-RV64VC-NEXT: # %bb.5:
+; CHECK-RV64VC-NEXT: mv a3, s1
+; CHECK-RV64VC-NEXT: .LBB910_6:
+; CHECK-RV64VC-NEXT: vl8re64.v v16, (a0)
+; CHECK-RV64VC-NEXT: addi a0, sp, 16
+; CHECK-RV64VC-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a5, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v14, (zero), v24
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v13, (zero), v24
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 4
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v12, (zero), v24
+; CHECK-RV64VC-NEXT: and a0, a6, t0
+; CHECK-RV64VC-NEXT: bltu s0, a7, .LBB910_8
+; CHECK-RV64VC-NEXT: # %bb.7:
+; CHECK-RV64VC-NEXT: mv s0, a7
+; CHECK-RV64VC-NEXT: .LBB910_8:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v15, (zero), v16
+; CHECK-RV64VC-NEXT: vl8re64.v v16, (s2)
+; CHECK-RV64VC-NEXT: sub a0, s0, a1
+; CHECK-RV64VC-NEXT: sltu a2, s0, a0
+; CHECK-RV64VC-NEXT: addi a2, a2, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a2
+; CHECK-RV64VC-NEXT: sub a2, a0, s1
+; CHECK-RV64VC-NEXT: sltu a3, a0, a2
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a2, a2, a3
+; CHECK-RV64VC-NEXT: bltu a0, s1, .LBB910_10
+; CHECK-RV64VC-NEXT: # %bb.9:
+; CHECK-RV64VC-NEXT: mv a0, s1
+; CHECK-RV64VC-NEXT: .LBB910_10:
+; CHECK-RV64VC-NEXT: csrr a3, vlenb
+; CHECK-RV64VC-NEXT: slli a3, a3, 3
+; CHECK-RV64VC-NEXT: mv a4, a3
+; CHECK-RV64VC-NEXT: slli a3, a3, 1
+; CHECK-RV64VC-NEXT: add a3, a3, a4
+; CHECK-RV64VC-NEXT: add a3, a3, sp
+; CHECK-RV64VC-NEXT: addi a3, a3, 16
+; CHECK-RV64VC-NEXT: vl8r.v v24, (a3) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v11, (zero), v24
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v10, (zero), v16
+; CHECK-RV64VC-NEXT: bltu s0, a1, .LBB910_12
+; CHECK-RV64VC-NEXT: # %bb.11:
+; CHECK-RV64VC-NEXT: mv s0, a1
+; CHECK-RV64VC-NEXT: .LBB910_12:
+; CHECK-RV64VC-NEXT: sub a0, s0, s1
+; CHECK-RV64VC-NEXT: sltu a1, s0, a0
+; CHECK-RV64VC-NEXT: addi a1, a1, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a1
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 3
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: slli a1, a1, 2
+; CHECK-RV64VC-NEXT: add a1, a1, a2
+; CHECK-RV64VC-NEXT: add a1, a1, sp
+; CHECK-RV64VC-NEXT: addi a1, a1, 16
+; CHECK-RV64VC-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v9, (zero), v16
+; CHECK-RV64VC-NEXT: bltu s0, s1, .LBB910_14
+; CHECK-RV64VC-NEXT: # %bb.13:
+; CHECK-RV64VC-NEXT: mv s0, s1
+; CHECK-RV64VC-NEXT: .LBB910_14:
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 5
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, s0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 4
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 1
+; CHECK-RV64VC-NEXT: add a0, a0, a1
+; CHECK-RV64VC-NEXT: add sp, sp, a0
+; CHECK-RV64VC-NEXT: .cfi_def_cfa sp, 48
+; CHECK-RV64VC-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-RV64VC-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-RV64VC-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; CHECK-RV64VC-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; CHECK-RV64VC-NEXT: .cfi_restore ra
+; CHECK-RV64VC-NEXT: .cfi_restore s0
+; CHECK-RV64VC-NEXT: .cfi_restore s1
+; CHECK-RV64VC-NEXT: .cfi_restore s2
+; CHECK-RV64VC-NEXT: addi sp, sp, 48
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv64i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: addi sp, sp, -16
+; CHECK-RV32VC-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV32VC-NEXT: csrr a1, vlenb
+; CHECK-RV32VC-NEXT: slli a1, a1, 3
+; CHECK-RV32VC-NEXT: sub sp, sp, a1
+; CHECK-RV32VC-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV32VC-NEXT: addi a1, sp, 16
+; CHECK-RV32VC-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV32VC-NEXT: csrr a4, vlenb
+; CHECK-RV32VC-NEXT: vl8re32.v v8, (a0)
+; CHECK-RV32VC-NEXT: slli a3, a4, 3
+; CHECK-RV32VC-NEXT: slli a1, a4, 2
+; CHECK-RV32VC-NEXT: add a0, a0, a3
+; CHECK-RV32VC-NEXT: sub a3, a2, a1
+; CHECK-RV32VC-NEXT: vl8re32.v v0, (a0)
+; CHECK-RV32VC-NEXT: sltu a0, a2, a3
+; CHECK-RV32VC-NEXT: addi a0, a0, -1
+; CHECK-RV32VC-NEXT: and a3, a3, a0
+; CHECK-RV32VC-NEXT: slli a0, a4, 1
+; CHECK-RV32VC-NEXT: sub a4, a3, a0
+; CHECK-RV32VC-NEXT: sltu a5, a3, a4
+; CHECK-RV32VC-NEXT: addi a5, a5, -1
+; CHECK-RV32VC-NEXT: and a4, a4, a5
+; CHECK-RV32VC-NEXT: bltu a3, a0, .LBB910_2
+; CHECK-RV32VC-NEXT: # %bb.1:
+; CHECK-RV32VC-NEXT: mv a3, a0
+; CHECK-RV32VC-NEXT: .LBB910_2:
+; CHECK-RV32VC-NEXT: vsetvli zero, a4, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vluxei32.v v30, (zero), v0
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vluxei32.v v28, (zero), v8
+; CHECK-RV32VC-NEXT: bltu a2, a1, .LBB910_4
+; CHECK-RV32VC-NEXT: # %bb.3:
+; CHECK-RV32VC-NEXT: mv a2, a1
+; CHECK-RV32VC-NEXT: .LBB910_4:
+; CHECK-RV32VC-NEXT: sub a1, a2, a0
+; CHECK-RV32VC-NEXT: sltu a3, a2, a1
+; CHECK-RV32VC-NEXT: addi a3, a3, -1
+; CHECK-RV32VC-NEXT: and a1, a1, a3
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vluxei32.v v26, (zero), v16
+; CHECK-RV32VC-NEXT: bltu a2, a0, .LBB910_6
+; CHECK-RV32VC-NEXT: # %bb.5:
+; CHECK-RV32VC-NEXT: mv a2, a0
+; CHECK-RV32VC-NEXT: .LBB910_6:
+; CHECK-RV32VC-NEXT: addi a0, sp, 16
+; CHECK-RV32VC-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV32VC-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vluxei32.v v24, (zero), v8
+; CHECK-RV32VC-NEXT: vmv8r.v v8, v24
+; CHECK-RV32VC-NEXT: csrr a0, vlenb
+; CHECK-RV32VC-NEXT: slli a0, a0, 3
+; CHECK-RV32VC-NEXT: add sp, sp, a0
+; CHECK-RV32VC-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV32VC-NEXT: addi sp, sp, 16
+; CHECK-RV32VC-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 64 x i8> @llvm.vp.gather.nxv64i8.nxv64p0(<vscale x 64 x ptr> %ptrs, <vscale x 64 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 64 x i8> %x
+}
+
+
+define <vscale x 64 x i8> @test_nontemporal_vp_gather_nxv64i8_PALL(<vscale x 64 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv64i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: addi sp, sp, -48
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 48
+; CHECK-RV64V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-RV64V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-RV64V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; CHECK-RV64V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; CHECK-RV64V-NEXT: .cfi_offset ra, -8
+; CHECK-RV64V-NEXT: .cfi_offset s0, -16
+; CHECK-RV64V-NEXT: .cfi_offset s1, -24
+; CHECK-RV64V-NEXT: .cfi_offset s2, -32
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 4
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: slli a1, a1, 1
+; CHECK-RV64V-NEXT: add a1, a1, a2
+; CHECK-RV64V-NEXT: sub sp, sp, a1
+; CHECK-RV64V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb
+; CHECK-RV64V-NEXT: mv s0, a6
+; CHECK-RV64V-NEXT: mv s2, a0
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 2
+; CHECK-RV64V-NEXT: add a0, a0, a1
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: csrr s1, vlenb
+; CHECK-RV64V-NEXT: slli a0, s1, 3
+; CHECK-RV64V-NEXT: add a0, s2, a0
+; CHECK-RV64V-NEXT: vl8re64.v v16, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 1
+; CHECK-RV64V-NEXT: add a0, a0, a1
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: slli a0, s1, 4
+; CHECK-RV64V-NEXT: add a0, s2, a0
+; CHECK-RV64V-NEXT: vl8re64.v v16, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 4
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 5
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: li a1, 24
+; CHECK-RV64V-NEXT: mv a0, s1
+; CHECK-RV64V-NEXT: call __muldi3
+; CHECK-RV64V-NEXT: add a0, s2, a0
+; CHECK-RV64V-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: slli a0, s1, 5
+; CHECK-RV64V-NEXT: add a0, s2, a0
+; CHECK-RV64V-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64V-NEXT: addi a0, sp, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: li a1, 40
+; CHECK-RV64V-NEXT: mv a0, s1
+; CHECK-RV64V-NEXT: call __muldi3
+; CHECK-RV64V-NEXT: slli a2, s1, 2
+; CHECK-RV64V-NEXT: sub a1, s0, a2
+; CHECK-RV64V-NEXT: sltu a3, s0, a1
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a3, a3, a1
+; CHECK-RV64V-NEXT: slli a1, s1, 1
+; CHECK-RV64V-NEXT: sub a4, a3, a1
+; CHECK-RV64V-NEXT: sltu a5, a3, a4
+; CHECK-RV64V-NEXT: addi a5, a5, -1
+; CHECK-RV64V-NEXT: and a6, a5, a4
+; CHECK-RV64V-NEXT: sub a4, a6, s1
+; CHECK-RV64V-NEXT: mv a5, a6
+; CHECK-RV64V-NEXT: bltu a6, s1, .LBB911_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a5, s1
+; CHECK-RV64V-NEXT: .LBB911_2:
+; CHECK-RV64V-NEXT: sltu a7, a6, a4
+; CHECK-RV64V-NEXT: bltu a3, a1, .LBB911_4
+; CHECK-RV64V-NEXT: # %bb.3:
+; CHECK-RV64V-NEXT: mv a3, a1
+; CHECK-RV64V-NEXT: .LBB911_4:
+; CHECK-RV64V-NEXT: add a6, s2, a0
+; CHECK-RV64V-NEXT: addi a0, a7, -1
+; CHECK-RV64V-NEXT: sub a7, a3, s1
+; CHECK-RV64V-NEXT: sltu t0, a3, a7
+; CHECK-RV64V-NEXT: addi t0, t0, -1
+; CHECK-RV64V-NEXT: and a7, t0, a7
+; CHECK-RV64V-NEXT: bltu a3, s1, .LBB911_6
+; CHECK-RV64V-NEXT: # %bb.5:
+; CHECK-RV64V-NEXT: mv a3, s1
+; CHECK-RV64V-NEXT: .LBB911_6:
+; CHECK-RV64V-NEXT: vl8re64.v v16, (a6)
+; CHECK-RV64V-NEXT: addi a6, sp, 16
+; CHECK-RV64V-NEXT: vl8r.v v24, (a6) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a5, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v14, (zero), v24
+; CHECK-RV64V-NEXT: csrr a5, vlenb
+; CHECK-RV64V-NEXT: slli a5, a5, 3
+; CHECK-RV64V-NEXT: add a5, sp, a5
+; CHECK-RV64V-NEXT: addi a5, a5, 16
+; CHECK-RV64V-NEXT: vl8r.v v24, (a5) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a7, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v13, (zero), v24
+; CHECK-RV64V-NEXT: csrr a5, vlenb
+; CHECK-RV64V-NEXT: slli a5, a5, 4
+; CHECK-RV64V-NEXT: add a5, sp, a5
+; CHECK-RV64V-NEXT: addi a5, a5, 16
+; CHECK-RV64V-NEXT: vl8r.v v24, (a5) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v12, (zero), v24
+; CHECK-RV64V-NEXT: and a0, a0, a4
+; CHECK-RV64V-NEXT: bltu s0, a2, .LBB911_8
+; CHECK-RV64V-NEXT: # %bb.7:
+; CHECK-RV64V-NEXT: mv s0, a2
+; CHECK-RV64V-NEXT: .LBB911_8:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v15, (zero), v16
+; CHECK-RV64V-NEXT: vl8re64.v v16, (s2)
+; CHECK-RV64V-NEXT: sub a0, s0, a1
+; CHECK-RV64V-NEXT: sltu a2, s0, a0
+; CHECK-RV64V-NEXT: addi a2, a2, -1
+; CHECK-RV64V-NEXT: and a0, a2, a0
+; CHECK-RV64V-NEXT: sub a2, a0, s1
+; CHECK-RV64V-NEXT: sltu a3, a0, a2
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a2, a3, a2
+; CHECK-RV64V-NEXT: bltu a0, s1, .LBB911_10
+; CHECK-RV64V-NEXT: # %bb.9:
+; CHECK-RV64V-NEXT: mv a0, s1
+; CHECK-RV64V-NEXT: .LBB911_10:
+; CHECK-RV64V-NEXT: csrr a3, vlenb
+; CHECK-RV64V-NEXT: slli a3, a3, 3
+; CHECK-RV64V-NEXT: mv a4, a3
+; CHECK-RV64V-NEXT: slli a3, a3, 1
+; CHECK-RV64V-NEXT: add a3, a3, a4
+; CHECK-RV64V-NEXT: add a3, sp, a3
+; CHECK-RV64V-NEXT: addi a3, a3, 16
+; CHECK-RV64V-NEXT: vl8r.v v24, (a3) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v11, (zero), v24
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v10, (zero), v16
+; CHECK-RV64V-NEXT: bltu s0, a1, .LBB911_12
+; CHECK-RV64V-NEXT: # %bb.11:
+; CHECK-RV64V-NEXT: mv s0, a1
+; CHECK-RV64V-NEXT: .LBB911_12:
+; CHECK-RV64V-NEXT: sub a0, s0, s1
+; CHECK-RV64V-NEXT: sltu a1, s0, a0
+; CHECK-RV64V-NEXT: addi a1, a1, -1
+; CHECK-RV64V-NEXT: and a0, a1, a0
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 3
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: slli a1, a1, 2
+; CHECK-RV64V-NEXT: add a1, a1, a2
+; CHECK-RV64V-NEXT: add a1, sp, a1
+; CHECK-RV64V-NEXT: addi a1, a1, 16
+; CHECK-RV64V-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v9, (zero), v16
+; CHECK-RV64V-NEXT: bltu s0, s1, .LBB911_14
+; CHECK-RV64V-NEXT: # %bb.13:
+; CHECK-RV64V-NEXT: mv s0, s1
+; CHECK-RV64V-NEXT: .LBB911_14:
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 5
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, s0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 4
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 1
+; CHECK-RV64V-NEXT: add a0, a0, a1
+; CHECK-RV64V-NEXT: add sp, sp, a0
+; CHECK-RV64V-NEXT: .cfi_def_cfa sp, 48
+; CHECK-RV64V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-RV64V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-RV64V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; CHECK-RV64V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; CHECK-RV64V-NEXT: .cfi_restore ra
+; CHECK-RV64V-NEXT: .cfi_restore s0
+; CHECK-RV64V-NEXT: .cfi_restore s1
+; CHECK-RV64V-NEXT: .cfi_restore s2
+; CHECK-RV64V-NEXT: addi sp, sp, 48
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv64i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: addi sp, sp, -16
+; CHECK-RV32V-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV32V-NEXT: csrr a1, vlenb
+; CHECK-RV32V-NEXT: slli a1, a1, 3
+; CHECK-RV32V-NEXT: sub sp, sp, a1
+; CHECK-RV32V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV32V-NEXT: addi a1, sp, 16
+; CHECK-RV32V-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV32V-NEXT: csrr a4, vlenb
+; CHECK-RV32V-NEXT: vl8re32.v v8, (a0)
+; CHECK-RV32V-NEXT: slli a3, a4, 3
+; CHECK-RV32V-NEXT: slli a1, a4, 2
+; CHECK-RV32V-NEXT: add a0, a0, a3
+; CHECK-RV32V-NEXT: sub a3, a2, a1
+; CHECK-RV32V-NEXT: vl8re32.v v0, (a0)
+; CHECK-RV32V-NEXT: sltu a0, a2, a3
+; CHECK-RV32V-NEXT: addi a0, a0, -1
+; CHECK-RV32V-NEXT: and a3, a0, a3
+; CHECK-RV32V-NEXT: slli a0, a4, 1
+; CHECK-RV32V-NEXT: sub a4, a3, a0
+; CHECK-RV32V-NEXT: sltu a5, a3, a4
+; CHECK-RV32V-NEXT: addi a5, a5, -1
+; CHECK-RV32V-NEXT: and a4, a5, a4
+; CHECK-RV32V-NEXT: bltu a3, a0, .LBB911_2
+; CHECK-RV32V-NEXT: # %bb.1:
+; CHECK-RV32V-NEXT: mv a3, a0
+; CHECK-RV32V-NEXT: .LBB911_2:
+; CHECK-RV32V-NEXT: vsetvli zero, a4, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vluxei32.v v30, (zero), v0
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vluxei32.v v28, (zero), v8
+; CHECK-RV32V-NEXT: bltu a2, a1, .LBB911_4
+; CHECK-RV32V-NEXT: # %bb.3:
+; CHECK-RV32V-NEXT: mv a2, a1
+; CHECK-RV32V-NEXT: .LBB911_4:
+; CHECK-RV32V-NEXT: sub a1, a2, a0
+; CHECK-RV32V-NEXT: sltu a3, a2, a1
+; CHECK-RV32V-NEXT: addi a3, a3, -1
+; CHECK-RV32V-NEXT: and a1, a3, a1
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vluxei32.v v26, (zero), v16
+; CHECK-RV32V-NEXT: bltu a2, a0, .LBB911_6
+; CHECK-RV32V-NEXT: # %bb.5:
+; CHECK-RV32V-NEXT: mv a2, a0
+; CHECK-RV32V-NEXT: .LBB911_6:
+; CHECK-RV32V-NEXT: addi a0, sp, 16
+; CHECK-RV32V-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV32V-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vluxei32.v v24, (zero), v8
+; CHECK-RV32V-NEXT: vmv8r.v v8, v24
+; CHECK-RV32V-NEXT: csrr a0, vlenb
+; CHECK-RV32V-NEXT: slli a0, a0, 3
+; CHECK-RV32V-NEXT: add sp, sp, a0
+; CHECK-RV32V-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV32V-NEXT: addi sp, sp, 16
+; CHECK-RV32V-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv64i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: addi sp, sp, -48
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 48
+; CHECK-RV64VC-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-RV64VC-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-RV64VC-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; CHECK-RV64VC-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; CHECK-RV64VC-NEXT: .cfi_offset ra, -8
+; CHECK-RV64VC-NEXT: .cfi_offset s0, -16
+; CHECK-RV64VC-NEXT: .cfi_offset s1, -24
+; CHECK-RV64VC-NEXT: .cfi_offset s2, -32
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 4
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: slli a1, a1, 1
+; CHECK-RV64VC-NEXT: add a1, a1, a2
+; CHECK-RV64VC-NEXT: sub sp, sp, a1
+; CHECK-RV64VC-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb
+; CHECK-RV64VC-NEXT: mv s0, a6
+; CHECK-RV64VC-NEXT: mv s2, a0
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 2
+; CHECK-RV64VC-NEXT: add a0, a0, a1
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: csrr s1, vlenb
+; CHECK-RV64VC-NEXT: slli a0, s1, 3
+; CHECK-RV64VC-NEXT: add a0, a0, s2
+; CHECK-RV64VC-NEXT: vl8re64.v v16, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 1
+; CHECK-RV64VC-NEXT: add a0, a0, a1
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: slli a0, s1, 4
+; CHECK-RV64VC-NEXT: add a0, a0, s2
+; CHECK-RV64VC-NEXT: vl8re64.v v16, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 4
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 5
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: li a1, 24
+; CHECK-RV64VC-NEXT: mv a0, s1
+; CHECK-RV64VC-NEXT: call __muldi3
+; CHECK-RV64VC-NEXT: add a0, a0, s2
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: slli a0, s1, 5
+; CHECK-RV64VC-NEXT: add a0, a0, s2
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64VC-NEXT: addi a0, sp, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: li a1, 40
+; CHECK-RV64VC-NEXT: mv a0, s1
+; CHECK-RV64VC-NEXT: call __muldi3
+; CHECK-RV64VC-NEXT: slli a7, s1, 2
+; CHECK-RV64VC-NEXT: sub a1, s0, a7
+; CHECK-RV64VC-NEXT: sltu a2, s0, a1
+; CHECK-RV64VC-NEXT: addi a2, a2, -1
+; CHECK-RV64VC-NEXT: and a3, a2, a1
+; CHECK-RV64VC-NEXT: slli a1, s1, 1
+; CHECK-RV64VC-NEXT: sub a2, a3, a1
+; CHECK-RV64VC-NEXT: sltu a4, a3, a2
+; CHECK-RV64VC-NEXT: addi a4, a4, -1
+; CHECK-RV64VC-NEXT: and a2, a2, a4
+; CHECK-RV64VC-NEXT: sub t0, a2, s1
+; CHECK-RV64VC-NEXT: mv a5, a2
+; CHECK-RV64VC-NEXT: bltu a2, s1, .LBB911_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a5, s1
+; CHECK-RV64VC-NEXT: .LBB911_2:
+; CHECK-RV64VC-NEXT: sltu a6, a2, t0
+; CHECK-RV64VC-NEXT: bltu a3, a1, .LBB911_4
+; CHECK-RV64VC-NEXT: # %bb.3:
+; CHECK-RV64VC-NEXT: mv a3, a1
+; CHECK-RV64VC-NEXT: .LBB911_4:
+; CHECK-RV64VC-NEXT: add a0, a0, s2
+; CHECK-RV64VC-NEXT: addi a6, a6, -1
+; CHECK-RV64VC-NEXT: sub a2, a3, s1
+; CHECK-RV64VC-NEXT: sltu a4, a3, a2
+; CHECK-RV64VC-NEXT: addi a4, a4, -1
+; CHECK-RV64VC-NEXT: and a2, a2, a4
+; CHECK-RV64VC-NEXT: bltu a3, s1, .LBB911_6
+; CHECK-RV64VC-NEXT: # %bb.5:
+; CHECK-RV64VC-NEXT: mv a3, s1
+; CHECK-RV64VC-NEXT: .LBB911_6:
+; CHECK-RV64VC-NEXT: vl8re64.v v16, (a0)
+; CHECK-RV64VC-NEXT: addi a0, sp, 16
+; CHECK-RV64VC-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a5, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v14, (zero), v24
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v13, (zero), v24
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 4
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v12, (zero), v24
+; CHECK-RV64VC-NEXT: and a0, a6, t0
+; CHECK-RV64VC-NEXT: bltu s0, a7, .LBB911_8
+; CHECK-RV64VC-NEXT: # %bb.7:
+; CHECK-RV64VC-NEXT: mv s0, a7
+; CHECK-RV64VC-NEXT: .LBB911_8:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v15, (zero), v16
+; CHECK-RV64VC-NEXT: vl8re64.v v16, (s2)
+; CHECK-RV64VC-NEXT: sub a0, s0, a1
+; CHECK-RV64VC-NEXT: sltu a2, s0, a0
+; CHECK-RV64VC-NEXT: addi a2, a2, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a2
+; CHECK-RV64VC-NEXT: sub a2, a0, s1
+; CHECK-RV64VC-NEXT: sltu a3, a0, a2
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a2, a2, a3
+; CHECK-RV64VC-NEXT: bltu a0, s1, .LBB911_10
+; CHECK-RV64VC-NEXT: # %bb.9:
+; CHECK-RV64VC-NEXT: mv a0, s1
+; CHECK-RV64VC-NEXT: .LBB911_10:
+; CHECK-RV64VC-NEXT: csrr a3, vlenb
+; CHECK-RV64VC-NEXT: slli a3, a3, 3
+; CHECK-RV64VC-NEXT: mv a4, a3
+; CHECK-RV64VC-NEXT: slli a3, a3, 1
+; CHECK-RV64VC-NEXT: add a3, a3, a4
+; CHECK-RV64VC-NEXT: add a3, a3, sp
+; CHECK-RV64VC-NEXT: addi a3, a3, 16
+; CHECK-RV64VC-NEXT: vl8r.v v24, (a3) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v11, (zero), v24
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v10, (zero), v16
+; CHECK-RV64VC-NEXT: bltu s0, a1, .LBB911_12
+; CHECK-RV64VC-NEXT: # %bb.11:
+; CHECK-RV64VC-NEXT: mv s0, a1
+; CHECK-RV64VC-NEXT: .LBB911_12:
+; CHECK-RV64VC-NEXT: sub a0, s0, s1
+; CHECK-RV64VC-NEXT: sltu a1, s0, a0
+; CHECK-RV64VC-NEXT: addi a1, a1, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a1
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 3
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: slli a1, a1, 2
+; CHECK-RV64VC-NEXT: add a1, a1, a2
+; CHECK-RV64VC-NEXT: add a1, a1, sp
+; CHECK-RV64VC-NEXT: addi a1, a1, 16
+; CHECK-RV64VC-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v9, (zero), v16
+; CHECK-RV64VC-NEXT: bltu s0, s1, .LBB911_14
+; CHECK-RV64VC-NEXT: # %bb.13:
+; CHECK-RV64VC-NEXT: mv s0, s1
+; CHECK-RV64VC-NEXT: .LBB911_14:
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 5
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, s0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 4
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 1
+; CHECK-RV64VC-NEXT: add a0, a0, a1
+; CHECK-RV64VC-NEXT: add sp, sp, a0
+; CHECK-RV64VC-NEXT: .cfi_def_cfa sp, 48
+; CHECK-RV64VC-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-RV64VC-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-RV64VC-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; CHECK-RV64VC-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; CHECK-RV64VC-NEXT: .cfi_restore ra
+; CHECK-RV64VC-NEXT: .cfi_restore s0
+; CHECK-RV64VC-NEXT: .cfi_restore s1
+; CHECK-RV64VC-NEXT: .cfi_restore s2
+; CHECK-RV64VC-NEXT: addi sp, sp, 48
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv64i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: addi sp, sp, -16
+; CHECK-RV32VC-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV32VC-NEXT: csrr a1, vlenb
+; CHECK-RV32VC-NEXT: slli a1, a1, 3
+; CHECK-RV32VC-NEXT: sub sp, sp, a1
+; CHECK-RV32VC-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV32VC-NEXT: addi a1, sp, 16
+; CHECK-RV32VC-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV32VC-NEXT: csrr a4, vlenb
+; CHECK-RV32VC-NEXT: vl8re32.v v8, (a0)
+; CHECK-RV32VC-NEXT: slli a3, a4, 3
+; CHECK-RV32VC-NEXT: slli a1, a4, 2
+; CHECK-RV32VC-NEXT: add a0, a0, a3
+; CHECK-RV32VC-NEXT: sub a3, a2, a1
+; CHECK-RV32VC-NEXT: vl8re32.v v0, (a0)
+; CHECK-RV32VC-NEXT: sltu a0, a2, a3
+; CHECK-RV32VC-NEXT: addi a0, a0, -1
+; CHECK-RV32VC-NEXT: and a3, a3, a0
+; CHECK-RV32VC-NEXT: slli a0, a4, 1
+; CHECK-RV32VC-NEXT: sub a4, a3, a0
+; CHECK-RV32VC-NEXT: sltu a5, a3, a4
+; CHECK-RV32VC-NEXT: addi a5, a5, -1
+; CHECK-RV32VC-NEXT: and a4, a4, a5
+; CHECK-RV32VC-NEXT: bltu a3, a0, .LBB911_2
+; CHECK-RV32VC-NEXT: # %bb.1:
+; CHECK-RV32VC-NEXT: mv a3, a0
+; CHECK-RV32VC-NEXT: .LBB911_2:
+; CHECK-RV32VC-NEXT: vsetvli zero, a4, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vluxei32.v v30, (zero), v0
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vluxei32.v v28, (zero), v8
+; CHECK-RV32VC-NEXT: bltu a2, a1, .LBB911_4
+; CHECK-RV32VC-NEXT: # %bb.3:
+; CHECK-RV32VC-NEXT: mv a2, a1
+; CHECK-RV32VC-NEXT: .LBB911_4:
+; CHECK-RV32VC-NEXT: sub a1, a2, a0
+; CHECK-RV32VC-NEXT: sltu a3, a2, a1
+; CHECK-RV32VC-NEXT: addi a3, a3, -1
+; CHECK-RV32VC-NEXT: and a1, a1, a3
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vluxei32.v v26, (zero), v16
+; CHECK-RV32VC-NEXT: bltu a2, a0, .LBB911_6
+; CHECK-RV32VC-NEXT: # %bb.5:
+; CHECK-RV32VC-NEXT: mv a2, a0
+; CHECK-RV32VC-NEXT: .LBB911_6:
+; CHECK-RV32VC-NEXT: addi a0, sp, 16
+; CHECK-RV32VC-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV32VC-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vluxei32.v v24, (zero), v8
+; CHECK-RV32VC-NEXT: vmv8r.v v8, v24
+; CHECK-RV32VC-NEXT: csrr a0, vlenb
+; CHECK-RV32VC-NEXT: slli a0, a0, 3
+; CHECK-RV32VC-NEXT: add sp, sp, a0
+; CHECK-RV32VC-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV32VC-NEXT: addi sp, sp, 16
+; CHECK-RV32VC-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 64 x i8> @llvm.vp.gather.nxv64i8.nxv64p0(<vscale x 64 x ptr> %ptrs, <vscale x 64 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 64 x i8> %x
+}
+
+
+define <vscale x 64 x i8> @test_nontemporal_vp_gather_nxv64i8_S1(<vscale x 64 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv64i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: addi sp, sp, -48
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 48
+; CHECK-RV64V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-RV64V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-RV64V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; CHECK-RV64V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; CHECK-RV64V-NEXT: .cfi_offset ra, -8
+; CHECK-RV64V-NEXT: .cfi_offset s0, -16
+; CHECK-RV64V-NEXT: .cfi_offset s1, -24
+; CHECK-RV64V-NEXT: .cfi_offset s2, -32
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 4
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: slli a1, a1, 1
+; CHECK-RV64V-NEXT: add a1, a1, a2
+; CHECK-RV64V-NEXT: sub sp, sp, a1
+; CHECK-RV64V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb
+; CHECK-RV64V-NEXT: mv s0, a6
+; CHECK-RV64V-NEXT: mv s2, a0
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 2
+; CHECK-RV64V-NEXT: add a0, a0, a1
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: csrr s1, vlenb
+; CHECK-RV64V-NEXT: slli a0, s1, 3
+; CHECK-RV64V-NEXT: add a0, s2, a0
+; CHECK-RV64V-NEXT: vl8re64.v v16, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 1
+; CHECK-RV64V-NEXT: add a0, a0, a1
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: slli a0, s1, 4
+; CHECK-RV64V-NEXT: add a0, s2, a0
+; CHECK-RV64V-NEXT: vl8re64.v v16, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 4
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 5
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: li a1, 24
+; CHECK-RV64V-NEXT: mv a0, s1
+; CHECK-RV64V-NEXT: call __muldi3
+; CHECK-RV64V-NEXT: add a0, s2, a0
+; CHECK-RV64V-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: slli a0, s1, 5
+; CHECK-RV64V-NEXT: add a0, s2, a0
+; CHECK-RV64V-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64V-NEXT: addi a0, sp, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: li a1, 40
+; CHECK-RV64V-NEXT: mv a0, s1
+; CHECK-RV64V-NEXT: call __muldi3
+; CHECK-RV64V-NEXT: slli a2, s1, 2
+; CHECK-RV64V-NEXT: sub a1, s0, a2
+; CHECK-RV64V-NEXT: sltu a3, s0, a1
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a3, a3, a1
+; CHECK-RV64V-NEXT: slli a1, s1, 1
+; CHECK-RV64V-NEXT: sub a4, a3, a1
+; CHECK-RV64V-NEXT: sltu a5, a3, a4
+; CHECK-RV64V-NEXT: addi a5, a5, -1
+; CHECK-RV64V-NEXT: and a6, a5, a4
+; CHECK-RV64V-NEXT: sub a4, a6, s1
+; CHECK-RV64V-NEXT: mv a5, a6
+; CHECK-RV64V-NEXT: bltu a6, s1, .LBB912_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a5, s1
+; CHECK-RV64V-NEXT: .LBB912_2:
+; CHECK-RV64V-NEXT: sltu a7, a6, a4
+; CHECK-RV64V-NEXT: bltu a3, a1, .LBB912_4
+; CHECK-RV64V-NEXT: # %bb.3:
+; CHECK-RV64V-NEXT: mv a3, a1
+; CHECK-RV64V-NEXT: .LBB912_4:
+; CHECK-RV64V-NEXT: add a6, s2, a0
+; CHECK-RV64V-NEXT: addi a0, a7, -1
+; CHECK-RV64V-NEXT: sub a7, a3, s1
+; CHECK-RV64V-NEXT: sltu t0, a3, a7
+; CHECK-RV64V-NEXT: addi t0, t0, -1
+; CHECK-RV64V-NEXT: and a7, t0, a7
+; CHECK-RV64V-NEXT: bltu a3, s1, .LBB912_6
+; CHECK-RV64V-NEXT: # %bb.5:
+; CHECK-RV64V-NEXT: mv a3, s1
+; CHECK-RV64V-NEXT: .LBB912_6:
+; CHECK-RV64V-NEXT: vl8re64.v v16, (a6)
+; CHECK-RV64V-NEXT: addi a6, sp, 16
+; CHECK-RV64V-NEXT: vl8r.v v24, (a6) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a5, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v14, (zero), v24
+; CHECK-RV64V-NEXT: csrr a5, vlenb
+; CHECK-RV64V-NEXT: slli a5, a5, 3
+; CHECK-RV64V-NEXT: add a5, sp, a5
+; CHECK-RV64V-NEXT: addi a5, a5, 16
+; CHECK-RV64V-NEXT: vl8r.v v24, (a5) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a7, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v13, (zero), v24
+; CHECK-RV64V-NEXT: csrr a5, vlenb
+; CHECK-RV64V-NEXT: slli a5, a5, 4
+; CHECK-RV64V-NEXT: add a5, sp, a5
+; CHECK-RV64V-NEXT: addi a5, a5, 16
+; CHECK-RV64V-NEXT: vl8r.v v24, (a5) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v12, (zero), v24
+; CHECK-RV64V-NEXT: and a0, a0, a4
+; CHECK-RV64V-NEXT: bltu s0, a2, .LBB912_8
+; CHECK-RV64V-NEXT: # %bb.7:
+; CHECK-RV64V-NEXT: mv s0, a2
+; CHECK-RV64V-NEXT: .LBB912_8:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v15, (zero), v16
+; CHECK-RV64V-NEXT: vl8re64.v v16, (s2)
+; CHECK-RV64V-NEXT: sub a0, s0, a1
+; CHECK-RV64V-NEXT: sltu a2, s0, a0
+; CHECK-RV64V-NEXT: addi a2, a2, -1
+; CHECK-RV64V-NEXT: and a0, a2, a0
+; CHECK-RV64V-NEXT: sub a2, a0, s1
+; CHECK-RV64V-NEXT: sltu a3, a0, a2
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a2, a3, a2
+; CHECK-RV64V-NEXT: bltu a0, s1, .LBB912_10
+; CHECK-RV64V-NEXT: # %bb.9:
+; CHECK-RV64V-NEXT: mv a0, s1
+; CHECK-RV64V-NEXT: .LBB912_10:
+; CHECK-RV64V-NEXT: csrr a3, vlenb
+; CHECK-RV64V-NEXT: slli a3, a3, 3
+; CHECK-RV64V-NEXT: mv a4, a3
+; CHECK-RV64V-NEXT: slli a3, a3, 1
+; CHECK-RV64V-NEXT: add a3, a3, a4
+; CHECK-RV64V-NEXT: add a3, sp, a3
+; CHECK-RV64V-NEXT: addi a3, a3, 16
+; CHECK-RV64V-NEXT: vl8r.v v24, (a3) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v11, (zero), v24
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v10, (zero), v16
+; CHECK-RV64V-NEXT: bltu s0, a1, .LBB912_12
+; CHECK-RV64V-NEXT: # %bb.11:
+; CHECK-RV64V-NEXT: mv s0, a1
+; CHECK-RV64V-NEXT: .LBB912_12:
+; CHECK-RV64V-NEXT: sub a0, s0, s1
+; CHECK-RV64V-NEXT: sltu a1, s0, a0
+; CHECK-RV64V-NEXT: addi a1, a1, -1
+; CHECK-RV64V-NEXT: and a0, a1, a0
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 3
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: slli a1, a1, 2
+; CHECK-RV64V-NEXT: add a1, a1, a2
+; CHECK-RV64V-NEXT: add a1, sp, a1
+; CHECK-RV64V-NEXT: addi a1, a1, 16
+; CHECK-RV64V-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v9, (zero), v16
+; CHECK-RV64V-NEXT: bltu s0, s1, .LBB912_14
+; CHECK-RV64V-NEXT: # %bb.13:
+; CHECK-RV64V-NEXT: mv s0, s1
+; CHECK-RV64V-NEXT: .LBB912_14:
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 5
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, s0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 4
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 1
+; CHECK-RV64V-NEXT: add a0, a0, a1
+; CHECK-RV64V-NEXT: add sp, sp, a0
+; CHECK-RV64V-NEXT: .cfi_def_cfa sp, 48
+; CHECK-RV64V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-RV64V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-RV64V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; CHECK-RV64V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; CHECK-RV64V-NEXT: .cfi_restore ra
+; CHECK-RV64V-NEXT: .cfi_restore s0
+; CHECK-RV64V-NEXT: .cfi_restore s1
+; CHECK-RV64V-NEXT: .cfi_restore s2
+; CHECK-RV64V-NEXT: addi sp, sp, 48
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv64i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: addi sp, sp, -16
+; CHECK-RV32V-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV32V-NEXT: csrr a1, vlenb
+; CHECK-RV32V-NEXT: slli a1, a1, 3
+; CHECK-RV32V-NEXT: sub sp, sp, a1
+; CHECK-RV32V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV32V-NEXT: addi a1, sp, 16
+; CHECK-RV32V-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV32V-NEXT: csrr a4, vlenb
+; CHECK-RV32V-NEXT: vl8re32.v v8, (a0)
+; CHECK-RV32V-NEXT: slli a3, a4, 3
+; CHECK-RV32V-NEXT: slli a1, a4, 2
+; CHECK-RV32V-NEXT: add a0, a0, a3
+; CHECK-RV32V-NEXT: sub a3, a2, a1
+; CHECK-RV32V-NEXT: vl8re32.v v0, (a0)
+; CHECK-RV32V-NEXT: sltu a0, a2, a3
+; CHECK-RV32V-NEXT: addi a0, a0, -1
+; CHECK-RV32V-NEXT: and a3, a0, a3
+; CHECK-RV32V-NEXT: slli a0, a4, 1
+; CHECK-RV32V-NEXT: sub a4, a3, a0
+; CHECK-RV32V-NEXT: sltu a5, a3, a4
+; CHECK-RV32V-NEXT: addi a5, a5, -1
+; CHECK-RV32V-NEXT: and a4, a5, a4
+; CHECK-RV32V-NEXT: bltu a3, a0, .LBB912_2
+; CHECK-RV32V-NEXT: # %bb.1:
+; CHECK-RV32V-NEXT: mv a3, a0
+; CHECK-RV32V-NEXT: .LBB912_2:
+; CHECK-RV32V-NEXT: vsetvli zero, a4, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vluxei32.v v30, (zero), v0
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vluxei32.v v28, (zero), v8
+; CHECK-RV32V-NEXT: bltu a2, a1, .LBB912_4
+; CHECK-RV32V-NEXT: # %bb.3:
+; CHECK-RV32V-NEXT: mv a2, a1
+; CHECK-RV32V-NEXT: .LBB912_4:
+; CHECK-RV32V-NEXT: sub a1, a2, a0
+; CHECK-RV32V-NEXT: sltu a3, a2, a1
+; CHECK-RV32V-NEXT: addi a3, a3, -1
+; CHECK-RV32V-NEXT: and a1, a3, a1
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vluxei32.v v26, (zero), v16
+; CHECK-RV32V-NEXT: bltu a2, a0, .LBB912_6
+; CHECK-RV32V-NEXT: # %bb.5:
+; CHECK-RV32V-NEXT: mv a2, a0
+; CHECK-RV32V-NEXT: .LBB912_6:
+; CHECK-RV32V-NEXT: addi a0, sp, 16
+; CHECK-RV32V-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV32V-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vluxei32.v v24, (zero), v8
+; CHECK-RV32V-NEXT: vmv8r.v v8, v24
+; CHECK-RV32V-NEXT: csrr a0, vlenb
+; CHECK-RV32V-NEXT: slli a0, a0, 3
+; CHECK-RV32V-NEXT: add sp, sp, a0
+; CHECK-RV32V-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV32V-NEXT: addi sp, sp, 16
+; CHECK-RV32V-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv64i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: addi sp, sp, -48
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 48
+; CHECK-RV64VC-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-RV64VC-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-RV64VC-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; CHECK-RV64VC-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; CHECK-RV64VC-NEXT: .cfi_offset ra, -8
+; CHECK-RV64VC-NEXT: .cfi_offset s0, -16
+; CHECK-RV64VC-NEXT: .cfi_offset s1, -24
+; CHECK-RV64VC-NEXT: .cfi_offset s2, -32
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 4
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: slli a1, a1, 1
+; CHECK-RV64VC-NEXT: add a1, a1, a2
+; CHECK-RV64VC-NEXT: sub sp, sp, a1
+; CHECK-RV64VC-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb
+; CHECK-RV64VC-NEXT: mv s0, a6
+; CHECK-RV64VC-NEXT: mv s2, a0
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 2
+; CHECK-RV64VC-NEXT: add a0, a0, a1
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: csrr s1, vlenb
+; CHECK-RV64VC-NEXT: slli a0, s1, 3
+; CHECK-RV64VC-NEXT: add a0, a0, s2
+; CHECK-RV64VC-NEXT: vl8re64.v v16, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 1
+; CHECK-RV64VC-NEXT: add a0, a0, a1
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: slli a0, s1, 4
+; CHECK-RV64VC-NEXT: add a0, a0, s2
+; CHECK-RV64VC-NEXT: vl8re64.v v16, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 4
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 5
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: li a1, 24
+; CHECK-RV64VC-NEXT: mv a0, s1
+; CHECK-RV64VC-NEXT: call __muldi3
+; CHECK-RV64VC-NEXT: add a0, a0, s2
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: slli a0, s1, 5
+; CHECK-RV64VC-NEXT: add a0, a0, s2
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64VC-NEXT: addi a0, sp, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: li a1, 40
+; CHECK-RV64VC-NEXT: mv a0, s1
+; CHECK-RV64VC-NEXT: call __muldi3
+; CHECK-RV64VC-NEXT: slli a7, s1, 2
+; CHECK-RV64VC-NEXT: sub a1, s0, a7
+; CHECK-RV64VC-NEXT: sltu a2, s0, a1
+; CHECK-RV64VC-NEXT: addi a2, a2, -1
+; CHECK-RV64VC-NEXT: and a3, a2, a1
+; CHECK-RV64VC-NEXT: slli a1, s1, 1
+; CHECK-RV64VC-NEXT: sub a2, a3, a1
+; CHECK-RV64VC-NEXT: sltu a4, a3, a2
+; CHECK-RV64VC-NEXT: addi a4, a4, -1
+; CHECK-RV64VC-NEXT: and a2, a2, a4
+; CHECK-RV64VC-NEXT: sub t0, a2, s1
+; CHECK-RV64VC-NEXT: mv a5, a2
+; CHECK-RV64VC-NEXT: bltu a2, s1, .LBB912_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a5, s1
+; CHECK-RV64VC-NEXT: .LBB912_2:
+; CHECK-RV64VC-NEXT: sltu a6, a2, t0
+; CHECK-RV64VC-NEXT: bltu a3, a1, .LBB912_4
+; CHECK-RV64VC-NEXT: # %bb.3:
+; CHECK-RV64VC-NEXT: mv a3, a1
+; CHECK-RV64VC-NEXT: .LBB912_4:
+; CHECK-RV64VC-NEXT: add a0, a0, s2
+; CHECK-RV64VC-NEXT: addi a6, a6, -1
+; CHECK-RV64VC-NEXT: sub a2, a3, s1
+; CHECK-RV64VC-NEXT: sltu a4, a3, a2
+; CHECK-RV64VC-NEXT: addi a4, a4, -1
+; CHECK-RV64VC-NEXT: and a2, a2, a4
+; CHECK-RV64VC-NEXT: bltu a3, s1, .LBB912_6
+; CHECK-RV64VC-NEXT: # %bb.5:
+; CHECK-RV64VC-NEXT: mv a3, s1
+; CHECK-RV64VC-NEXT: .LBB912_6:
+; CHECK-RV64VC-NEXT: vl8re64.v v16, (a0)
+; CHECK-RV64VC-NEXT: addi a0, sp, 16
+; CHECK-RV64VC-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a5, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v14, (zero), v24
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v13, (zero), v24
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 4
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v12, (zero), v24
+; CHECK-RV64VC-NEXT: and a0, a6, t0
+; CHECK-RV64VC-NEXT: bltu s0, a7, .LBB912_8
+; CHECK-RV64VC-NEXT: # %bb.7:
+; CHECK-RV64VC-NEXT: mv s0, a7
+; CHECK-RV64VC-NEXT: .LBB912_8:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v15, (zero), v16
+; CHECK-RV64VC-NEXT: vl8re64.v v16, (s2)
+; CHECK-RV64VC-NEXT: sub a0, s0, a1
+; CHECK-RV64VC-NEXT: sltu a2, s0, a0
+; CHECK-RV64VC-NEXT: addi a2, a2, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a2
+; CHECK-RV64VC-NEXT: sub a2, a0, s1
+; CHECK-RV64VC-NEXT: sltu a3, a0, a2
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a2, a2, a3
+; CHECK-RV64VC-NEXT: bltu a0, s1, .LBB912_10
+; CHECK-RV64VC-NEXT: # %bb.9:
+; CHECK-RV64VC-NEXT: mv a0, s1
+; CHECK-RV64VC-NEXT: .LBB912_10:
+; CHECK-RV64VC-NEXT: csrr a3, vlenb
+; CHECK-RV64VC-NEXT: slli a3, a3, 3
+; CHECK-RV64VC-NEXT: mv a4, a3
+; CHECK-RV64VC-NEXT: slli a3, a3, 1
+; CHECK-RV64VC-NEXT: add a3, a3, a4
+; CHECK-RV64VC-NEXT: add a3, a3, sp
+; CHECK-RV64VC-NEXT: addi a3, a3, 16
+; CHECK-RV64VC-NEXT: vl8r.v v24, (a3) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v11, (zero), v24
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v10, (zero), v16
+; CHECK-RV64VC-NEXT: bltu s0, a1, .LBB912_12
+; CHECK-RV64VC-NEXT: # %bb.11:
+; CHECK-RV64VC-NEXT: mv s0, a1
+; CHECK-RV64VC-NEXT: .LBB912_12:
+; CHECK-RV64VC-NEXT: sub a0, s0, s1
+; CHECK-RV64VC-NEXT: sltu a1, s0, a0
+; CHECK-RV64VC-NEXT: addi a1, a1, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a1
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 3
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: slli a1, a1, 2
+; CHECK-RV64VC-NEXT: add a1, a1, a2
+; CHECK-RV64VC-NEXT: add a1, a1, sp
+; CHECK-RV64VC-NEXT: addi a1, a1, 16
+; CHECK-RV64VC-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v9, (zero), v16
+; CHECK-RV64VC-NEXT: bltu s0, s1, .LBB912_14
+; CHECK-RV64VC-NEXT: # %bb.13:
+; CHECK-RV64VC-NEXT: mv s0, s1
+; CHECK-RV64VC-NEXT: .LBB912_14:
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 5
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, s0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 4
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 1
+; CHECK-RV64VC-NEXT: add a0, a0, a1
+; CHECK-RV64VC-NEXT: add sp, sp, a0
+; CHECK-RV64VC-NEXT: .cfi_def_cfa sp, 48
+; CHECK-RV64VC-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-RV64VC-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-RV64VC-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; CHECK-RV64VC-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; CHECK-RV64VC-NEXT: .cfi_restore ra
+; CHECK-RV64VC-NEXT: .cfi_restore s0
+; CHECK-RV64VC-NEXT: .cfi_restore s1
+; CHECK-RV64VC-NEXT: .cfi_restore s2
+; CHECK-RV64VC-NEXT: addi sp, sp, 48
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv64i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: addi sp, sp, -16
+; CHECK-RV32VC-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV32VC-NEXT: csrr a1, vlenb
+; CHECK-RV32VC-NEXT: slli a1, a1, 3
+; CHECK-RV32VC-NEXT: sub sp, sp, a1
+; CHECK-RV32VC-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV32VC-NEXT: addi a1, sp, 16
+; CHECK-RV32VC-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV32VC-NEXT: csrr a4, vlenb
+; CHECK-RV32VC-NEXT: vl8re32.v v8, (a0)
+; CHECK-RV32VC-NEXT: slli a3, a4, 3
+; CHECK-RV32VC-NEXT: slli a1, a4, 2
+; CHECK-RV32VC-NEXT: add a0, a0, a3
+; CHECK-RV32VC-NEXT: sub a3, a2, a1
+; CHECK-RV32VC-NEXT: vl8re32.v v0, (a0)
+; CHECK-RV32VC-NEXT: sltu a0, a2, a3
+; CHECK-RV32VC-NEXT: addi a0, a0, -1
+; CHECK-RV32VC-NEXT: and a3, a3, a0
+; CHECK-RV32VC-NEXT: slli a0, a4, 1
+; CHECK-RV32VC-NEXT: sub a4, a3, a0
+; CHECK-RV32VC-NEXT: sltu a5, a3, a4
+; CHECK-RV32VC-NEXT: addi a5, a5, -1
+; CHECK-RV32VC-NEXT: and a4, a4, a5
+; CHECK-RV32VC-NEXT: bltu a3, a0, .LBB912_2
+; CHECK-RV32VC-NEXT: # %bb.1:
+; CHECK-RV32VC-NEXT: mv a3, a0
+; CHECK-RV32VC-NEXT: .LBB912_2:
+; CHECK-RV32VC-NEXT: vsetvli zero, a4, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vluxei32.v v30, (zero), v0
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vluxei32.v v28, (zero), v8
+; CHECK-RV32VC-NEXT: bltu a2, a1, .LBB912_4
+; CHECK-RV32VC-NEXT: # %bb.3:
+; CHECK-RV32VC-NEXT: mv a2, a1
+; CHECK-RV32VC-NEXT: .LBB912_4:
+; CHECK-RV32VC-NEXT: sub a1, a2, a0
+; CHECK-RV32VC-NEXT: sltu a3, a2, a1
+; CHECK-RV32VC-NEXT: addi a3, a3, -1
+; CHECK-RV32VC-NEXT: and a1, a1, a3
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vluxei32.v v26, (zero), v16
+; CHECK-RV32VC-NEXT: bltu a2, a0, .LBB912_6
+; CHECK-RV32VC-NEXT: # %bb.5:
+; CHECK-RV32VC-NEXT: mv a2, a0
+; CHECK-RV32VC-NEXT: .LBB912_6:
+; CHECK-RV32VC-NEXT: addi a0, sp, 16
+; CHECK-RV32VC-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV32VC-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vluxei32.v v24, (zero), v8
+; CHECK-RV32VC-NEXT: vmv8r.v v8, v24
+; CHECK-RV32VC-NEXT: csrr a0, vlenb
+; CHECK-RV32VC-NEXT: slli a0, a0, 3
+; CHECK-RV32VC-NEXT: add sp, sp, a0
+; CHECK-RV32VC-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV32VC-NEXT: addi sp, sp, 16
+; CHECK-RV32VC-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 64 x i8> @llvm.vp.gather.nxv64i8.nxv64p0(<vscale x 64 x ptr> %ptrs, <vscale x 64 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 64 x i8> %x
+}
+
+
+define <vscale x 64 x i8> @test_nontemporal_vp_gather_nxv64i8_ALL(<vscale x 64 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv64i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: addi sp, sp, -48
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 48
+; CHECK-RV64V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-RV64V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-RV64V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; CHECK-RV64V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; CHECK-RV64V-NEXT: .cfi_offset ra, -8
+; CHECK-RV64V-NEXT: .cfi_offset s0, -16
+; CHECK-RV64V-NEXT: .cfi_offset s1, -24
+; CHECK-RV64V-NEXT: .cfi_offset s2, -32
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 4
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: slli a1, a1, 1
+; CHECK-RV64V-NEXT: add a1, a1, a2
+; CHECK-RV64V-NEXT: sub sp, sp, a1
+; CHECK-RV64V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb
+; CHECK-RV64V-NEXT: mv s0, a6
+; CHECK-RV64V-NEXT: mv s2, a0
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 2
+; CHECK-RV64V-NEXT: add a0, a0, a1
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: csrr s1, vlenb
+; CHECK-RV64V-NEXT: slli a0, s1, 3
+; CHECK-RV64V-NEXT: add a0, s2, a0
+; CHECK-RV64V-NEXT: vl8re64.v v16, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 1
+; CHECK-RV64V-NEXT: add a0, a0, a1
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: slli a0, s1, 4
+; CHECK-RV64V-NEXT: add a0, s2, a0
+; CHECK-RV64V-NEXT: vl8re64.v v16, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 4
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 5
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: li a1, 24
+; CHECK-RV64V-NEXT: mv a0, s1
+; CHECK-RV64V-NEXT: call __muldi3
+; CHECK-RV64V-NEXT: add a0, s2, a0
+; CHECK-RV64V-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: slli a0, s1, 5
+; CHECK-RV64V-NEXT: add a0, s2, a0
+; CHECK-RV64V-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64V-NEXT: addi a0, sp, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: li a1, 40
+; CHECK-RV64V-NEXT: mv a0, s1
+; CHECK-RV64V-NEXT: call __muldi3
+; CHECK-RV64V-NEXT: slli a2, s1, 2
+; CHECK-RV64V-NEXT: sub a1, s0, a2
+; CHECK-RV64V-NEXT: sltu a3, s0, a1
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a3, a3, a1
+; CHECK-RV64V-NEXT: slli a1, s1, 1
+; CHECK-RV64V-NEXT: sub a4, a3, a1
+; CHECK-RV64V-NEXT: sltu a5, a3, a4
+; CHECK-RV64V-NEXT: addi a5, a5, -1
+; CHECK-RV64V-NEXT: and a6, a5, a4
+; CHECK-RV64V-NEXT: sub a4, a6, s1
+; CHECK-RV64V-NEXT: mv a5, a6
+; CHECK-RV64V-NEXT: bltu a6, s1, .LBB913_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a5, s1
+; CHECK-RV64V-NEXT: .LBB913_2:
+; CHECK-RV64V-NEXT: sltu a7, a6, a4
+; CHECK-RV64V-NEXT: bltu a3, a1, .LBB913_4
+; CHECK-RV64V-NEXT: # %bb.3:
+; CHECK-RV64V-NEXT: mv a3, a1
+; CHECK-RV64V-NEXT: .LBB913_4:
+; CHECK-RV64V-NEXT: add a6, s2, a0
+; CHECK-RV64V-NEXT: addi a0, a7, -1
+; CHECK-RV64V-NEXT: sub a7, a3, s1
+; CHECK-RV64V-NEXT: sltu t0, a3, a7
+; CHECK-RV64V-NEXT: addi t0, t0, -1
+; CHECK-RV64V-NEXT: and a7, t0, a7
+; CHECK-RV64V-NEXT: bltu a3, s1, .LBB913_6
+; CHECK-RV64V-NEXT: # %bb.5:
+; CHECK-RV64V-NEXT: mv a3, s1
+; CHECK-RV64V-NEXT: .LBB913_6:
+; CHECK-RV64V-NEXT: vl8re64.v v16, (a6)
+; CHECK-RV64V-NEXT: addi a6, sp, 16
+; CHECK-RV64V-NEXT: vl8r.v v24, (a6) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a5, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v14, (zero), v24
+; CHECK-RV64V-NEXT: csrr a5, vlenb
+; CHECK-RV64V-NEXT: slli a5, a5, 3
+; CHECK-RV64V-NEXT: add a5, sp, a5
+; CHECK-RV64V-NEXT: addi a5, a5, 16
+; CHECK-RV64V-NEXT: vl8r.v v24, (a5) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a7, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v13, (zero), v24
+; CHECK-RV64V-NEXT: csrr a5, vlenb
+; CHECK-RV64V-NEXT: slli a5, a5, 4
+; CHECK-RV64V-NEXT: add a5, sp, a5
+; CHECK-RV64V-NEXT: addi a5, a5, 16
+; CHECK-RV64V-NEXT: vl8r.v v24, (a5) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v12, (zero), v24
+; CHECK-RV64V-NEXT: and a0, a0, a4
+; CHECK-RV64V-NEXT: bltu s0, a2, .LBB913_8
+; CHECK-RV64V-NEXT: # %bb.7:
+; CHECK-RV64V-NEXT: mv s0, a2
+; CHECK-RV64V-NEXT: .LBB913_8:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v15, (zero), v16
+; CHECK-RV64V-NEXT: vl8re64.v v16, (s2)
+; CHECK-RV64V-NEXT: sub a0, s0, a1
+; CHECK-RV64V-NEXT: sltu a2, s0, a0
+; CHECK-RV64V-NEXT: addi a2, a2, -1
+; CHECK-RV64V-NEXT: and a0, a2, a0
+; CHECK-RV64V-NEXT: sub a2, a0, s1
+; CHECK-RV64V-NEXT: sltu a3, a0, a2
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a2, a3, a2
+; CHECK-RV64V-NEXT: bltu a0, s1, .LBB913_10
+; CHECK-RV64V-NEXT: # %bb.9:
+; CHECK-RV64V-NEXT: mv a0, s1
+; CHECK-RV64V-NEXT: .LBB913_10:
+; CHECK-RV64V-NEXT: csrr a3, vlenb
+; CHECK-RV64V-NEXT: slli a3, a3, 3
+; CHECK-RV64V-NEXT: mv a4, a3
+; CHECK-RV64V-NEXT: slli a3, a3, 1
+; CHECK-RV64V-NEXT: add a3, a3, a4
+; CHECK-RV64V-NEXT: add a3, sp, a3
+; CHECK-RV64V-NEXT: addi a3, a3, 16
+; CHECK-RV64V-NEXT: vl8r.v v24, (a3) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v11, (zero), v24
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v10, (zero), v16
+; CHECK-RV64V-NEXT: bltu s0, a1, .LBB913_12
+; CHECK-RV64V-NEXT: # %bb.11:
+; CHECK-RV64V-NEXT: mv s0, a1
+; CHECK-RV64V-NEXT: .LBB913_12:
+; CHECK-RV64V-NEXT: sub a0, s0, s1
+; CHECK-RV64V-NEXT: sltu a1, s0, a0
+; CHECK-RV64V-NEXT: addi a1, a1, -1
+; CHECK-RV64V-NEXT: and a0, a1, a0
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 3
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: slli a1, a1, 2
+; CHECK-RV64V-NEXT: add a1, a1, a2
+; CHECK-RV64V-NEXT: add a1, sp, a1
+; CHECK-RV64V-NEXT: addi a1, a1, 16
+; CHECK-RV64V-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v9, (zero), v16
+; CHECK-RV64V-NEXT: bltu s0, s1, .LBB913_14
+; CHECK-RV64V-NEXT: # %bb.13:
+; CHECK-RV64V-NEXT: mv s0, s1
+; CHECK-RV64V-NEXT: .LBB913_14:
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 5
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, s0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 4
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 1
+; CHECK-RV64V-NEXT: add a0, a0, a1
+; CHECK-RV64V-NEXT: add sp, sp, a0
+; CHECK-RV64V-NEXT: .cfi_def_cfa sp, 48
+; CHECK-RV64V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-RV64V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-RV64V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; CHECK-RV64V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; CHECK-RV64V-NEXT: .cfi_restore ra
+; CHECK-RV64V-NEXT: .cfi_restore s0
+; CHECK-RV64V-NEXT: .cfi_restore s1
+; CHECK-RV64V-NEXT: .cfi_restore s2
+; CHECK-RV64V-NEXT: addi sp, sp, 48
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv64i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: addi sp, sp, -16
+; CHECK-RV32V-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV32V-NEXT: csrr a1, vlenb
+; CHECK-RV32V-NEXT: slli a1, a1, 3
+; CHECK-RV32V-NEXT: sub sp, sp, a1
+; CHECK-RV32V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV32V-NEXT: addi a1, sp, 16
+; CHECK-RV32V-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV32V-NEXT: csrr a4, vlenb
+; CHECK-RV32V-NEXT: vl8re32.v v8, (a0)
+; CHECK-RV32V-NEXT: slli a3, a4, 3
+; CHECK-RV32V-NEXT: slli a1, a4, 2
+; CHECK-RV32V-NEXT: add a0, a0, a3
+; CHECK-RV32V-NEXT: sub a3, a2, a1
+; CHECK-RV32V-NEXT: vl8re32.v v0, (a0)
+; CHECK-RV32V-NEXT: sltu a0, a2, a3
+; CHECK-RV32V-NEXT: addi a0, a0, -1
+; CHECK-RV32V-NEXT: and a3, a0, a3
+; CHECK-RV32V-NEXT: slli a0, a4, 1
+; CHECK-RV32V-NEXT: sub a4, a3, a0
+; CHECK-RV32V-NEXT: sltu a5, a3, a4
+; CHECK-RV32V-NEXT: addi a5, a5, -1
+; CHECK-RV32V-NEXT: and a4, a5, a4
+; CHECK-RV32V-NEXT: bltu a3, a0, .LBB913_2
+; CHECK-RV32V-NEXT: # %bb.1:
+; CHECK-RV32V-NEXT: mv a3, a0
+; CHECK-RV32V-NEXT: .LBB913_2:
+; CHECK-RV32V-NEXT: vsetvli zero, a4, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v30, (zero), v0
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v28, (zero), v8
+; CHECK-RV32V-NEXT: bltu a2, a1, .LBB913_4
+; CHECK-RV32V-NEXT: # %bb.3:
+; CHECK-RV32V-NEXT: mv a2, a1
+; CHECK-RV32V-NEXT: .LBB913_4:
+; CHECK-RV32V-NEXT: sub a1, a2, a0
+; CHECK-RV32V-NEXT: sltu a3, a2, a1
+; CHECK-RV32V-NEXT: addi a3, a3, -1
+; CHECK-RV32V-NEXT: and a1, a3, a1
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v26, (zero), v16
+; CHECK-RV32V-NEXT: bltu a2, a0, .LBB913_6
+; CHECK-RV32V-NEXT: # %bb.5:
+; CHECK-RV32V-NEXT: mv a2, a0
+; CHECK-RV32V-NEXT: .LBB913_6:
+; CHECK-RV32V-NEXT: addi a0, sp, 16
+; CHECK-RV32V-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV32V-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v24, (zero), v8
+; CHECK-RV32V-NEXT: vmv8r.v v8, v24
+; CHECK-RV32V-NEXT: csrr a0, vlenb
+; CHECK-RV32V-NEXT: slli a0, a0, 3
+; CHECK-RV32V-NEXT: add sp, sp, a0
+; CHECK-RV32V-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV32V-NEXT: addi sp, sp, 16
+; CHECK-RV32V-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv64i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: addi sp, sp, -48
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 48
+; CHECK-RV64VC-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-RV64VC-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-RV64VC-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; CHECK-RV64VC-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; CHECK-RV64VC-NEXT: .cfi_offset ra, -8
+; CHECK-RV64VC-NEXT: .cfi_offset s0, -16
+; CHECK-RV64VC-NEXT: .cfi_offset s1, -24
+; CHECK-RV64VC-NEXT: .cfi_offset s2, -32
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 4
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: slli a1, a1, 1
+; CHECK-RV64VC-NEXT: add a1, a1, a2
+; CHECK-RV64VC-NEXT: sub sp, sp, a1
+; CHECK-RV64VC-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb
+; CHECK-RV64VC-NEXT: mv s0, a6
+; CHECK-RV64VC-NEXT: mv s2, a0
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 2
+; CHECK-RV64VC-NEXT: add a0, a0, a1
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: csrr s1, vlenb
+; CHECK-RV64VC-NEXT: slli a0, s1, 3
+; CHECK-RV64VC-NEXT: add a0, a0, s2
+; CHECK-RV64VC-NEXT: vl8re64.v v16, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 1
+; CHECK-RV64VC-NEXT: add a0, a0, a1
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: slli a0, s1, 4
+; CHECK-RV64VC-NEXT: add a0, a0, s2
+; CHECK-RV64VC-NEXT: vl8re64.v v16, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 4
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 5
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: li a1, 24
+; CHECK-RV64VC-NEXT: mv a0, s1
+; CHECK-RV64VC-NEXT: call __muldi3
+; CHECK-RV64VC-NEXT: add a0, a0, s2
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: slli a0, s1, 5
+; CHECK-RV64VC-NEXT: add a0, a0, s2
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64VC-NEXT: addi a0, sp, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: li a1, 40
+; CHECK-RV64VC-NEXT: mv a0, s1
+; CHECK-RV64VC-NEXT: call __muldi3
+; CHECK-RV64VC-NEXT: slli a7, s1, 2
+; CHECK-RV64VC-NEXT: sub a1, s0, a7
+; CHECK-RV64VC-NEXT: sltu a2, s0, a1
+; CHECK-RV64VC-NEXT: addi a2, a2, -1
+; CHECK-RV64VC-NEXT: and a3, a2, a1
+; CHECK-RV64VC-NEXT: slli a1, s1, 1
+; CHECK-RV64VC-NEXT: sub a2, a3, a1
+; CHECK-RV64VC-NEXT: sltu a4, a3, a2
+; CHECK-RV64VC-NEXT: addi a4, a4, -1
+; CHECK-RV64VC-NEXT: and a2, a2, a4
+; CHECK-RV64VC-NEXT: sub t0, a2, s1
+; CHECK-RV64VC-NEXT: mv a5, a2
+; CHECK-RV64VC-NEXT: bltu a2, s1, .LBB913_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a5, s1
+; CHECK-RV64VC-NEXT: .LBB913_2:
+; CHECK-RV64VC-NEXT: sltu a6, a2, t0
+; CHECK-RV64VC-NEXT: bltu a3, a1, .LBB913_4
+; CHECK-RV64VC-NEXT: # %bb.3:
+; CHECK-RV64VC-NEXT: mv a3, a1
+; CHECK-RV64VC-NEXT: .LBB913_4:
+; CHECK-RV64VC-NEXT: add a0, a0, s2
+; CHECK-RV64VC-NEXT: addi a6, a6, -1
+; CHECK-RV64VC-NEXT: sub a2, a3, s1
+; CHECK-RV64VC-NEXT: sltu a4, a3, a2
+; CHECK-RV64VC-NEXT: addi a4, a4, -1
+; CHECK-RV64VC-NEXT: and a2, a2, a4
+; CHECK-RV64VC-NEXT: bltu a3, s1, .LBB913_6
+; CHECK-RV64VC-NEXT: # %bb.5:
+; CHECK-RV64VC-NEXT: mv a3, s1
+; CHECK-RV64VC-NEXT: .LBB913_6:
+; CHECK-RV64VC-NEXT: vl8re64.v v16, (a0)
+; CHECK-RV64VC-NEXT: addi a0, sp, 16
+; CHECK-RV64VC-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a5, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v14, (zero), v24
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v13, (zero), v24
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 4
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v12, (zero), v24
+; CHECK-RV64VC-NEXT: and a0, a6, t0
+; CHECK-RV64VC-NEXT: bltu s0, a7, .LBB913_8
+; CHECK-RV64VC-NEXT: # %bb.7:
+; CHECK-RV64VC-NEXT: mv s0, a7
+; CHECK-RV64VC-NEXT: .LBB913_8:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v15, (zero), v16
+; CHECK-RV64VC-NEXT: vl8re64.v v16, (s2)
+; CHECK-RV64VC-NEXT: sub a0, s0, a1
+; CHECK-RV64VC-NEXT: sltu a2, s0, a0
+; CHECK-RV64VC-NEXT: addi a2, a2, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a2
+; CHECK-RV64VC-NEXT: sub a2, a0, s1
+; CHECK-RV64VC-NEXT: sltu a3, a0, a2
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a2, a2, a3
+; CHECK-RV64VC-NEXT: bltu a0, s1, .LBB913_10
+; CHECK-RV64VC-NEXT: # %bb.9:
+; CHECK-RV64VC-NEXT: mv a0, s1
+; CHECK-RV64VC-NEXT: .LBB913_10:
+; CHECK-RV64VC-NEXT: csrr a3, vlenb
+; CHECK-RV64VC-NEXT: slli a3, a3, 3
+; CHECK-RV64VC-NEXT: mv a4, a3
+; CHECK-RV64VC-NEXT: slli a3, a3, 1
+; CHECK-RV64VC-NEXT: add a3, a3, a4
+; CHECK-RV64VC-NEXT: add a3, a3, sp
+; CHECK-RV64VC-NEXT: addi a3, a3, 16
+; CHECK-RV64VC-NEXT: vl8r.v v24, (a3) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v11, (zero), v24
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v10, (zero), v16
+; CHECK-RV64VC-NEXT: bltu s0, a1, .LBB913_12
+; CHECK-RV64VC-NEXT: # %bb.11:
+; CHECK-RV64VC-NEXT: mv s0, a1
+; CHECK-RV64VC-NEXT: .LBB913_12:
+; CHECK-RV64VC-NEXT: sub a0, s0, s1
+; CHECK-RV64VC-NEXT: sltu a1, s0, a0
+; CHECK-RV64VC-NEXT: addi a1, a1, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a1
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 3
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: slli a1, a1, 2
+; CHECK-RV64VC-NEXT: add a1, a1, a2
+; CHECK-RV64VC-NEXT: add a1, a1, sp
+; CHECK-RV64VC-NEXT: addi a1, a1, 16
+; CHECK-RV64VC-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v9, (zero), v16
+; CHECK-RV64VC-NEXT: bltu s0, s1, .LBB913_14
+; CHECK-RV64VC-NEXT: # %bb.13:
+; CHECK-RV64VC-NEXT: mv s0, s1
+; CHECK-RV64VC-NEXT: .LBB913_14:
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 5
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, s0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 4
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 1
+; CHECK-RV64VC-NEXT: add a0, a0, a1
+; CHECK-RV64VC-NEXT: add sp, sp, a0
+; CHECK-RV64VC-NEXT: .cfi_def_cfa sp, 48
+; CHECK-RV64VC-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-RV64VC-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-RV64VC-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; CHECK-RV64VC-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; CHECK-RV64VC-NEXT: .cfi_restore ra
+; CHECK-RV64VC-NEXT: .cfi_restore s0
+; CHECK-RV64VC-NEXT: .cfi_restore s1
+; CHECK-RV64VC-NEXT: .cfi_restore s2
+; CHECK-RV64VC-NEXT: addi sp, sp, 48
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv64i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: addi sp, sp, -16
+; CHECK-RV32VC-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV32VC-NEXT: csrr a1, vlenb
+; CHECK-RV32VC-NEXT: slli a1, a1, 3
+; CHECK-RV32VC-NEXT: sub sp, sp, a1
+; CHECK-RV32VC-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV32VC-NEXT: addi a1, sp, 16
+; CHECK-RV32VC-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV32VC-NEXT: csrr a4, vlenb
+; CHECK-RV32VC-NEXT: vl8re32.v v8, (a0)
+; CHECK-RV32VC-NEXT: slli a3, a4, 3
+; CHECK-RV32VC-NEXT: slli a1, a4, 2
+; CHECK-RV32VC-NEXT: add a0, a0, a3
+; CHECK-RV32VC-NEXT: sub a3, a2, a1
+; CHECK-RV32VC-NEXT: vl8re32.v v0, (a0)
+; CHECK-RV32VC-NEXT: sltu a0, a2, a3
+; CHECK-RV32VC-NEXT: addi a0, a0, -1
+; CHECK-RV32VC-NEXT: and a3, a3, a0
+; CHECK-RV32VC-NEXT: slli a0, a4, 1
+; CHECK-RV32VC-NEXT: sub a4, a3, a0
+; CHECK-RV32VC-NEXT: sltu a5, a3, a4
+; CHECK-RV32VC-NEXT: addi a5, a5, -1
+; CHECK-RV32VC-NEXT: and a4, a4, a5
+; CHECK-RV32VC-NEXT: bltu a3, a0, .LBB913_2
+; CHECK-RV32VC-NEXT: # %bb.1:
+; CHECK-RV32VC-NEXT: mv a3, a0
+; CHECK-RV32VC-NEXT: .LBB913_2:
+; CHECK-RV32VC-NEXT: vsetvli zero, a4, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v30, (zero), v0
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v28, (zero), v8
+; CHECK-RV32VC-NEXT: bltu a2, a1, .LBB913_4
+; CHECK-RV32VC-NEXT: # %bb.3:
+; CHECK-RV32VC-NEXT: mv a2, a1
+; CHECK-RV32VC-NEXT: .LBB913_4:
+; CHECK-RV32VC-NEXT: sub a1, a2, a0
+; CHECK-RV32VC-NEXT: sltu a3, a2, a1
+; CHECK-RV32VC-NEXT: addi a3, a3, -1
+; CHECK-RV32VC-NEXT: and a1, a1, a3
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v26, (zero), v16
+; CHECK-RV32VC-NEXT: bltu a2, a0, .LBB913_6
+; CHECK-RV32VC-NEXT: # %bb.5:
+; CHECK-RV32VC-NEXT: mv a2, a0
+; CHECK-RV32VC-NEXT: .LBB913_6:
+; CHECK-RV32VC-NEXT: addi a0, sp, 16
+; CHECK-RV32VC-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV32VC-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v24, (zero), v8
+; CHECK-RV32VC-NEXT: vmv8r.v v8, v24
+; CHECK-RV32VC-NEXT: csrr a0, vlenb
+; CHECK-RV32VC-NEXT: slli a0, a0, 3
+; CHECK-RV32VC-NEXT: add sp, sp, a0
+; CHECK-RV32VC-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV32VC-NEXT: addi sp, sp, 16
+; CHECK-RV32VC-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 64 x i8> @llvm.vp.gather.nxv64i8.nxv64p0(<vscale x 64 x ptr> %ptrs, <vscale x 64 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 64 x i8> %x
+}
+
+define <vscale x 64 x i8> @test_nontemporal_vp_gather_nxv64i8_DEFAULT(<vscale x 64 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_nxv64i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: addi sp, sp, -48
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 48
+; CHECK-RV64V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-RV64V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-RV64V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; CHECK-RV64V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; CHECK-RV64V-NEXT: .cfi_offset ra, -8
+; CHECK-RV64V-NEXT: .cfi_offset s0, -16
+; CHECK-RV64V-NEXT: .cfi_offset s1, -24
+; CHECK-RV64V-NEXT: .cfi_offset s2, -32
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 4
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: slli a1, a1, 1
+; CHECK-RV64V-NEXT: add a1, a1, a2
+; CHECK-RV64V-NEXT: sub sp, sp, a1
+; CHECK-RV64V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb
+; CHECK-RV64V-NEXT: mv s0, a6
+; CHECK-RV64V-NEXT: mv s2, a0
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 2
+; CHECK-RV64V-NEXT: add a0, a0, a1
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: csrr s1, vlenb
+; CHECK-RV64V-NEXT: slli a0, s1, 3
+; CHECK-RV64V-NEXT: add a0, s2, a0
+; CHECK-RV64V-NEXT: vl8re64.v v16, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 1
+; CHECK-RV64V-NEXT: add a0, a0, a1
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: slli a0, s1, 4
+; CHECK-RV64V-NEXT: add a0, s2, a0
+; CHECK-RV64V-NEXT: vl8re64.v v16, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 4
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 5
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: li a1, 24
+; CHECK-RV64V-NEXT: mv a0, s1
+; CHECK-RV64V-NEXT: call __muldi3
+; CHECK-RV64V-NEXT: add a0, s2, a0
+; CHECK-RV64V-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: slli a0, s1, 5
+; CHECK-RV64V-NEXT: add a0, s2, a0
+; CHECK-RV64V-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64V-NEXT: addi a0, sp, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: li a1, 40
+; CHECK-RV64V-NEXT: mv a0, s1
+; CHECK-RV64V-NEXT: call __muldi3
+; CHECK-RV64V-NEXT: slli a2, s1, 2
+; CHECK-RV64V-NEXT: sub a1, s0, a2
+; CHECK-RV64V-NEXT: sltu a3, s0, a1
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a3, a3, a1
+; CHECK-RV64V-NEXT: slli a1, s1, 1
+; CHECK-RV64V-NEXT: sub a4, a3, a1
+; CHECK-RV64V-NEXT: sltu a5, a3, a4
+; CHECK-RV64V-NEXT: addi a5, a5, -1
+; CHECK-RV64V-NEXT: and a6, a5, a4
+; CHECK-RV64V-NEXT: sub a4, a6, s1
+; CHECK-RV64V-NEXT: mv a5, a6
+; CHECK-RV64V-NEXT: bltu a6, s1, .LBB914_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a5, s1
+; CHECK-RV64V-NEXT: .LBB914_2:
+; CHECK-RV64V-NEXT: sltu a7, a6, a4
+; CHECK-RV64V-NEXT: bltu a3, a1, .LBB914_4
+; CHECK-RV64V-NEXT: # %bb.3:
+; CHECK-RV64V-NEXT: mv a3, a1
+; CHECK-RV64V-NEXT: .LBB914_4:
+; CHECK-RV64V-NEXT: add a6, s2, a0
+; CHECK-RV64V-NEXT: addi a0, a7, -1
+; CHECK-RV64V-NEXT: sub a7, a3, s1
+; CHECK-RV64V-NEXT: sltu t0, a3, a7
+; CHECK-RV64V-NEXT: addi t0, t0, -1
+; CHECK-RV64V-NEXT: and a7, t0, a7
+; CHECK-RV64V-NEXT: bltu a3, s1, .LBB914_6
+; CHECK-RV64V-NEXT: # %bb.5:
+; CHECK-RV64V-NEXT: mv a3, s1
+; CHECK-RV64V-NEXT: .LBB914_6:
+; CHECK-RV64V-NEXT: vl8re64.v v16, (a6)
+; CHECK-RV64V-NEXT: addi a6, sp, 16
+; CHECK-RV64V-NEXT: vl8r.v v24, (a6) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a5, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v14, (zero), v24
+; CHECK-RV64V-NEXT: csrr a5, vlenb
+; CHECK-RV64V-NEXT: slli a5, a5, 3
+; CHECK-RV64V-NEXT: add a5, sp, a5
+; CHECK-RV64V-NEXT: addi a5, a5, 16
+; CHECK-RV64V-NEXT: vl8r.v v24, (a5) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a7, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v13, (zero), v24
+; CHECK-RV64V-NEXT: csrr a5, vlenb
+; CHECK-RV64V-NEXT: slli a5, a5, 4
+; CHECK-RV64V-NEXT: add a5, sp, a5
+; CHECK-RV64V-NEXT: addi a5, a5, 16
+; CHECK-RV64V-NEXT: vl8r.v v24, (a5) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v12, (zero), v24
+; CHECK-RV64V-NEXT: and a0, a0, a4
+; CHECK-RV64V-NEXT: bltu s0, a2, .LBB914_8
+; CHECK-RV64V-NEXT: # %bb.7:
+; CHECK-RV64V-NEXT: mv s0, a2
+; CHECK-RV64V-NEXT: .LBB914_8:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v15, (zero), v16
+; CHECK-RV64V-NEXT: vl8re64.v v16, (s2)
+; CHECK-RV64V-NEXT: sub a0, s0, a1
+; CHECK-RV64V-NEXT: sltu a2, s0, a0
+; CHECK-RV64V-NEXT: addi a2, a2, -1
+; CHECK-RV64V-NEXT: and a0, a2, a0
+; CHECK-RV64V-NEXT: sub a2, a0, s1
+; CHECK-RV64V-NEXT: sltu a3, a0, a2
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a2, a3, a2
+; CHECK-RV64V-NEXT: bltu a0, s1, .LBB914_10
+; CHECK-RV64V-NEXT: # %bb.9:
+; CHECK-RV64V-NEXT: mv a0, s1
+; CHECK-RV64V-NEXT: .LBB914_10:
+; CHECK-RV64V-NEXT: csrr a3, vlenb
+; CHECK-RV64V-NEXT: slli a3, a3, 3
+; CHECK-RV64V-NEXT: mv a4, a3
+; CHECK-RV64V-NEXT: slli a3, a3, 1
+; CHECK-RV64V-NEXT: add a3, a3, a4
+; CHECK-RV64V-NEXT: add a3, sp, a3
+; CHECK-RV64V-NEXT: addi a3, a3, 16
+; CHECK-RV64V-NEXT: vl8r.v v24, (a3) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v11, (zero), v24
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v10, (zero), v16
+; CHECK-RV64V-NEXT: bltu s0, a1, .LBB914_12
+; CHECK-RV64V-NEXT: # %bb.11:
+; CHECK-RV64V-NEXT: mv s0, a1
+; CHECK-RV64V-NEXT: .LBB914_12:
+; CHECK-RV64V-NEXT: sub a0, s0, s1
+; CHECK-RV64V-NEXT: sltu a1, s0, a0
+; CHECK-RV64V-NEXT: addi a1, a1, -1
+; CHECK-RV64V-NEXT: and a0, a1, a0
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 3
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: slli a1, a1, 2
+; CHECK-RV64V-NEXT: add a1, a1, a2
+; CHECK-RV64V-NEXT: add a1, sp, a1
+; CHECK-RV64V-NEXT: addi a1, a1, 16
+; CHECK-RV64V-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v9, (zero), v16
+; CHECK-RV64V-NEXT: bltu s0, s1, .LBB914_14
+; CHECK-RV64V-NEXT: # %bb.13:
+; CHECK-RV64V-NEXT: mv s0, s1
+; CHECK-RV64V-NEXT: .LBB914_14:
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 5
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, s0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 4
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 1
+; CHECK-RV64V-NEXT: add a0, a0, a1
+; CHECK-RV64V-NEXT: add sp, sp, a0
+; CHECK-RV64V-NEXT: .cfi_def_cfa sp, 48
+; CHECK-RV64V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-RV64V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-RV64V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; CHECK-RV64V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; CHECK-RV64V-NEXT: .cfi_restore ra
+; CHECK-RV64V-NEXT: .cfi_restore s0
+; CHECK-RV64V-NEXT: .cfi_restore s1
+; CHECK-RV64V-NEXT: .cfi_restore s2
+; CHECK-RV64V-NEXT: addi sp, sp, 48
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_nxv64i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: addi sp, sp, -16
+; CHECK-RV32V-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV32V-NEXT: csrr a1, vlenb
+; CHECK-RV32V-NEXT: slli a1, a1, 3
+; CHECK-RV32V-NEXT: sub sp, sp, a1
+; CHECK-RV32V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV32V-NEXT: addi a1, sp, 16
+; CHECK-RV32V-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV32V-NEXT: csrr a4, vlenb
+; CHECK-RV32V-NEXT: vl8re32.v v8, (a0)
+; CHECK-RV32V-NEXT: slli a3, a4, 3
+; CHECK-RV32V-NEXT: slli a1, a4, 2
+; CHECK-RV32V-NEXT: add a0, a0, a3
+; CHECK-RV32V-NEXT: sub a3, a2, a1
+; CHECK-RV32V-NEXT: vl8re32.v v0, (a0)
+; CHECK-RV32V-NEXT: sltu a0, a2, a3
+; CHECK-RV32V-NEXT: addi a0, a0, -1
+; CHECK-RV32V-NEXT: and a3, a0, a3
+; CHECK-RV32V-NEXT: slli a0, a4, 1
+; CHECK-RV32V-NEXT: sub a4, a3, a0
+; CHECK-RV32V-NEXT: sltu a5, a3, a4
+; CHECK-RV32V-NEXT: addi a5, a5, -1
+; CHECK-RV32V-NEXT: and a4, a5, a4
+; CHECK-RV32V-NEXT: bltu a3, a0, .LBB914_2
+; CHECK-RV32V-NEXT: # %bb.1:
+; CHECK-RV32V-NEXT: mv a3, a0
+; CHECK-RV32V-NEXT: .LBB914_2:
+; CHECK-RV32V-NEXT: vsetvli zero, a4, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v30, (zero), v0
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v28, (zero), v8
+; CHECK-RV32V-NEXT: bltu a2, a1, .LBB914_4
+; CHECK-RV32V-NEXT: # %bb.3:
+; CHECK-RV32V-NEXT: mv a2, a1
+; CHECK-RV32V-NEXT: .LBB914_4:
+; CHECK-RV32V-NEXT: sub a1, a2, a0
+; CHECK-RV32V-NEXT: sltu a3, a2, a1
+; CHECK-RV32V-NEXT: addi a3, a3, -1
+; CHECK-RV32V-NEXT: and a1, a3, a1
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v26, (zero), v16
+; CHECK-RV32V-NEXT: bltu a2, a0, .LBB914_6
+; CHECK-RV32V-NEXT: # %bb.5:
+; CHECK-RV32V-NEXT: mv a2, a0
+; CHECK-RV32V-NEXT: .LBB914_6:
+; CHECK-RV32V-NEXT: addi a0, sp, 16
+; CHECK-RV32V-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV32V-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v24, (zero), v8
+; CHECK-RV32V-NEXT: vmv8r.v v8, v24
+; CHECK-RV32V-NEXT: csrr a0, vlenb
+; CHECK-RV32V-NEXT: slli a0, a0, 3
+; CHECK-RV32V-NEXT: add sp, sp, a0
+; CHECK-RV32V-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV32V-NEXT: addi sp, sp, 16
+; CHECK-RV32V-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_nxv64i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: addi sp, sp, -48
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 48
+; CHECK-RV64VC-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-RV64VC-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-RV64VC-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; CHECK-RV64VC-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; CHECK-RV64VC-NEXT: .cfi_offset ra, -8
+; CHECK-RV64VC-NEXT: .cfi_offset s0, -16
+; CHECK-RV64VC-NEXT: .cfi_offset s1, -24
+; CHECK-RV64VC-NEXT: .cfi_offset s2, -32
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 4
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: slli a1, a1, 1
+; CHECK-RV64VC-NEXT: add a1, a1, a2
+; CHECK-RV64VC-NEXT: sub sp, sp, a1
+; CHECK-RV64VC-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb
+; CHECK-RV64VC-NEXT: mv s0, a6
+; CHECK-RV64VC-NEXT: mv s2, a0
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 2
+; CHECK-RV64VC-NEXT: add a0, a0, a1
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: csrr s1, vlenb
+; CHECK-RV64VC-NEXT: slli a0, s1, 3
+; CHECK-RV64VC-NEXT: add a0, a0, s2
+; CHECK-RV64VC-NEXT: vl8re64.v v16, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 1
+; CHECK-RV64VC-NEXT: add a0, a0, a1
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: slli a0, s1, 4
+; CHECK-RV64VC-NEXT: add a0, a0, s2
+; CHECK-RV64VC-NEXT: vl8re64.v v16, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 4
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 5
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: li a1, 24
+; CHECK-RV64VC-NEXT: mv a0, s1
+; CHECK-RV64VC-NEXT: call __muldi3
+; CHECK-RV64VC-NEXT: add a0, a0, s2
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: slli a0, s1, 5
+; CHECK-RV64VC-NEXT: add a0, a0, s2
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64VC-NEXT: addi a0, sp, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: li a1, 40
+; CHECK-RV64VC-NEXT: mv a0, s1
+; CHECK-RV64VC-NEXT: call __muldi3
+; CHECK-RV64VC-NEXT: slli a7, s1, 2
+; CHECK-RV64VC-NEXT: sub a1, s0, a7
+; CHECK-RV64VC-NEXT: sltu a2, s0, a1
+; CHECK-RV64VC-NEXT: addi a2, a2, -1
+; CHECK-RV64VC-NEXT: and a3, a2, a1
+; CHECK-RV64VC-NEXT: slli a1, s1, 1
+; CHECK-RV64VC-NEXT: sub a2, a3, a1
+; CHECK-RV64VC-NEXT: sltu a4, a3, a2
+; CHECK-RV64VC-NEXT: addi a4, a4, -1
+; CHECK-RV64VC-NEXT: and a2, a2, a4
+; CHECK-RV64VC-NEXT: sub t0, a2, s1
+; CHECK-RV64VC-NEXT: mv a5, a2
+; CHECK-RV64VC-NEXT: bltu a2, s1, .LBB914_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a5, s1
+; CHECK-RV64VC-NEXT: .LBB914_2:
+; CHECK-RV64VC-NEXT: sltu a6, a2, t0
+; CHECK-RV64VC-NEXT: bltu a3, a1, .LBB914_4
+; CHECK-RV64VC-NEXT: # %bb.3:
+; CHECK-RV64VC-NEXT: mv a3, a1
+; CHECK-RV64VC-NEXT: .LBB914_4:
+; CHECK-RV64VC-NEXT: add a0, a0, s2
+; CHECK-RV64VC-NEXT: addi a6, a6, -1
+; CHECK-RV64VC-NEXT: sub a2, a3, s1
+; CHECK-RV64VC-NEXT: sltu a4, a3, a2
+; CHECK-RV64VC-NEXT: addi a4, a4, -1
+; CHECK-RV64VC-NEXT: and a2, a2, a4
+; CHECK-RV64VC-NEXT: bltu a3, s1, .LBB914_6
+; CHECK-RV64VC-NEXT: # %bb.5:
+; CHECK-RV64VC-NEXT: mv a3, s1
+; CHECK-RV64VC-NEXT: .LBB914_6:
+; CHECK-RV64VC-NEXT: vl8re64.v v16, (a0)
+; CHECK-RV64VC-NEXT: addi a0, sp, 16
+; CHECK-RV64VC-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a5, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v14, (zero), v24
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v13, (zero), v24
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 4
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v12, (zero), v24
+; CHECK-RV64VC-NEXT: and a0, a6, t0
+; CHECK-RV64VC-NEXT: bltu s0, a7, .LBB914_8
+; CHECK-RV64VC-NEXT: # %bb.7:
+; CHECK-RV64VC-NEXT: mv s0, a7
+; CHECK-RV64VC-NEXT: .LBB914_8:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v15, (zero), v16
+; CHECK-RV64VC-NEXT: vl8re64.v v16, (s2)
+; CHECK-RV64VC-NEXT: sub a0, s0, a1
+; CHECK-RV64VC-NEXT: sltu a2, s0, a0
+; CHECK-RV64VC-NEXT: addi a2, a2, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a2
+; CHECK-RV64VC-NEXT: sub a2, a0, s1
+; CHECK-RV64VC-NEXT: sltu a3, a0, a2
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a2, a2, a3
+; CHECK-RV64VC-NEXT: bltu a0, s1, .LBB914_10
+; CHECK-RV64VC-NEXT: # %bb.9:
+; CHECK-RV64VC-NEXT: mv a0, s1
+; CHECK-RV64VC-NEXT: .LBB914_10:
+; CHECK-RV64VC-NEXT: csrr a3, vlenb
+; CHECK-RV64VC-NEXT: slli a3, a3, 3
+; CHECK-RV64VC-NEXT: mv a4, a3
+; CHECK-RV64VC-NEXT: slli a3, a3, 1
+; CHECK-RV64VC-NEXT: add a3, a3, a4
+; CHECK-RV64VC-NEXT: add a3, a3, sp
+; CHECK-RV64VC-NEXT: addi a3, a3, 16
+; CHECK-RV64VC-NEXT: vl8r.v v24, (a3) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v11, (zero), v24
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v10, (zero), v16
+; CHECK-RV64VC-NEXT: bltu s0, a1, .LBB914_12
+; CHECK-RV64VC-NEXT: # %bb.11:
+; CHECK-RV64VC-NEXT: mv s0, a1
+; CHECK-RV64VC-NEXT: .LBB914_12:
+; CHECK-RV64VC-NEXT: sub a0, s0, s1
+; CHECK-RV64VC-NEXT: sltu a1, s0, a0
+; CHECK-RV64VC-NEXT: addi a1, a1, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a1
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 3
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: slli a1, a1, 2
+; CHECK-RV64VC-NEXT: add a1, a1, a2
+; CHECK-RV64VC-NEXT: add a1, a1, sp
+; CHECK-RV64VC-NEXT: addi a1, a1, 16
+; CHECK-RV64VC-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v9, (zero), v16
+; CHECK-RV64VC-NEXT: bltu s0, s1, .LBB914_14
+; CHECK-RV64VC-NEXT: # %bb.13:
+; CHECK-RV64VC-NEXT: mv s0, s1
+; CHECK-RV64VC-NEXT: .LBB914_14:
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 5
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, s0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 4
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 1
+; CHECK-RV64VC-NEXT: add a0, a0, a1
+; CHECK-RV64VC-NEXT: add sp, sp, a0
+; CHECK-RV64VC-NEXT: .cfi_def_cfa sp, 48
+; CHECK-RV64VC-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-RV64VC-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-RV64VC-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; CHECK-RV64VC-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; CHECK-RV64VC-NEXT: .cfi_restore ra
+; CHECK-RV64VC-NEXT: .cfi_restore s0
+; CHECK-RV64VC-NEXT: .cfi_restore s1
+; CHECK-RV64VC-NEXT: .cfi_restore s2
+; CHECK-RV64VC-NEXT: addi sp, sp, 48
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_nxv64i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: addi sp, sp, -16
+; CHECK-RV32VC-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV32VC-NEXT: csrr a1, vlenb
+; CHECK-RV32VC-NEXT: slli a1, a1, 3
+; CHECK-RV32VC-NEXT: sub sp, sp, a1
+; CHECK-RV32VC-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV32VC-NEXT: addi a1, sp, 16
+; CHECK-RV32VC-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV32VC-NEXT: csrr a4, vlenb
+; CHECK-RV32VC-NEXT: vl8re32.v v8, (a0)
+; CHECK-RV32VC-NEXT: slli a3, a4, 3
+; CHECK-RV32VC-NEXT: slli a1, a4, 2
+; CHECK-RV32VC-NEXT: add a0, a0, a3
+; CHECK-RV32VC-NEXT: sub a3, a2, a1
+; CHECK-RV32VC-NEXT: vl8re32.v v0, (a0)
+; CHECK-RV32VC-NEXT: sltu a0, a2, a3
+; CHECK-RV32VC-NEXT: addi a0, a0, -1
+; CHECK-RV32VC-NEXT: and a3, a3, a0
+; CHECK-RV32VC-NEXT: slli a0, a4, 1
+; CHECK-RV32VC-NEXT: sub a4, a3, a0
+; CHECK-RV32VC-NEXT: sltu a5, a3, a4
+; CHECK-RV32VC-NEXT: addi a5, a5, -1
+; CHECK-RV32VC-NEXT: and a4, a4, a5
+; CHECK-RV32VC-NEXT: bltu a3, a0, .LBB914_2
+; CHECK-RV32VC-NEXT: # %bb.1:
+; CHECK-RV32VC-NEXT: mv a3, a0
+; CHECK-RV32VC-NEXT: .LBB914_2:
+; CHECK-RV32VC-NEXT: vsetvli zero, a4, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v30, (zero), v0
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v28, (zero), v8
+; CHECK-RV32VC-NEXT: bltu a2, a1, .LBB914_4
+; CHECK-RV32VC-NEXT: # %bb.3:
+; CHECK-RV32VC-NEXT: mv a2, a1
+; CHECK-RV32VC-NEXT: .LBB914_4:
+; CHECK-RV32VC-NEXT: sub a1, a2, a0
+; CHECK-RV32VC-NEXT: sltu a3, a2, a1
+; CHECK-RV32VC-NEXT: addi a3, a3, -1
+; CHECK-RV32VC-NEXT: and a1, a1, a3
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v26, (zero), v16
+; CHECK-RV32VC-NEXT: bltu a2, a0, .LBB914_6
+; CHECK-RV32VC-NEXT: # %bb.5:
+; CHECK-RV32VC-NEXT: mv a2, a0
+; CHECK-RV32VC-NEXT: .LBB914_6:
+; CHECK-RV32VC-NEXT: addi a0, sp, 16
+; CHECK-RV32VC-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; CHECK-RV32VC-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v24, (zero), v8
+; CHECK-RV32VC-NEXT: vmv8r.v v8, v24
+; CHECK-RV32VC-NEXT: csrr a0, vlenb
+; CHECK-RV32VC-NEXT: slli a0, a0, 3
+; CHECK-RV32VC-NEXT: add sp, sp, a0
+; CHECK-RV32VC-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV32VC-NEXT: addi sp, sp, 16
+; CHECK-RV32VC-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 64 x i8> @llvm.vp.gather.nxv64i8.nxv64p0(<vscale x 64 x ptr> %ptrs, <vscale x 64 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 64 x i8> %x
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv64i8_P1(<vscale x 64 x i8> %val, <vscale x 64 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv64i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: addi sp, sp, -48
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 48
+; CHECK-RV64V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-RV64V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-RV64V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; CHECK-RV64V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; CHECK-RV64V-NEXT: .cfi_offset ra, -8
+; CHECK-RV64V-NEXT: .cfi_offset s0, -16
+; CHECK-RV64V-NEXT: .cfi_offset s1, -24
+; CHECK-RV64V-NEXT: .cfi_offset s2, -32
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 3
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: slli a1, a1, 1
+; CHECK-RV64V-NEXT: add a2, a2, a1
+; CHECK-RV64V-NEXT: slli a1, a1, 1
+; CHECK-RV64V-NEXT: add a1, a1, a2
+; CHECK-RV64V-NEXT: sub sp, sp, a1
+; CHECK-RV64V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb
+; CHECK-RV64V-NEXT: mv s1, a7
+; CHECK-RV64V-NEXT: mv s2, a0
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 4
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 4
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 1
+; CHECK-RV64V-NEXT: add a0, a0, a1
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: csrr s0, vlenb
+; CHECK-RV64V-NEXT: li a1, 48
+; CHECK-RV64V-NEXT: mv a0, s0
+; CHECK-RV64V-NEXT: call __muldi3
+; CHECK-RV64V-NEXT: add a0, s2, a0
+; CHECK-RV64V-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 2
+; CHECK-RV64V-NEXT: add a0, a0, a1
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: li a1, 40
+; CHECK-RV64V-NEXT: mv a0, s0
+; CHECK-RV64V-NEXT: call __muldi3
+; CHECK-RV64V-NEXT: add a0, s2, a0
+; CHECK-RV64V-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 5
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: slli a0, s0, 5
+; CHECK-RV64V-NEXT: add a0, s2, a0
+; CHECK-RV64V-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 1
+; CHECK-RV64V-NEXT: add a0, a0, a1
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: li a1, 24
+; CHECK-RV64V-NEXT: mv a0, s0
+; CHECK-RV64V-NEXT: call __muldi3
+; CHECK-RV64V-NEXT: slli a2, s0, 2
+; CHECK-RV64V-NEXT: mv a3, s1
+; CHECK-RV64V-NEXT: bltu s1, a2, .LBB915_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a3, a2
+; CHECK-RV64V-NEXT: .LBB915_2:
+; CHECK-RV64V-NEXT: slli a5, s0, 4
+; CHECK-RV64V-NEXT: slli a1, s0, 1
+; CHECK-RV64V-NEXT: slli a6, s0, 3
+; CHECK-RV64V-NEXT: mv a4, a3
+; CHECK-RV64V-NEXT: bltu a3, a1, .LBB915_4
+; CHECK-RV64V-NEXT: # %bb.3:
+; CHECK-RV64V-NEXT: mv a4, a1
+; CHECK-RV64V-NEXT: .LBB915_4:
+; CHECK-RV64V-NEXT: vl8re64.v v8, (s2)
+; CHECK-RV64V-NEXT: add a7, s2, a0
+; CHECK-RV64V-NEXT: add a5, s2, a5
+; CHECK-RV64V-NEXT: add a6, s2, a6
+; CHECK-RV64V-NEXT: mv a0, a4
+; CHECK-RV64V-NEXT: bltu a4, s0, .LBB915_6
+; CHECK-RV64V-NEXT: # %bb.5:
+; CHECK-RV64V-NEXT: mv a0, s0
+; CHECK-RV64V-NEXT: .LBB915_6:
+; CHECK-RV64V-NEXT: vl8re64.v v16, (a7)
+; CHECK-RV64V-NEXT: csrr a7, vlenb
+; CHECK-RV64V-NEXT: slli a7, a7, 3
+; CHECK-RV64V-NEXT: add a7, sp, a7
+; CHECK-RV64V-NEXT: addi a7, a7, 16
+; CHECK-RV64V-NEXT: vs8r.v v16, (a7) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: vl8re64.v v16, (a5)
+; CHECK-RV64V-NEXT: addi a5, sp, 16
+; CHECK-RV64V-NEXT: vs8r.v v16, (a5) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: vl8re64.v v0, (a6)
+; CHECK-RV64V-NEXT: csrr a5, vlenb
+; CHECK-RV64V-NEXT: slli a5, a5, 4
+; CHECK-RV64V-NEXT: mv a6, a5
+; CHECK-RV64V-NEXT: slli a5, a5, 1
+; CHECK-RV64V-NEXT: add a5, a5, a6
+; CHECK-RV64V-NEXT: add a5, sp, a5
+; CHECK-RV64V-NEXT: addi a5, a5, 16
+; CHECK-RV64V-NEXT: vl8r.v v24, (a5) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: csrr a5, vlenb
+; CHECK-RV64V-NEXT: slli a5, a5, 4
+; CHECK-RV64V-NEXT: add a5, sp, a5
+; CHECK-RV64V-NEXT: addi a5, a5, 16
+; CHECK-RV64V-NEXT: vl8r.v v16, (a5) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v24, (zero), v16
+; CHECK-RV64V-NEXT: sub a0, a4, s0
+; CHECK-RV64V-NEXT: sub a5, a3, a1
+; CHECK-RV64V-NEXT: sltu a4, a4, a0
+; CHECK-RV64V-NEXT: sltu a3, a3, a5
+; CHECK-RV64V-NEXT: addi a4, a4, -1
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a4, a4, a0
+; CHECK-RV64V-NEXT: and a0, a3, a5
+; CHECK-RV64V-NEXT: vsetvli zero, a4, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v25, (zero), v8
+; CHECK-RV64V-NEXT: mv a3, a0
+; CHECK-RV64V-NEXT: bltu a0, s0, .LBB915_8
+; CHECK-RV64V-NEXT: # %bb.7:
+; CHECK-RV64V-NEXT: mv a3, s0
+; CHECK-RV64V-NEXT: .LBB915_8:
+; CHECK-RV64V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v26, (zero), v0
+; CHECK-RV64V-NEXT: sub a3, a0, s0
+; CHECK-RV64V-NEXT: sub a2, s1, a2
+; CHECK-RV64V-NEXT: sltu a0, a0, a3
+; CHECK-RV64V-NEXT: sltu a4, s1, a2
+; CHECK-RV64V-NEXT: addi a0, a0, -1
+; CHECK-RV64V-NEXT: addi a4, a4, -1
+; CHECK-RV64V-NEXT: and a3, a0, a3
+; CHECK-RV64V-NEXT: and a0, a4, a2
+; CHECK-RV64V-NEXT: addi a2, sp, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v27, (zero), v8
+; CHECK-RV64V-NEXT: mv a2, a0
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB915_10
+; CHECK-RV64V-NEXT: # %bb.9:
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: .LBB915_10:
+; CHECK-RV64V-NEXT: mv a3, a2
+; CHECK-RV64V-NEXT: bltu a2, s0, .LBB915_12
+; CHECK-RV64V-NEXT: # %bb.11:
+; CHECK-RV64V-NEXT: mv a3, s0
+; CHECK-RV64V-NEXT: .LBB915_12:
+; CHECK-RV64V-NEXT: csrr a4, vlenb
+; CHECK-RV64V-NEXT: slli a4, a4, 3
+; CHECK-RV64V-NEXT: add a4, sp, a4
+; CHECK-RV64V-NEXT: addi a4, a4, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a4) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v28, (zero), v8
+; CHECK-RV64V-NEXT: sub a3, a2, s0
+; CHECK-RV64V-NEXT: sub a1, a0, a1
+; CHECK-RV64V-NEXT: sltu a2, a2, a3
+; CHECK-RV64V-NEXT: sltu a0, a0, a1
+; CHECK-RV64V-NEXT: addi a2, a2, -1
+; CHECK-RV64V-NEXT: addi a0, a0, -1
+; CHECK-RV64V-NEXT: and a2, a2, a3
+; CHECK-RV64V-NEXT: and a0, a0, a1
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 3
+; CHECK-RV64V-NEXT: mv a3, a1
+; CHECK-RV64V-NEXT: slli a1, a1, 1
+; CHECK-RV64V-NEXT: add a1, a1, a3
+; CHECK-RV64V-NEXT: add a1, sp, a1
+; CHECK-RV64V-NEXT: addi a1, a1, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v29, (zero), v8
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: bltu a0, s0, .LBB915_14
+; CHECK-RV64V-NEXT: # %bb.13:
+; CHECK-RV64V-NEXT: mv a1, s0
+; CHECK-RV64V-NEXT: .LBB915_14:
+; CHECK-RV64V-NEXT: csrr a2, vlenb
+; CHECK-RV64V-NEXT: slli a2, a2, 5
+; CHECK-RV64V-NEXT: add a2, sp, a2
+; CHECK-RV64V-NEXT: addi a2, a2, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v30, (zero), v8
+; CHECK-RV64V-NEXT: sub a1, a0, s0
+; CHECK-RV64V-NEXT: sltu a0, a0, a1
+; CHECK-RV64V-NEXT: addi a0, a0, -1
+; CHECK-RV64V-NEXT: and a0, a0, a1
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 3
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: slli a1, a1, 2
+; CHECK-RV64V-NEXT: add a1, a1, a2
+; CHECK-RV64V-NEXT: add a1, sp, a1
+; CHECK-RV64V-NEXT: addi a1, a1, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v31, (zero), v8
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 1
+; CHECK-RV64V-NEXT: add a1, a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 1
+; CHECK-RV64V-NEXT: add a0, a0, a1
+; CHECK-RV64V-NEXT: add sp, sp, a0
+; CHECK-RV64V-NEXT: .cfi_def_cfa sp, 48
+; CHECK-RV64V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-RV64V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-RV64V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; CHECK-RV64V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; CHECK-RV64V-NEXT: .cfi_restore ra
+; CHECK-RV64V-NEXT: .cfi_restore s0
+; CHECK-RV64V-NEXT: .cfi_restore s1
+; CHECK-RV64V-NEXT: .cfi_restore s2
+; CHECK-RV64V-NEXT: addi sp, sp, 48
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv64i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: csrr a1, vlenb
+; CHECK-RV32V-NEXT: slli a5, a1, 4
+; CHECK-RV32V-NEXT: slli a2, a1, 2
+; CHECK-RV32V-NEXT: slli a6, a1, 3
+; CHECK-RV32V-NEXT: mv a4, a3
+; CHECK-RV32V-NEXT: bltu a3, a2, .LBB915_2
+; CHECK-RV32V-NEXT: # %bb.1:
+; CHECK-RV32V-NEXT: mv a4, a2
+; CHECK-RV32V-NEXT: .LBB915_2:
+; CHECK-RV32V-NEXT: vl8re32.v v0, (a0)
+; CHECK-RV32V-NEXT: add a5, a0, a5
+; CHECK-RV32V-NEXT: slli a1, a1, 1
+; CHECK-RV32V-NEXT: add a0, a0, a6
+; CHECK-RV32V-NEXT: mv a6, a4
+; CHECK-RV32V-NEXT: bltu a4, a1, .LBB915_4
+; CHECK-RV32V-NEXT: # %bb.3:
+; CHECK-RV32V-NEXT: mv a6, a1
+; CHECK-RV32V-NEXT: .LBB915_4:
+; CHECK-RV32V-NEXT: addi sp, sp, -16
+; CHECK-RV32V-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV32V-NEXT: csrr a7, vlenb
+; CHECK-RV32V-NEXT: slli a7, a7, 3
+; CHECK-RV32V-NEXT: sub sp, sp, a7
+; CHECK-RV32V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV32V-NEXT: vl8re32.v v24, (a5)
+; CHECK-RV32V-NEXT: addi a5, sp, 16
+; CHECK-RV32V-NEXT: vs8r.v v24, (a5) # vscale x 64-byte Folded Spill
+; CHECK-RV32V-NEXT: vl8re32.v v24, (a0)
+; CHECK-RV32V-NEXT: vsetvli zero, a6, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: sub a0, a4, a1
+; CHECK-RV32V-NEXT: sub a2, a3, a2
+; CHECK-RV32V-NEXT: sltu a4, a4, a0
+; CHECK-RV32V-NEXT: sltu a3, a3, a2
+; CHECK-RV32V-NEXT: addi a4, a4, -1
+; CHECK-RV32V-NEXT: addi a3, a3, -1
+; CHECK-RV32V-NEXT: and a4, a4, a0
+; CHECK-RV32V-NEXT: and a0, a3, a2
+; CHECK-RV32V-NEXT: vsetvli zero, a4, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsoxei32.v v10, (zero), v0
+; CHECK-RV32V-NEXT: mv a2, a0
+; CHECK-RV32V-NEXT: bltu a0, a1, .LBB915_6
+; CHECK-RV32V-NEXT: # %bb.5:
+; CHECK-RV32V-NEXT: mv a2, a1
+; CHECK-RV32V-NEXT: .LBB915_6:
+; CHECK-RV32V-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsoxei32.v v12, (zero), v24
+; CHECK-RV32V-NEXT: sub a1, a0, a1
+; CHECK-RV32V-NEXT: sltu a0, a0, a1
+; CHECK-RV32V-NEXT: addi a0, a0, -1
+; CHECK-RV32V-NEXT: and a0, a0, a1
+; CHECK-RV32V-NEXT: addi a1, sp, 16
+; CHECK-RV32V-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsoxei32.v v14, (zero), v16
+; CHECK-RV32V-NEXT: csrr a0, vlenb
+; CHECK-RV32V-NEXT: slli a0, a0, 3
+; CHECK-RV32V-NEXT: add sp, sp, a0
+; CHECK-RV32V-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV32V-NEXT: addi sp, sp, 16
+; CHECK-RV32V-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv64i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: addi sp, sp, -48
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 48
+; CHECK-RV64VC-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-RV64VC-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-RV64VC-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; CHECK-RV64VC-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; CHECK-RV64VC-NEXT: .cfi_offset ra, -8
+; CHECK-RV64VC-NEXT: .cfi_offset s0, -16
+; CHECK-RV64VC-NEXT: .cfi_offset s1, -24
+; CHECK-RV64VC-NEXT: .cfi_offset s2, -32
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 3
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: slli a1, a1, 1
+; CHECK-RV64VC-NEXT: add a2, a2, a1
+; CHECK-RV64VC-NEXT: slli a1, a1, 1
+; CHECK-RV64VC-NEXT: add a1, a1, a2
+; CHECK-RV64VC-NEXT: sub sp, sp, a1
+; CHECK-RV64VC-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb
+; CHECK-RV64VC-NEXT: mv s2, a7
+; CHECK-RV64VC-NEXT: mv s1, a0
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 4
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 4
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 1
+; CHECK-RV64VC-NEXT: add a0, a0, a1
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: csrr s0, vlenb
+; CHECK-RV64VC-NEXT: li a1, 48
+; CHECK-RV64VC-NEXT: mv a0, s0
+; CHECK-RV64VC-NEXT: call __muldi3
+; CHECK-RV64VC-NEXT: add a0, a0, s1
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 2
+; CHECK-RV64VC-NEXT: add a0, a0, a1
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: li a1, 40
+; CHECK-RV64VC-NEXT: mv a0, s0
+; CHECK-RV64VC-NEXT: call __muldi3
+; CHECK-RV64VC-NEXT: add a0, a0, s1
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 5
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: slli a0, s0, 5
+; CHECK-RV64VC-NEXT: add a0, a0, s1
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 1
+; CHECK-RV64VC-NEXT: add a0, a0, a1
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: li a1, 24
+; CHECK-RV64VC-NEXT: mv a0, s0
+; CHECK-RV64VC-NEXT: call __muldi3
+; CHECK-RV64VC-NEXT: slli a6, s0, 2
+; CHECK-RV64VC-NEXT: mv a3, s2
+; CHECK-RV64VC-NEXT: bltu s2, a6, .LBB915_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a3, a6
+; CHECK-RV64VC-NEXT: .LBB915_2:
+; CHECK-RV64VC-NEXT: slli a5, s0, 4
+; CHECK-RV64VC-NEXT: slli a7, s0, 1
+; CHECK-RV64VC-NEXT: slli a2, s0, 3
+; CHECK-RV64VC-NEXT: mv a4, a3
+; CHECK-RV64VC-NEXT: bltu a3, a7, .LBB915_4
+; CHECK-RV64VC-NEXT: # %bb.3:
+; CHECK-RV64VC-NEXT: mv a4, a7
+; CHECK-RV64VC-NEXT: .LBB915_4:
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (s1)
+; CHECK-RV64VC-NEXT: add a1, s1, a0
+; CHECK-RV64VC-NEXT: add a5, a5, s1
+; CHECK-RV64VC-NEXT: add a2, a2, s1
+; CHECK-RV64VC-NEXT: mv a0, a4
+; CHECK-RV64VC-NEXT: bltu a4, s0, .LBB915_6
+; CHECK-RV64VC-NEXT: # %bb.5:
+; CHECK-RV64VC-NEXT: mv a0, s0
+; CHECK-RV64VC-NEXT: .LBB915_6:
+; CHECK-RV64VC-NEXT: vl8re64.v v16, (a1)
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 3
+; CHECK-RV64VC-NEXT: add a1, a1, sp
+; CHECK-RV64VC-NEXT: addi a1, a1, 16
+; CHECK-RV64VC-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: vl8re64.v v16, (a5)
+; CHECK-RV64VC-NEXT: addi a1, sp, 16
+; CHECK-RV64VC-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: vl8re64.v v0, (a2)
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 4
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: slli a1, a1, 1
+; CHECK-RV64VC-NEXT: add a1, a1, a2
+; CHECK-RV64VC-NEXT: add a1, a1, sp
+; CHECK-RV64VC-NEXT: addi a1, a1, 16
+; CHECK-RV64VC-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 4
+; CHECK-RV64VC-NEXT: add a1, a1, sp
+; CHECK-RV64VC-NEXT: addi a1, a1, 16
+; CHECK-RV64VC-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v24, (zero), v16
+; CHECK-RV64VC-NEXT: sub a0, a4, s0
+; CHECK-RV64VC-NEXT: sub a1, a3, a7
+; CHECK-RV64VC-NEXT: sltu a2, a4, a0
+; CHECK-RV64VC-NEXT: sltu a3, a3, a1
+; CHECK-RV64VC-NEXT: addi a2, a2, -1
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a2, a2, a0
+; CHECK-RV64VC-NEXT: and a0, a3, a1
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v25, (zero), v8
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: bltu a0, s0, .LBB915_8
+; CHECK-RV64VC-NEXT: # %bb.7:
+; CHECK-RV64VC-NEXT: mv a1, s0
+; CHECK-RV64VC-NEXT: .LBB915_8:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v26, (zero), v0
+; CHECK-RV64VC-NEXT: sub a1, a0, s0
+; CHECK-RV64VC-NEXT: sub a2, s2, a6
+; CHECK-RV64VC-NEXT: sltu a0, a0, a1
+; CHECK-RV64VC-NEXT: sltu a3, s2, a2
+; CHECK-RV64VC-NEXT: addi a0, a0, -1
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a1, a1, a0
+; CHECK-RV64VC-NEXT: and a0, a3, a2
+; CHECK-RV64VC-NEXT: addi a2, sp, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v27, (zero), v8
+; CHECK-RV64VC-NEXT: mv a2, a0
+; CHECK-RV64VC-NEXT: bltu a0, a7, .LBB915_10
+; CHECK-RV64VC-NEXT: # %bb.9:
+; CHECK-RV64VC-NEXT: mv a2, a7
+; CHECK-RV64VC-NEXT: .LBB915_10:
+; CHECK-RV64VC-NEXT: mv a1, a2
+; CHECK-RV64VC-NEXT: bltu a2, s0, .LBB915_12
+; CHECK-RV64VC-NEXT: # %bb.11:
+; CHECK-RV64VC-NEXT: mv a1, s0
+; CHECK-RV64VC-NEXT: .LBB915_12:
+; CHECK-RV64VC-NEXT: csrr a3, vlenb
+; CHECK-RV64VC-NEXT: slli a3, a3, 3
+; CHECK-RV64VC-NEXT: add a3, a3, sp
+; CHECK-RV64VC-NEXT: addi a3, a3, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a3) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v28, (zero), v8
+; CHECK-RV64VC-NEXT: sub a1, a2, s0
+; CHECK-RV64VC-NEXT: sub a3, a0, a7
+; CHECK-RV64VC-NEXT: sltu a2, a2, a1
+; CHECK-RV64VC-NEXT: sltu a0, a0, a3
+; CHECK-RV64VC-NEXT: addi a2, a2, -1
+; CHECK-RV64VC-NEXT: addi a0, a0, -1
+; CHECK-RV64VC-NEXT: and a1, a1, a2
+; CHECK-RV64VC-NEXT: and a0, a0, a3
+; CHECK-RV64VC-NEXT: csrr a2, vlenb
+; CHECK-RV64VC-NEXT: slli a2, a2, 3
+; CHECK-RV64VC-NEXT: mv a3, a2
+; CHECK-RV64VC-NEXT: slli a2, a2, 1
+; CHECK-RV64VC-NEXT: add a2, a2, a3
+; CHECK-RV64VC-NEXT: add a2, a2, sp
+; CHECK-RV64VC-NEXT: addi a2, a2, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v29, (zero), v8
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: bltu a0, s0, .LBB915_14
+; CHECK-RV64VC-NEXT: # %bb.13:
+; CHECK-RV64VC-NEXT: mv a1, s0
+; CHECK-RV64VC-NEXT: .LBB915_14:
+; CHECK-RV64VC-NEXT: csrr a2, vlenb
+; CHECK-RV64VC-NEXT: slli a2, a2, 5
+; CHECK-RV64VC-NEXT: add a2, a2, sp
+; CHECK-RV64VC-NEXT: addi a2, a2, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v30, (zero), v8
+; CHECK-RV64VC-NEXT: sub a1, a0, s0
+; CHECK-RV64VC-NEXT: sltu a0, a0, a1
+; CHECK-RV64VC-NEXT: addi a0, a0, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a1
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 3
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: slli a1, a1, 2
+; CHECK-RV64VC-NEXT: add a1, a1, a2
+; CHECK-RV64VC-NEXT: add a1, a1, sp
+; CHECK-RV64VC-NEXT: addi a1, a1, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v31, (zero), v8
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 1
+; CHECK-RV64VC-NEXT: add a1, a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 1
+; CHECK-RV64VC-NEXT: add a0, a0, a1
+; CHECK-RV64VC-NEXT: add sp, sp, a0
+; CHECK-RV64VC-NEXT: .cfi_def_cfa sp, 48
+; CHECK-RV64VC-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-RV64VC-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-RV64VC-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; CHECK-RV64VC-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; CHECK-RV64VC-NEXT: .cfi_restore ra
+; CHECK-RV64VC-NEXT: .cfi_restore s0
+; CHECK-RV64VC-NEXT: .cfi_restore s1
+; CHECK-RV64VC-NEXT: .cfi_restore s2
+; CHECK-RV64VC-NEXT: addi sp, sp, 48
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv64i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: csrr a1, vlenb
+; CHECK-RV32VC-NEXT: slli a5, a1, 4
+; CHECK-RV32VC-NEXT: slli a2, a1, 2
+; CHECK-RV32VC-NEXT: slli a6, a1, 3
+; CHECK-RV32VC-NEXT: mv a4, a3
+; CHECK-RV32VC-NEXT: bltu a3, a2, .LBB915_2
+; CHECK-RV32VC-NEXT: # %bb.1:
+; CHECK-RV32VC-NEXT: mv a4, a2
+; CHECK-RV32VC-NEXT: .LBB915_2:
+; CHECK-RV32VC-NEXT: vl8re32.v v0, (a0)
+; CHECK-RV32VC-NEXT: add a7, a0, a5
+; CHECK-RV32VC-NEXT: slli a1, a1, 1
+; CHECK-RV32VC-NEXT: add a0, a0, a6
+; CHECK-RV32VC-NEXT: mv a5, a4
+; CHECK-RV32VC-NEXT: bltu a4, a1, .LBB915_4
+; CHECK-RV32VC-NEXT: # %bb.3:
+; CHECK-RV32VC-NEXT: mv a5, a1
+; CHECK-RV32VC-NEXT: .LBB915_4:
+; CHECK-RV32VC-NEXT: addi sp, sp, -16
+; CHECK-RV32VC-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV32VC-NEXT: csrr a6, vlenb
+; CHECK-RV32VC-NEXT: slli a6, a6, 3
+; CHECK-RV32VC-NEXT: sub sp, sp, a6
+; CHECK-RV32VC-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV32VC-NEXT: vl8re32.v v24, (a7)
+; CHECK-RV32VC-NEXT: addi a6, sp, 16
+; CHECK-RV32VC-NEXT: vs8r.v v24, (a6) # vscale x 64-byte Folded Spill
+; CHECK-RV32VC-NEXT: vl8re32.v v24, (a0)
+; CHECK-RV32VC-NEXT: vsetvli zero, a5, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: sub a0, a4, a1
+; CHECK-RV32VC-NEXT: sub a2, a3, a2
+; CHECK-RV32VC-NEXT: sltu a4, a4, a0
+; CHECK-RV32VC-NEXT: sltu a3, a3, a2
+; CHECK-RV32VC-NEXT: addi a4, a4, -1
+; CHECK-RV32VC-NEXT: addi a3, a3, -1
+; CHECK-RV32VC-NEXT: and a4, a4, a0
+; CHECK-RV32VC-NEXT: and a0, a3, a2
+; CHECK-RV32VC-NEXT: vsetvli zero, a4, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsoxei32.v v10, (zero), v0
+; CHECK-RV32VC-NEXT: mv a2, a0
+; CHECK-RV32VC-NEXT: bltu a0, a1, .LBB915_6
+; CHECK-RV32VC-NEXT: # %bb.5:
+; CHECK-RV32VC-NEXT: mv a2, a1
+; CHECK-RV32VC-NEXT: .LBB915_6:
+; CHECK-RV32VC-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsoxei32.v v12, (zero), v24
+; CHECK-RV32VC-NEXT: sub a1, a0, a1
+; CHECK-RV32VC-NEXT: sltu a0, a0, a1
+; CHECK-RV32VC-NEXT: addi a0, a0, -1
+; CHECK-RV32VC-NEXT: and a0, a0, a1
+; CHECK-RV32VC-NEXT: addi a1, sp, 16
+; CHECK-RV32VC-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsoxei32.v v14, (zero), v16
+; CHECK-RV32VC-NEXT: csrr a0, vlenb
+; CHECK-RV32VC-NEXT: slli a0, a0, 3
+; CHECK-RV32VC-NEXT: add sp, sp, a0
+; CHECK-RV32VC-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV32VC-NEXT: addi sp, sp, 16
+; CHECK-RV32VC-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv64i8.nxv64p0(<vscale x 64 x i8> %val, <vscale x 64 x ptr> %ptrs, <vscale x 64 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv64i8_PALL(<vscale x 64 x i8> %val, <vscale x 64 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv64i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: addi sp, sp, -48
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 48
+; CHECK-RV64V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-RV64V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-RV64V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; CHECK-RV64V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; CHECK-RV64V-NEXT: .cfi_offset ra, -8
+; CHECK-RV64V-NEXT: .cfi_offset s0, -16
+; CHECK-RV64V-NEXT: .cfi_offset s1, -24
+; CHECK-RV64V-NEXT: .cfi_offset s2, -32
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 3
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: slli a1, a1, 1
+; CHECK-RV64V-NEXT: add a2, a2, a1
+; CHECK-RV64V-NEXT: slli a1, a1, 1
+; CHECK-RV64V-NEXT: add a1, a1, a2
+; CHECK-RV64V-NEXT: sub sp, sp, a1
+; CHECK-RV64V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb
+; CHECK-RV64V-NEXT: mv s1, a7
+; CHECK-RV64V-NEXT: mv s2, a0
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 4
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 4
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 1
+; CHECK-RV64V-NEXT: add a0, a0, a1
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: csrr s0, vlenb
+; CHECK-RV64V-NEXT: li a1, 48
+; CHECK-RV64V-NEXT: mv a0, s0
+; CHECK-RV64V-NEXT: call __muldi3
+; CHECK-RV64V-NEXT: add a0, s2, a0
+; CHECK-RV64V-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 2
+; CHECK-RV64V-NEXT: add a0, a0, a1
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: li a1, 40
+; CHECK-RV64V-NEXT: mv a0, s0
+; CHECK-RV64V-NEXT: call __muldi3
+; CHECK-RV64V-NEXT: add a0, s2, a0
+; CHECK-RV64V-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 5
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: slli a0, s0, 5
+; CHECK-RV64V-NEXT: add a0, s2, a0
+; CHECK-RV64V-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 1
+; CHECK-RV64V-NEXT: add a0, a0, a1
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: li a1, 24
+; CHECK-RV64V-NEXT: mv a0, s0
+; CHECK-RV64V-NEXT: call __muldi3
+; CHECK-RV64V-NEXT: slli a2, s0, 2
+; CHECK-RV64V-NEXT: mv a3, s1
+; CHECK-RV64V-NEXT: bltu s1, a2, .LBB916_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a3, a2
+; CHECK-RV64V-NEXT: .LBB916_2:
+; CHECK-RV64V-NEXT: slli a5, s0, 4
+; CHECK-RV64V-NEXT: slli a1, s0, 1
+; CHECK-RV64V-NEXT: slli a6, s0, 3
+; CHECK-RV64V-NEXT: mv a4, a3
+; CHECK-RV64V-NEXT: bltu a3, a1, .LBB916_4
+; CHECK-RV64V-NEXT: # %bb.3:
+; CHECK-RV64V-NEXT: mv a4, a1
+; CHECK-RV64V-NEXT: .LBB916_4:
+; CHECK-RV64V-NEXT: vl8re64.v v8, (s2)
+; CHECK-RV64V-NEXT: add a7, s2, a0
+; CHECK-RV64V-NEXT: add a5, s2, a5
+; CHECK-RV64V-NEXT: add a6, s2, a6
+; CHECK-RV64V-NEXT: mv a0, a4
+; CHECK-RV64V-NEXT: bltu a4, s0, .LBB916_6
+; CHECK-RV64V-NEXT: # %bb.5:
+; CHECK-RV64V-NEXT: mv a0, s0
+; CHECK-RV64V-NEXT: .LBB916_6:
+; CHECK-RV64V-NEXT: vl8re64.v v16, (a7)
+; CHECK-RV64V-NEXT: csrr a7, vlenb
+; CHECK-RV64V-NEXT: slli a7, a7, 3
+; CHECK-RV64V-NEXT: add a7, sp, a7
+; CHECK-RV64V-NEXT: addi a7, a7, 16
+; CHECK-RV64V-NEXT: vs8r.v v16, (a7) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: vl8re64.v v16, (a5)
+; CHECK-RV64V-NEXT: addi a5, sp, 16
+; CHECK-RV64V-NEXT: vs8r.v v16, (a5) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: vl8re64.v v0, (a6)
+; CHECK-RV64V-NEXT: csrr a5, vlenb
+; CHECK-RV64V-NEXT: slli a5, a5, 4
+; CHECK-RV64V-NEXT: mv a6, a5
+; CHECK-RV64V-NEXT: slli a5, a5, 1
+; CHECK-RV64V-NEXT: add a5, a5, a6
+; CHECK-RV64V-NEXT: add a5, sp, a5
+; CHECK-RV64V-NEXT: addi a5, a5, 16
+; CHECK-RV64V-NEXT: vl8r.v v24, (a5) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: csrr a5, vlenb
+; CHECK-RV64V-NEXT: slli a5, a5, 4
+; CHECK-RV64V-NEXT: add a5, sp, a5
+; CHECK-RV64V-NEXT: addi a5, a5, 16
+; CHECK-RV64V-NEXT: vl8r.v v16, (a5) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v24, (zero), v16
+; CHECK-RV64V-NEXT: sub a0, a4, s0
+; CHECK-RV64V-NEXT: sub a5, a3, a1
+; CHECK-RV64V-NEXT: sltu a4, a4, a0
+; CHECK-RV64V-NEXT: sltu a3, a3, a5
+; CHECK-RV64V-NEXT: addi a4, a4, -1
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a4, a4, a0
+; CHECK-RV64V-NEXT: and a0, a3, a5
+; CHECK-RV64V-NEXT: vsetvli zero, a4, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v25, (zero), v8
+; CHECK-RV64V-NEXT: mv a3, a0
+; CHECK-RV64V-NEXT: bltu a0, s0, .LBB916_8
+; CHECK-RV64V-NEXT: # %bb.7:
+; CHECK-RV64V-NEXT: mv a3, s0
+; CHECK-RV64V-NEXT: .LBB916_8:
+; CHECK-RV64V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v26, (zero), v0
+; CHECK-RV64V-NEXT: sub a3, a0, s0
+; CHECK-RV64V-NEXT: sub a2, s1, a2
+; CHECK-RV64V-NEXT: sltu a0, a0, a3
+; CHECK-RV64V-NEXT: sltu a4, s1, a2
+; CHECK-RV64V-NEXT: addi a0, a0, -1
+; CHECK-RV64V-NEXT: addi a4, a4, -1
+; CHECK-RV64V-NEXT: and a3, a0, a3
+; CHECK-RV64V-NEXT: and a0, a4, a2
+; CHECK-RV64V-NEXT: addi a2, sp, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v27, (zero), v8
+; CHECK-RV64V-NEXT: mv a2, a0
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB916_10
+; CHECK-RV64V-NEXT: # %bb.9:
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: .LBB916_10:
+; CHECK-RV64V-NEXT: mv a3, a2
+; CHECK-RV64V-NEXT: bltu a2, s0, .LBB916_12
+; CHECK-RV64V-NEXT: # %bb.11:
+; CHECK-RV64V-NEXT: mv a3, s0
+; CHECK-RV64V-NEXT: .LBB916_12:
+; CHECK-RV64V-NEXT: csrr a4, vlenb
+; CHECK-RV64V-NEXT: slli a4, a4, 3
+; CHECK-RV64V-NEXT: add a4, sp, a4
+; CHECK-RV64V-NEXT: addi a4, a4, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a4) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v28, (zero), v8
+; CHECK-RV64V-NEXT: sub a3, a2, s0
+; CHECK-RV64V-NEXT: sub a1, a0, a1
+; CHECK-RV64V-NEXT: sltu a2, a2, a3
+; CHECK-RV64V-NEXT: sltu a0, a0, a1
+; CHECK-RV64V-NEXT: addi a2, a2, -1
+; CHECK-RV64V-NEXT: addi a0, a0, -1
+; CHECK-RV64V-NEXT: and a2, a2, a3
+; CHECK-RV64V-NEXT: and a0, a0, a1
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 3
+; CHECK-RV64V-NEXT: mv a3, a1
+; CHECK-RV64V-NEXT: slli a1, a1, 1
+; CHECK-RV64V-NEXT: add a1, a1, a3
+; CHECK-RV64V-NEXT: add a1, sp, a1
+; CHECK-RV64V-NEXT: addi a1, a1, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v29, (zero), v8
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: bltu a0, s0, .LBB916_14
+; CHECK-RV64V-NEXT: # %bb.13:
+; CHECK-RV64V-NEXT: mv a1, s0
+; CHECK-RV64V-NEXT: .LBB916_14:
+; CHECK-RV64V-NEXT: csrr a2, vlenb
+; CHECK-RV64V-NEXT: slli a2, a2, 5
+; CHECK-RV64V-NEXT: add a2, sp, a2
+; CHECK-RV64V-NEXT: addi a2, a2, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v30, (zero), v8
+; CHECK-RV64V-NEXT: sub a1, a0, s0
+; CHECK-RV64V-NEXT: sltu a0, a0, a1
+; CHECK-RV64V-NEXT: addi a0, a0, -1
+; CHECK-RV64V-NEXT: and a0, a0, a1
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 3
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: slli a1, a1, 2
+; CHECK-RV64V-NEXT: add a1, a1, a2
+; CHECK-RV64V-NEXT: add a1, sp, a1
+; CHECK-RV64V-NEXT: addi a1, a1, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v31, (zero), v8
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 1
+; CHECK-RV64V-NEXT: add a1, a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 1
+; CHECK-RV64V-NEXT: add a0, a0, a1
+; CHECK-RV64V-NEXT: add sp, sp, a0
+; CHECK-RV64V-NEXT: .cfi_def_cfa sp, 48
+; CHECK-RV64V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-RV64V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-RV64V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; CHECK-RV64V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; CHECK-RV64V-NEXT: .cfi_restore ra
+; CHECK-RV64V-NEXT: .cfi_restore s0
+; CHECK-RV64V-NEXT: .cfi_restore s1
+; CHECK-RV64V-NEXT: .cfi_restore s2
+; CHECK-RV64V-NEXT: addi sp, sp, 48
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv64i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: csrr a1, vlenb
+; CHECK-RV32V-NEXT: slli a5, a1, 4
+; CHECK-RV32V-NEXT: slli a2, a1, 2
+; CHECK-RV32V-NEXT: slli a6, a1, 3
+; CHECK-RV32V-NEXT: mv a4, a3
+; CHECK-RV32V-NEXT: bltu a3, a2, .LBB916_2
+; CHECK-RV32V-NEXT: # %bb.1:
+; CHECK-RV32V-NEXT: mv a4, a2
+; CHECK-RV32V-NEXT: .LBB916_2:
+; CHECK-RV32V-NEXT: vl8re32.v v0, (a0)
+; CHECK-RV32V-NEXT: add a5, a0, a5
+; CHECK-RV32V-NEXT: slli a1, a1, 1
+; CHECK-RV32V-NEXT: add a0, a0, a6
+; CHECK-RV32V-NEXT: mv a6, a4
+; CHECK-RV32V-NEXT: bltu a4, a1, .LBB916_4
+; CHECK-RV32V-NEXT: # %bb.3:
+; CHECK-RV32V-NEXT: mv a6, a1
+; CHECK-RV32V-NEXT: .LBB916_4:
+; CHECK-RV32V-NEXT: addi sp, sp, -16
+; CHECK-RV32V-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV32V-NEXT: csrr a7, vlenb
+; CHECK-RV32V-NEXT: slli a7, a7, 3
+; CHECK-RV32V-NEXT: sub sp, sp, a7
+; CHECK-RV32V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV32V-NEXT: vl8re32.v v24, (a5)
+; CHECK-RV32V-NEXT: addi a5, sp, 16
+; CHECK-RV32V-NEXT: vs8r.v v24, (a5) # vscale x 64-byte Folded Spill
+; CHECK-RV32V-NEXT: vl8re32.v v24, (a0)
+; CHECK-RV32V-NEXT: vsetvli zero, a6, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: sub a0, a4, a1
+; CHECK-RV32V-NEXT: sub a2, a3, a2
+; CHECK-RV32V-NEXT: sltu a4, a4, a0
+; CHECK-RV32V-NEXT: sltu a3, a3, a2
+; CHECK-RV32V-NEXT: addi a4, a4, -1
+; CHECK-RV32V-NEXT: addi a3, a3, -1
+; CHECK-RV32V-NEXT: and a4, a4, a0
+; CHECK-RV32V-NEXT: and a0, a3, a2
+; CHECK-RV32V-NEXT: vsetvli zero, a4, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsoxei32.v v10, (zero), v0
+; CHECK-RV32V-NEXT: mv a2, a0
+; CHECK-RV32V-NEXT: bltu a0, a1, .LBB916_6
+; CHECK-RV32V-NEXT: # %bb.5:
+; CHECK-RV32V-NEXT: mv a2, a1
+; CHECK-RV32V-NEXT: .LBB916_6:
+; CHECK-RV32V-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsoxei32.v v12, (zero), v24
+; CHECK-RV32V-NEXT: sub a1, a0, a1
+; CHECK-RV32V-NEXT: sltu a0, a0, a1
+; CHECK-RV32V-NEXT: addi a0, a0, -1
+; CHECK-RV32V-NEXT: and a0, a0, a1
+; CHECK-RV32V-NEXT: addi a1, sp, 16
+; CHECK-RV32V-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsoxei32.v v14, (zero), v16
+; CHECK-RV32V-NEXT: csrr a0, vlenb
+; CHECK-RV32V-NEXT: slli a0, a0, 3
+; CHECK-RV32V-NEXT: add sp, sp, a0
+; CHECK-RV32V-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV32V-NEXT: addi sp, sp, 16
+; CHECK-RV32V-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv64i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: addi sp, sp, -48
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 48
+; CHECK-RV64VC-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-RV64VC-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-RV64VC-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; CHECK-RV64VC-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; CHECK-RV64VC-NEXT: .cfi_offset ra, -8
+; CHECK-RV64VC-NEXT: .cfi_offset s0, -16
+; CHECK-RV64VC-NEXT: .cfi_offset s1, -24
+; CHECK-RV64VC-NEXT: .cfi_offset s2, -32
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 3
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: slli a1, a1, 1
+; CHECK-RV64VC-NEXT: add a2, a2, a1
+; CHECK-RV64VC-NEXT: slli a1, a1, 1
+; CHECK-RV64VC-NEXT: add a1, a1, a2
+; CHECK-RV64VC-NEXT: sub sp, sp, a1
+; CHECK-RV64VC-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb
+; CHECK-RV64VC-NEXT: mv s2, a7
+; CHECK-RV64VC-NEXT: mv s1, a0
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 4
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 4
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 1
+; CHECK-RV64VC-NEXT: add a0, a0, a1
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: csrr s0, vlenb
+; CHECK-RV64VC-NEXT: li a1, 48
+; CHECK-RV64VC-NEXT: mv a0, s0
+; CHECK-RV64VC-NEXT: call __muldi3
+; CHECK-RV64VC-NEXT: add a0, a0, s1
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 2
+; CHECK-RV64VC-NEXT: add a0, a0, a1
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: li a1, 40
+; CHECK-RV64VC-NEXT: mv a0, s0
+; CHECK-RV64VC-NEXT: call __muldi3
+; CHECK-RV64VC-NEXT: add a0, a0, s1
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 5
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: slli a0, s0, 5
+; CHECK-RV64VC-NEXT: add a0, a0, s1
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 1
+; CHECK-RV64VC-NEXT: add a0, a0, a1
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: li a1, 24
+; CHECK-RV64VC-NEXT: mv a0, s0
+; CHECK-RV64VC-NEXT: call __muldi3
+; CHECK-RV64VC-NEXT: slli a6, s0, 2
+; CHECK-RV64VC-NEXT: mv a3, s2
+; CHECK-RV64VC-NEXT: bltu s2, a6, .LBB916_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a3, a6
+; CHECK-RV64VC-NEXT: .LBB916_2:
+; CHECK-RV64VC-NEXT: slli a5, s0, 4
+; CHECK-RV64VC-NEXT: slli a7, s0, 1
+; CHECK-RV64VC-NEXT: slli a2, s0, 3
+; CHECK-RV64VC-NEXT: mv a4, a3
+; CHECK-RV64VC-NEXT: bltu a3, a7, .LBB916_4
+; CHECK-RV64VC-NEXT: # %bb.3:
+; CHECK-RV64VC-NEXT: mv a4, a7
+; CHECK-RV64VC-NEXT: .LBB916_4:
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (s1)
+; CHECK-RV64VC-NEXT: add a1, s1, a0
+; CHECK-RV64VC-NEXT: add a5, a5, s1
+; CHECK-RV64VC-NEXT: add a2, a2, s1
+; CHECK-RV64VC-NEXT: mv a0, a4
+; CHECK-RV64VC-NEXT: bltu a4, s0, .LBB916_6
+; CHECK-RV64VC-NEXT: # %bb.5:
+; CHECK-RV64VC-NEXT: mv a0, s0
+; CHECK-RV64VC-NEXT: .LBB916_6:
+; CHECK-RV64VC-NEXT: vl8re64.v v16, (a1)
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 3
+; CHECK-RV64VC-NEXT: add a1, a1, sp
+; CHECK-RV64VC-NEXT: addi a1, a1, 16
+; CHECK-RV64VC-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: vl8re64.v v16, (a5)
+; CHECK-RV64VC-NEXT: addi a1, sp, 16
+; CHECK-RV64VC-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: vl8re64.v v0, (a2)
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 4
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: slli a1, a1, 1
+; CHECK-RV64VC-NEXT: add a1, a1, a2
+; CHECK-RV64VC-NEXT: add a1, a1, sp
+; CHECK-RV64VC-NEXT: addi a1, a1, 16
+; CHECK-RV64VC-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 4
+; CHECK-RV64VC-NEXT: add a1, a1, sp
+; CHECK-RV64VC-NEXT: addi a1, a1, 16
+; CHECK-RV64VC-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v24, (zero), v16
+; CHECK-RV64VC-NEXT: sub a0, a4, s0
+; CHECK-RV64VC-NEXT: sub a1, a3, a7
+; CHECK-RV64VC-NEXT: sltu a2, a4, a0
+; CHECK-RV64VC-NEXT: sltu a3, a3, a1
+; CHECK-RV64VC-NEXT: addi a2, a2, -1
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a2, a2, a0
+; CHECK-RV64VC-NEXT: and a0, a3, a1
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v25, (zero), v8
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: bltu a0, s0, .LBB916_8
+; CHECK-RV64VC-NEXT: # %bb.7:
+; CHECK-RV64VC-NEXT: mv a1, s0
+; CHECK-RV64VC-NEXT: .LBB916_8:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v26, (zero), v0
+; CHECK-RV64VC-NEXT: sub a1, a0, s0
+; CHECK-RV64VC-NEXT: sub a2, s2, a6
+; CHECK-RV64VC-NEXT: sltu a0, a0, a1
+; CHECK-RV64VC-NEXT: sltu a3, s2, a2
+; CHECK-RV64VC-NEXT: addi a0, a0, -1
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a1, a1, a0
+; CHECK-RV64VC-NEXT: and a0, a3, a2
+; CHECK-RV64VC-NEXT: addi a2, sp, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v27, (zero), v8
+; CHECK-RV64VC-NEXT: mv a2, a0
+; CHECK-RV64VC-NEXT: bltu a0, a7, .LBB916_10
+; CHECK-RV64VC-NEXT: # %bb.9:
+; CHECK-RV64VC-NEXT: mv a2, a7
+; CHECK-RV64VC-NEXT: .LBB916_10:
+; CHECK-RV64VC-NEXT: mv a1, a2
+; CHECK-RV64VC-NEXT: bltu a2, s0, .LBB916_12
+; CHECK-RV64VC-NEXT: # %bb.11:
+; CHECK-RV64VC-NEXT: mv a1, s0
+; CHECK-RV64VC-NEXT: .LBB916_12:
+; CHECK-RV64VC-NEXT: csrr a3, vlenb
+; CHECK-RV64VC-NEXT: slli a3, a3, 3
+; CHECK-RV64VC-NEXT: add a3, a3, sp
+; CHECK-RV64VC-NEXT: addi a3, a3, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a3) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v28, (zero), v8
+; CHECK-RV64VC-NEXT: sub a1, a2, s0
+; CHECK-RV64VC-NEXT: sub a3, a0, a7
+; CHECK-RV64VC-NEXT: sltu a2, a2, a1
+; CHECK-RV64VC-NEXT: sltu a0, a0, a3
+; CHECK-RV64VC-NEXT: addi a2, a2, -1
+; CHECK-RV64VC-NEXT: addi a0, a0, -1
+; CHECK-RV64VC-NEXT: and a1, a1, a2
+; CHECK-RV64VC-NEXT: and a0, a0, a3
+; CHECK-RV64VC-NEXT: csrr a2, vlenb
+; CHECK-RV64VC-NEXT: slli a2, a2, 3
+; CHECK-RV64VC-NEXT: mv a3, a2
+; CHECK-RV64VC-NEXT: slli a2, a2, 1
+; CHECK-RV64VC-NEXT: add a2, a2, a3
+; CHECK-RV64VC-NEXT: add a2, a2, sp
+; CHECK-RV64VC-NEXT: addi a2, a2, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v29, (zero), v8
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: bltu a0, s0, .LBB916_14
+; CHECK-RV64VC-NEXT: # %bb.13:
+; CHECK-RV64VC-NEXT: mv a1, s0
+; CHECK-RV64VC-NEXT: .LBB916_14:
+; CHECK-RV64VC-NEXT: csrr a2, vlenb
+; CHECK-RV64VC-NEXT: slli a2, a2, 5
+; CHECK-RV64VC-NEXT: add a2, a2, sp
+; CHECK-RV64VC-NEXT: addi a2, a2, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v30, (zero), v8
+; CHECK-RV64VC-NEXT: sub a1, a0, s0
+; CHECK-RV64VC-NEXT: sltu a0, a0, a1
+; CHECK-RV64VC-NEXT: addi a0, a0, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a1
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 3
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: slli a1, a1, 2
+; CHECK-RV64VC-NEXT: add a1, a1, a2
+; CHECK-RV64VC-NEXT: add a1, a1, sp
+; CHECK-RV64VC-NEXT: addi a1, a1, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v31, (zero), v8
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 1
+; CHECK-RV64VC-NEXT: add a1, a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 1
+; CHECK-RV64VC-NEXT: add a0, a0, a1
+; CHECK-RV64VC-NEXT: add sp, sp, a0
+; CHECK-RV64VC-NEXT: .cfi_def_cfa sp, 48
+; CHECK-RV64VC-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-RV64VC-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-RV64VC-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; CHECK-RV64VC-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; CHECK-RV64VC-NEXT: .cfi_restore ra
+; CHECK-RV64VC-NEXT: .cfi_restore s0
+; CHECK-RV64VC-NEXT: .cfi_restore s1
+; CHECK-RV64VC-NEXT: .cfi_restore s2
+; CHECK-RV64VC-NEXT: addi sp, sp, 48
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv64i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: csrr a1, vlenb
+; CHECK-RV32VC-NEXT: slli a5, a1, 4
+; CHECK-RV32VC-NEXT: slli a2, a1, 2
+; CHECK-RV32VC-NEXT: slli a6, a1, 3
+; CHECK-RV32VC-NEXT: mv a4, a3
+; CHECK-RV32VC-NEXT: bltu a3, a2, .LBB916_2
+; CHECK-RV32VC-NEXT: # %bb.1:
+; CHECK-RV32VC-NEXT: mv a4, a2
+; CHECK-RV32VC-NEXT: .LBB916_2:
+; CHECK-RV32VC-NEXT: vl8re32.v v0, (a0)
+; CHECK-RV32VC-NEXT: add a7, a0, a5
+; CHECK-RV32VC-NEXT: slli a1, a1, 1
+; CHECK-RV32VC-NEXT: add a0, a0, a6
+; CHECK-RV32VC-NEXT: mv a5, a4
+; CHECK-RV32VC-NEXT: bltu a4, a1, .LBB916_4
+; CHECK-RV32VC-NEXT: # %bb.3:
+; CHECK-RV32VC-NEXT: mv a5, a1
+; CHECK-RV32VC-NEXT: .LBB916_4:
+; CHECK-RV32VC-NEXT: addi sp, sp, -16
+; CHECK-RV32VC-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV32VC-NEXT: csrr a6, vlenb
+; CHECK-RV32VC-NEXT: slli a6, a6, 3
+; CHECK-RV32VC-NEXT: sub sp, sp, a6
+; CHECK-RV32VC-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV32VC-NEXT: vl8re32.v v24, (a7)
+; CHECK-RV32VC-NEXT: addi a6, sp, 16
+; CHECK-RV32VC-NEXT: vs8r.v v24, (a6) # vscale x 64-byte Folded Spill
+; CHECK-RV32VC-NEXT: vl8re32.v v24, (a0)
+; CHECK-RV32VC-NEXT: vsetvli zero, a5, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: sub a0, a4, a1
+; CHECK-RV32VC-NEXT: sub a2, a3, a2
+; CHECK-RV32VC-NEXT: sltu a4, a4, a0
+; CHECK-RV32VC-NEXT: sltu a3, a3, a2
+; CHECK-RV32VC-NEXT: addi a4, a4, -1
+; CHECK-RV32VC-NEXT: addi a3, a3, -1
+; CHECK-RV32VC-NEXT: and a4, a4, a0
+; CHECK-RV32VC-NEXT: and a0, a3, a2
+; CHECK-RV32VC-NEXT: vsetvli zero, a4, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsoxei32.v v10, (zero), v0
+; CHECK-RV32VC-NEXT: mv a2, a0
+; CHECK-RV32VC-NEXT: bltu a0, a1, .LBB916_6
+; CHECK-RV32VC-NEXT: # %bb.5:
+; CHECK-RV32VC-NEXT: mv a2, a1
+; CHECK-RV32VC-NEXT: .LBB916_6:
+; CHECK-RV32VC-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsoxei32.v v12, (zero), v24
+; CHECK-RV32VC-NEXT: sub a1, a0, a1
+; CHECK-RV32VC-NEXT: sltu a0, a0, a1
+; CHECK-RV32VC-NEXT: addi a0, a0, -1
+; CHECK-RV32VC-NEXT: and a0, a0, a1
+; CHECK-RV32VC-NEXT: addi a1, sp, 16
+; CHECK-RV32VC-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsoxei32.v v14, (zero), v16
+; CHECK-RV32VC-NEXT: csrr a0, vlenb
+; CHECK-RV32VC-NEXT: slli a0, a0, 3
+; CHECK-RV32VC-NEXT: add sp, sp, a0
+; CHECK-RV32VC-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV32VC-NEXT: addi sp, sp, 16
+; CHECK-RV32VC-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv64i8.nxv64p0(<vscale x 64 x i8> %val, <vscale x 64 x ptr> %ptrs, <vscale x 64 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv64i8_S1(<vscale x 64 x i8> %val, <vscale x 64 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv64i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: addi sp, sp, -48
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 48
+; CHECK-RV64V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-RV64V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-RV64V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; CHECK-RV64V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; CHECK-RV64V-NEXT: .cfi_offset ra, -8
+; CHECK-RV64V-NEXT: .cfi_offset s0, -16
+; CHECK-RV64V-NEXT: .cfi_offset s1, -24
+; CHECK-RV64V-NEXT: .cfi_offset s2, -32
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 3
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: slli a1, a1, 1
+; CHECK-RV64V-NEXT: add a2, a2, a1
+; CHECK-RV64V-NEXT: slli a1, a1, 1
+; CHECK-RV64V-NEXT: add a1, a1, a2
+; CHECK-RV64V-NEXT: sub sp, sp, a1
+; CHECK-RV64V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb
+; CHECK-RV64V-NEXT: mv s1, a7
+; CHECK-RV64V-NEXT: mv s2, a0
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 4
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 4
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 1
+; CHECK-RV64V-NEXT: add a0, a0, a1
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: csrr s0, vlenb
+; CHECK-RV64V-NEXT: li a1, 48
+; CHECK-RV64V-NEXT: mv a0, s0
+; CHECK-RV64V-NEXT: call __muldi3
+; CHECK-RV64V-NEXT: add a0, s2, a0
+; CHECK-RV64V-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 2
+; CHECK-RV64V-NEXT: add a0, a0, a1
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: li a1, 40
+; CHECK-RV64V-NEXT: mv a0, s0
+; CHECK-RV64V-NEXT: call __muldi3
+; CHECK-RV64V-NEXT: add a0, s2, a0
+; CHECK-RV64V-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 5
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: slli a0, s0, 5
+; CHECK-RV64V-NEXT: add a0, s2, a0
+; CHECK-RV64V-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 1
+; CHECK-RV64V-NEXT: add a0, a0, a1
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: li a1, 24
+; CHECK-RV64V-NEXT: mv a0, s0
+; CHECK-RV64V-NEXT: call __muldi3
+; CHECK-RV64V-NEXT: slli a2, s0, 2
+; CHECK-RV64V-NEXT: mv a3, s1
+; CHECK-RV64V-NEXT: bltu s1, a2, .LBB917_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a3, a2
+; CHECK-RV64V-NEXT: .LBB917_2:
+; CHECK-RV64V-NEXT: slli a5, s0, 4
+; CHECK-RV64V-NEXT: slli a1, s0, 1
+; CHECK-RV64V-NEXT: slli a6, s0, 3
+; CHECK-RV64V-NEXT: mv a4, a3
+; CHECK-RV64V-NEXT: bltu a3, a1, .LBB917_4
+; CHECK-RV64V-NEXT: # %bb.3:
+; CHECK-RV64V-NEXT: mv a4, a1
+; CHECK-RV64V-NEXT: .LBB917_4:
+; CHECK-RV64V-NEXT: vl8re64.v v8, (s2)
+; CHECK-RV64V-NEXT: add a7, s2, a0
+; CHECK-RV64V-NEXT: add a5, s2, a5
+; CHECK-RV64V-NEXT: add a6, s2, a6
+; CHECK-RV64V-NEXT: mv a0, a4
+; CHECK-RV64V-NEXT: bltu a4, s0, .LBB917_6
+; CHECK-RV64V-NEXT: # %bb.5:
+; CHECK-RV64V-NEXT: mv a0, s0
+; CHECK-RV64V-NEXT: .LBB917_6:
+; CHECK-RV64V-NEXT: vl8re64.v v16, (a7)
+; CHECK-RV64V-NEXT: csrr a7, vlenb
+; CHECK-RV64V-NEXT: slli a7, a7, 3
+; CHECK-RV64V-NEXT: add a7, sp, a7
+; CHECK-RV64V-NEXT: addi a7, a7, 16
+; CHECK-RV64V-NEXT: vs8r.v v16, (a7) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: vl8re64.v v16, (a5)
+; CHECK-RV64V-NEXT: addi a5, sp, 16
+; CHECK-RV64V-NEXT: vs8r.v v16, (a5) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: vl8re64.v v0, (a6)
+; CHECK-RV64V-NEXT: csrr a5, vlenb
+; CHECK-RV64V-NEXT: slli a5, a5, 4
+; CHECK-RV64V-NEXT: mv a6, a5
+; CHECK-RV64V-NEXT: slli a5, a5, 1
+; CHECK-RV64V-NEXT: add a5, a5, a6
+; CHECK-RV64V-NEXT: add a5, sp, a5
+; CHECK-RV64V-NEXT: addi a5, a5, 16
+; CHECK-RV64V-NEXT: vl8r.v v24, (a5) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: csrr a5, vlenb
+; CHECK-RV64V-NEXT: slli a5, a5, 4
+; CHECK-RV64V-NEXT: add a5, sp, a5
+; CHECK-RV64V-NEXT: addi a5, a5, 16
+; CHECK-RV64V-NEXT: vl8r.v v16, (a5) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v24, (zero), v16
+; CHECK-RV64V-NEXT: sub a0, a4, s0
+; CHECK-RV64V-NEXT: sub a5, a3, a1
+; CHECK-RV64V-NEXT: sltu a4, a4, a0
+; CHECK-RV64V-NEXT: sltu a3, a3, a5
+; CHECK-RV64V-NEXT: addi a4, a4, -1
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a4, a4, a0
+; CHECK-RV64V-NEXT: and a0, a3, a5
+; CHECK-RV64V-NEXT: vsetvli zero, a4, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v25, (zero), v8
+; CHECK-RV64V-NEXT: mv a3, a0
+; CHECK-RV64V-NEXT: bltu a0, s0, .LBB917_8
+; CHECK-RV64V-NEXT: # %bb.7:
+; CHECK-RV64V-NEXT: mv a3, s0
+; CHECK-RV64V-NEXT: .LBB917_8:
+; CHECK-RV64V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v26, (zero), v0
+; CHECK-RV64V-NEXT: sub a3, a0, s0
+; CHECK-RV64V-NEXT: sub a2, s1, a2
+; CHECK-RV64V-NEXT: sltu a0, a0, a3
+; CHECK-RV64V-NEXT: sltu a4, s1, a2
+; CHECK-RV64V-NEXT: addi a0, a0, -1
+; CHECK-RV64V-NEXT: addi a4, a4, -1
+; CHECK-RV64V-NEXT: and a3, a0, a3
+; CHECK-RV64V-NEXT: and a0, a4, a2
+; CHECK-RV64V-NEXT: addi a2, sp, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v27, (zero), v8
+; CHECK-RV64V-NEXT: mv a2, a0
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB917_10
+; CHECK-RV64V-NEXT: # %bb.9:
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: .LBB917_10:
+; CHECK-RV64V-NEXT: mv a3, a2
+; CHECK-RV64V-NEXT: bltu a2, s0, .LBB917_12
+; CHECK-RV64V-NEXT: # %bb.11:
+; CHECK-RV64V-NEXT: mv a3, s0
+; CHECK-RV64V-NEXT: .LBB917_12:
+; CHECK-RV64V-NEXT: csrr a4, vlenb
+; CHECK-RV64V-NEXT: slli a4, a4, 3
+; CHECK-RV64V-NEXT: add a4, sp, a4
+; CHECK-RV64V-NEXT: addi a4, a4, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a4) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v28, (zero), v8
+; CHECK-RV64V-NEXT: sub a3, a2, s0
+; CHECK-RV64V-NEXT: sub a1, a0, a1
+; CHECK-RV64V-NEXT: sltu a2, a2, a3
+; CHECK-RV64V-NEXT: sltu a0, a0, a1
+; CHECK-RV64V-NEXT: addi a2, a2, -1
+; CHECK-RV64V-NEXT: addi a0, a0, -1
+; CHECK-RV64V-NEXT: and a2, a2, a3
+; CHECK-RV64V-NEXT: and a0, a0, a1
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 3
+; CHECK-RV64V-NEXT: mv a3, a1
+; CHECK-RV64V-NEXT: slli a1, a1, 1
+; CHECK-RV64V-NEXT: add a1, a1, a3
+; CHECK-RV64V-NEXT: add a1, sp, a1
+; CHECK-RV64V-NEXT: addi a1, a1, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v29, (zero), v8
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: bltu a0, s0, .LBB917_14
+; CHECK-RV64V-NEXT: # %bb.13:
+; CHECK-RV64V-NEXT: mv a1, s0
+; CHECK-RV64V-NEXT: .LBB917_14:
+; CHECK-RV64V-NEXT: csrr a2, vlenb
+; CHECK-RV64V-NEXT: slli a2, a2, 5
+; CHECK-RV64V-NEXT: add a2, sp, a2
+; CHECK-RV64V-NEXT: addi a2, a2, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v30, (zero), v8
+; CHECK-RV64V-NEXT: sub a1, a0, s0
+; CHECK-RV64V-NEXT: sltu a0, a0, a1
+; CHECK-RV64V-NEXT: addi a0, a0, -1
+; CHECK-RV64V-NEXT: and a0, a0, a1
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 3
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: slli a1, a1, 2
+; CHECK-RV64V-NEXT: add a1, a1, a2
+; CHECK-RV64V-NEXT: add a1, sp, a1
+; CHECK-RV64V-NEXT: addi a1, a1, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v31, (zero), v8
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 1
+; CHECK-RV64V-NEXT: add a1, a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 1
+; CHECK-RV64V-NEXT: add a0, a0, a1
+; CHECK-RV64V-NEXT: add sp, sp, a0
+; CHECK-RV64V-NEXT: .cfi_def_cfa sp, 48
+; CHECK-RV64V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-RV64V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-RV64V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; CHECK-RV64V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; CHECK-RV64V-NEXT: .cfi_restore ra
+; CHECK-RV64V-NEXT: .cfi_restore s0
+; CHECK-RV64V-NEXT: .cfi_restore s1
+; CHECK-RV64V-NEXT: .cfi_restore s2
+; CHECK-RV64V-NEXT: addi sp, sp, 48
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv64i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: csrr a1, vlenb
+; CHECK-RV32V-NEXT: slli a5, a1, 4
+; CHECK-RV32V-NEXT: slli a2, a1, 2
+; CHECK-RV32V-NEXT: slli a6, a1, 3
+; CHECK-RV32V-NEXT: mv a4, a3
+; CHECK-RV32V-NEXT: bltu a3, a2, .LBB917_2
+; CHECK-RV32V-NEXT: # %bb.1:
+; CHECK-RV32V-NEXT: mv a4, a2
+; CHECK-RV32V-NEXT: .LBB917_2:
+; CHECK-RV32V-NEXT: vl8re32.v v0, (a0)
+; CHECK-RV32V-NEXT: add a5, a0, a5
+; CHECK-RV32V-NEXT: slli a1, a1, 1
+; CHECK-RV32V-NEXT: add a0, a0, a6
+; CHECK-RV32V-NEXT: mv a6, a4
+; CHECK-RV32V-NEXT: bltu a4, a1, .LBB917_4
+; CHECK-RV32V-NEXT: # %bb.3:
+; CHECK-RV32V-NEXT: mv a6, a1
+; CHECK-RV32V-NEXT: .LBB917_4:
+; CHECK-RV32V-NEXT: addi sp, sp, -16
+; CHECK-RV32V-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV32V-NEXT: csrr a7, vlenb
+; CHECK-RV32V-NEXT: slli a7, a7, 3
+; CHECK-RV32V-NEXT: sub sp, sp, a7
+; CHECK-RV32V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV32V-NEXT: vl8re32.v v24, (a5)
+; CHECK-RV32V-NEXT: addi a5, sp, 16
+; CHECK-RV32V-NEXT: vs8r.v v24, (a5) # vscale x 64-byte Folded Spill
+; CHECK-RV32V-NEXT: vl8re32.v v24, (a0)
+; CHECK-RV32V-NEXT: vsetvli zero, a6, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: sub a0, a4, a1
+; CHECK-RV32V-NEXT: sub a2, a3, a2
+; CHECK-RV32V-NEXT: sltu a4, a4, a0
+; CHECK-RV32V-NEXT: sltu a3, a3, a2
+; CHECK-RV32V-NEXT: addi a4, a4, -1
+; CHECK-RV32V-NEXT: addi a3, a3, -1
+; CHECK-RV32V-NEXT: and a4, a4, a0
+; CHECK-RV32V-NEXT: and a0, a3, a2
+; CHECK-RV32V-NEXT: vsetvli zero, a4, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsoxei32.v v10, (zero), v0
+; CHECK-RV32V-NEXT: mv a2, a0
+; CHECK-RV32V-NEXT: bltu a0, a1, .LBB917_6
+; CHECK-RV32V-NEXT: # %bb.5:
+; CHECK-RV32V-NEXT: mv a2, a1
+; CHECK-RV32V-NEXT: .LBB917_6:
+; CHECK-RV32V-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsoxei32.v v12, (zero), v24
+; CHECK-RV32V-NEXT: sub a1, a0, a1
+; CHECK-RV32V-NEXT: sltu a0, a0, a1
+; CHECK-RV32V-NEXT: addi a0, a0, -1
+; CHECK-RV32V-NEXT: and a0, a0, a1
+; CHECK-RV32V-NEXT: addi a1, sp, 16
+; CHECK-RV32V-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsoxei32.v v14, (zero), v16
+; CHECK-RV32V-NEXT: csrr a0, vlenb
+; CHECK-RV32V-NEXT: slli a0, a0, 3
+; CHECK-RV32V-NEXT: add sp, sp, a0
+; CHECK-RV32V-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV32V-NEXT: addi sp, sp, 16
+; CHECK-RV32V-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv64i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: addi sp, sp, -48
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 48
+; CHECK-RV64VC-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-RV64VC-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-RV64VC-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; CHECK-RV64VC-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; CHECK-RV64VC-NEXT: .cfi_offset ra, -8
+; CHECK-RV64VC-NEXT: .cfi_offset s0, -16
+; CHECK-RV64VC-NEXT: .cfi_offset s1, -24
+; CHECK-RV64VC-NEXT: .cfi_offset s2, -32
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 3
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: slli a1, a1, 1
+; CHECK-RV64VC-NEXT: add a2, a2, a1
+; CHECK-RV64VC-NEXT: slli a1, a1, 1
+; CHECK-RV64VC-NEXT: add a1, a1, a2
+; CHECK-RV64VC-NEXT: sub sp, sp, a1
+; CHECK-RV64VC-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb
+; CHECK-RV64VC-NEXT: mv s2, a7
+; CHECK-RV64VC-NEXT: mv s1, a0
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 4
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 4
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 1
+; CHECK-RV64VC-NEXT: add a0, a0, a1
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: csrr s0, vlenb
+; CHECK-RV64VC-NEXT: li a1, 48
+; CHECK-RV64VC-NEXT: mv a0, s0
+; CHECK-RV64VC-NEXT: call __muldi3
+; CHECK-RV64VC-NEXT: add a0, a0, s1
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 2
+; CHECK-RV64VC-NEXT: add a0, a0, a1
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: li a1, 40
+; CHECK-RV64VC-NEXT: mv a0, s0
+; CHECK-RV64VC-NEXT: call __muldi3
+; CHECK-RV64VC-NEXT: add a0, a0, s1
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 5
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: slli a0, s0, 5
+; CHECK-RV64VC-NEXT: add a0, a0, s1
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 1
+; CHECK-RV64VC-NEXT: add a0, a0, a1
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: li a1, 24
+; CHECK-RV64VC-NEXT: mv a0, s0
+; CHECK-RV64VC-NEXT: call __muldi3
+; CHECK-RV64VC-NEXT: slli a6, s0, 2
+; CHECK-RV64VC-NEXT: mv a3, s2
+; CHECK-RV64VC-NEXT: bltu s2, a6, .LBB917_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a3, a6
+; CHECK-RV64VC-NEXT: .LBB917_2:
+; CHECK-RV64VC-NEXT: slli a5, s0, 4
+; CHECK-RV64VC-NEXT: slli a7, s0, 1
+; CHECK-RV64VC-NEXT: slli a2, s0, 3
+; CHECK-RV64VC-NEXT: mv a4, a3
+; CHECK-RV64VC-NEXT: bltu a3, a7, .LBB917_4
+; CHECK-RV64VC-NEXT: # %bb.3:
+; CHECK-RV64VC-NEXT: mv a4, a7
+; CHECK-RV64VC-NEXT: .LBB917_4:
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (s1)
+; CHECK-RV64VC-NEXT: add a1, s1, a0
+; CHECK-RV64VC-NEXT: add a5, a5, s1
+; CHECK-RV64VC-NEXT: add a2, a2, s1
+; CHECK-RV64VC-NEXT: mv a0, a4
+; CHECK-RV64VC-NEXT: bltu a4, s0, .LBB917_6
+; CHECK-RV64VC-NEXT: # %bb.5:
+; CHECK-RV64VC-NEXT: mv a0, s0
+; CHECK-RV64VC-NEXT: .LBB917_6:
+; CHECK-RV64VC-NEXT: vl8re64.v v16, (a1)
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 3
+; CHECK-RV64VC-NEXT: add a1, a1, sp
+; CHECK-RV64VC-NEXT: addi a1, a1, 16
+; CHECK-RV64VC-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: vl8re64.v v16, (a5)
+; CHECK-RV64VC-NEXT: addi a1, sp, 16
+; CHECK-RV64VC-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: vl8re64.v v0, (a2)
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 4
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: slli a1, a1, 1
+; CHECK-RV64VC-NEXT: add a1, a1, a2
+; CHECK-RV64VC-NEXT: add a1, a1, sp
+; CHECK-RV64VC-NEXT: addi a1, a1, 16
+; CHECK-RV64VC-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 4
+; CHECK-RV64VC-NEXT: add a1, a1, sp
+; CHECK-RV64VC-NEXT: addi a1, a1, 16
+; CHECK-RV64VC-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v24, (zero), v16
+; CHECK-RV64VC-NEXT: sub a0, a4, s0
+; CHECK-RV64VC-NEXT: sub a1, a3, a7
+; CHECK-RV64VC-NEXT: sltu a2, a4, a0
+; CHECK-RV64VC-NEXT: sltu a3, a3, a1
+; CHECK-RV64VC-NEXT: addi a2, a2, -1
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a2, a2, a0
+; CHECK-RV64VC-NEXT: and a0, a3, a1
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v25, (zero), v8
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: bltu a0, s0, .LBB917_8
+; CHECK-RV64VC-NEXT: # %bb.7:
+; CHECK-RV64VC-NEXT: mv a1, s0
+; CHECK-RV64VC-NEXT: .LBB917_8:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v26, (zero), v0
+; CHECK-RV64VC-NEXT: sub a1, a0, s0
+; CHECK-RV64VC-NEXT: sub a2, s2, a6
+; CHECK-RV64VC-NEXT: sltu a0, a0, a1
+; CHECK-RV64VC-NEXT: sltu a3, s2, a2
+; CHECK-RV64VC-NEXT: addi a0, a0, -1
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a1, a1, a0
+; CHECK-RV64VC-NEXT: and a0, a3, a2
+; CHECK-RV64VC-NEXT: addi a2, sp, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v27, (zero), v8
+; CHECK-RV64VC-NEXT: mv a2, a0
+; CHECK-RV64VC-NEXT: bltu a0, a7, .LBB917_10
+; CHECK-RV64VC-NEXT: # %bb.9:
+; CHECK-RV64VC-NEXT: mv a2, a7
+; CHECK-RV64VC-NEXT: .LBB917_10:
+; CHECK-RV64VC-NEXT: mv a1, a2
+; CHECK-RV64VC-NEXT: bltu a2, s0, .LBB917_12
+; CHECK-RV64VC-NEXT: # %bb.11:
+; CHECK-RV64VC-NEXT: mv a1, s0
+; CHECK-RV64VC-NEXT: .LBB917_12:
+; CHECK-RV64VC-NEXT: csrr a3, vlenb
+; CHECK-RV64VC-NEXT: slli a3, a3, 3
+; CHECK-RV64VC-NEXT: add a3, a3, sp
+; CHECK-RV64VC-NEXT: addi a3, a3, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a3) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v28, (zero), v8
+; CHECK-RV64VC-NEXT: sub a1, a2, s0
+; CHECK-RV64VC-NEXT: sub a3, a0, a7
+; CHECK-RV64VC-NEXT: sltu a2, a2, a1
+; CHECK-RV64VC-NEXT: sltu a0, a0, a3
+; CHECK-RV64VC-NEXT: addi a2, a2, -1
+; CHECK-RV64VC-NEXT: addi a0, a0, -1
+; CHECK-RV64VC-NEXT: and a1, a1, a2
+; CHECK-RV64VC-NEXT: and a0, a0, a3
+; CHECK-RV64VC-NEXT: csrr a2, vlenb
+; CHECK-RV64VC-NEXT: slli a2, a2, 3
+; CHECK-RV64VC-NEXT: mv a3, a2
+; CHECK-RV64VC-NEXT: slli a2, a2, 1
+; CHECK-RV64VC-NEXT: add a2, a2, a3
+; CHECK-RV64VC-NEXT: add a2, a2, sp
+; CHECK-RV64VC-NEXT: addi a2, a2, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v29, (zero), v8
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: bltu a0, s0, .LBB917_14
+; CHECK-RV64VC-NEXT: # %bb.13:
+; CHECK-RV64VC-NEXT: mv a1, s0
+; CHECK-RV64VC-NEXT: .LBB917_14:
+; CHECK-RV64VC-NEXT: csrr a2, vlenb
+; CHECK-RV64VC-NEXT: slli a2, a2, 5
+; CHECK-RV64VC-NEXT: add a2, a2, sp
+; CHECK-RV64VC-NEXT: addi a2, a2, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v30, (zero), v8
+; CHECK-RV64VC-NEXT: sub a1, a0, s0
+; CHECK-RV64VC-NEXT: sltu a0, a0, a1
+; CHECK-RV64VC-NEXT: addi a0, a0, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a1
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 3
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: slli a1, a1, 2
+; CHECK-RV64VC-NEXT: add a1, a1, a2
+; CHECK-RV64VC-NEXT: add a1, a1, sp
+; CHECK-RV64VC-NEXT: addi a1, a1, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v31, (zero), v8
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 1
+; CHECK-RV64VC-NEXT: add a1, a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 1
+; CHECK-RV64VC-NEXT: add a0, a0, a1
+; CHECK-RV64VC-NEXT: add sp, sp, a0
+; CHECK-RV64VC-NEXT: .cfi_def_cfa sp, 48
+; CHECK-RV64VC-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-RV64VC-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-RV64VC-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; CHECK-RV64VC-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; CHECK-RV64VC-NEXT: .cfi_restore ra
+; CHECK-RV64VC-NEXT: .cfi_restore s0
+; CHECK-RV64VC-NEXT: .cfi_restore s1
+; CHECK-RV64VC-NEXT: .cfi_restore s2
+; CHECK-RV64VC-NEXT: addi sp, sp, 48
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv64i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: csrr a1, vlenb
+; CHECK-RV32VC-NEXT: slli a5, a1, 4
+; CHECK-RV32VC-NEXT: slli a2, a1, 2
+; CHECK-RV32VC-NEXT: slli a6, a1, 3
+; CHECK-RV32VC-NEXT: mv a4, a3
+; CHECK-RV32VC-NEXT: bltu a3, a2, .LBB917_2
+; CHECK-RV32VC-NEXT: # %bb.1:
+; CHECK-RV32VC-NEXT: mv a4, a2
+; CHECK-RV32VC-NEXT: .LBB917_2:
+; CHECK-RV32VC-NEXT: vl8re32.v v0, (a0)
+; CHECK-RV32VC-NEXT: add a7, a0, a5
+; CHECK-RV32VC-NEXT: slli a1, a1, 1
+; CHECK-RV32VC-NEXT: add a0, a0, a6
+; CHECK-RV32VC-NEXT: mv a5, a4
+; CHECK-RV32VC-NEXT: bltu a4, a1, .LBB917_4
+; CHECK-RV32VC-NEXT: # %bb.3:
+; CHECK-RV32VC-NEXT: mv a5, a1
+; CHECK-RV32VC-NEXT: .LBB917_4:
+; CHECK-RV32VC-NEXT: addi sp, sp, -16
+; CHECK-RV32VC-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV32VC-NEXT: csrr a6, vlenb
+; CHECK-RV32VC-NEXT: slli a6, a6, 3
+; CHECK-RV32VC-NEXT: sub sp, sp, a6
+; CHECK-RV32VC-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV32VC-NEXT: vl8re32.v v24, (a7)
+; CHECK-RV32VC-NEXT: addi a6, sp, 16
+; CHECK-RV32VC-NEXT: vs8r.v v24, (a6) # vscale x 64-byte Folded Spill
+; CHECK-RV32VC-NEXT: vl8re32.v v24, (a0)
+; CHECK-RV32VC-NEXT: vsetvli zero, a5, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: sub a0, a4, a1
+; CHECK-RV32VC-NEXT: sub a2, a3, a2
+; CHECK-RV32VC-NEXT: sltu a4, a4, a0
+; CHECK-RV32VC-NEXT: sltu a3, a3, a2
+; CHECK-RV32VC-NEXT: addi a4, a4, -1
+; CHECK-RV32VC-NEXT: addi a3, a3, -1
+; CHECK-RV32VC-NEXT: and a4, a4, a0
+; CHECK-RV32VC-NEXT: and a0, a3, a2
+; CHECK-RV32VC-NEXT: vsetvli zero, a4, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsoxei32.v v10, (zero), v0
+; CHECK-RV32VC-NEXT: mv a2, a0
+; CHECK-RV32VC-NEXT: bltu a0, a1, .LBB917_6
+; CHECK-RV32VC-NEXT: # %bb.5:
+; CHECK-RV32VC-NEXT: mv a2, a1
+; CHECK-RV32VC-NEXT: .LBB917_6:
+; CHECK-RV32VC-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsoxei32.v v12, (zero), v24
+; CHECK-RV32VC-NEXT: sub a1, a0, a1
+; CHECK-RV32VC-NEXT: sltu a0, a0, a1
+; CHECK-RV32VC-NEXT: addi a0, a0, -1
+; CHECK-RV32VC-NEXT: and a0, a0, a1
+; CHECK-RV32VC-NEXT: addi a1, sp, 16
+; CHECK-RV32VC-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsoxei32.v v14, (zero), v16
+; CHECK-RV32VC-NEXT: csrr a0, vlenb
+; CHECK-RV32VC-NEXT: slli a0, a0, 3
+; CHECK-RV32VC-NEXT: add sp, sp, a0
+; CHECK-RV32VC-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV32VC-NEXT: addi sp, sp, 16
+; CHECK-RV32VC-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv64i8.nxv64p0(<vscale x 64 x i8> %val, <vscale x 64 x ptr> %ptrs, <vscale x 64 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_nxv64i8_ALL(<vscale x 64 x i8> %val, <vscale x 64 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv64i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: addi sp, sp, -48
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 48
+; CHECK-RV64V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-RV64V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-RV64V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; CHECK-RV64V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; CHECK-RV64V-NEXT: .cfi_offset ra, -8
+; CHECK-RV64V-NEXT: .cfi_offset s0, -16
+; CHECK-RV64V-NEXT: .cfi_offset s1, -24
+; CHECK-RV64V-NEXT: .cfi_offset s2, -32
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 3
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: slli a1, a1, 1
+; CHECK-RV64V-NEXT: add a2, a2, a1
+; CHECK-RV64V-NEXT: slli a1, a1, 1
+; CHECK-RV64V-NEXT: add a1, a1, a2
+; CHECK-RV64V-NEXT: sub sp, sp, a1
+; CHECK-RV64V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb
+; CHECK-RV64V-NEXT: mv s1, a7
+; CHECK-RV64V-NEXT: mv s2, a0
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 4
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 4
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 1
+; CHECK-RV64V-NEXT: add a0, a0, a1
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: csrr s0, vlenb
+; CHECK-RV64V-NEXT: li a1, 48
+; CHECK-RV64V-NEXT: mv a0, s0
+; CHECK-RV64V-NEXT: call __muldi3
+; CHECK-RV64V-NEXT: add a0, s2, a0
+; CHECK-RV64V-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 2
+; CHECK-RV64V-NEXT: add a0, a0, a1
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: li a1, 40
+; CHECK-RV64V-NEXT: mv a0, s0
+; CHECK-RV64V-NEXT: call __muldi3
+; CHECK-RV64V-NEXT: add a0, s2, a0
+; CHECK-RV64V-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 5
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: slli a0, s0, 5
+; CHECK-RV64V-NEXT: add a0, s2, a0
+; CHECK-RV64V-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 1
+; CHECK-RV64V-NEXT: add a0, a0, a1
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: li a1, 24
+; CHECK-RV64V-NEXT: mv a0, s0
+; CHECK-RV64V-NEXT: call __muldi3
+; CHECK-RV64V-NEXT: slli a2, s0, 2
+; CHECK-RV64V-NEXT: mv a3, s1
+; CHECK-RV64V-NEXT: bltu s1, a2, .LBB918_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a3, a2
+; CHECK-RV64V-NEXT: .LBB918_2:
+; CHECK-RV64V-NEXT: slli a5, s0, 4
+; CHECK-RV64V-NEXT: slli a1, s0, 1
+; CHECK-RV64V-NEXT: slli a6, s0, 3
+; CHECK-RV64V-NEXT: mv a4, a3
+; CHECK-RV64V-NEXT: bltu a3, a1, .LBB918_4
+; CHECK-RV64V-NEXT: # %bb.3:
+; CHECK-RV64V-NEXT: mv a4, a1
+; CHECK-RV64V-NEXT: .LBB918_4:
+; CHECK-RV64V-NEXT: vl8re64.v v8, (s2)
+; CHECK-RV64V-NEXT: add a7, s2, a0
+; CHECK-RV64V-NEXT: add a5, s2, a5
+; CHECK-RV64V-NEXT: add a6, s2, a6
+; CHECK-RV64V-NEXT: mv a0, a4
+; CHECK-RV64V-NEXT: bltu a4, s0, .LBB918_6
+; CHECK-RV64V-NEXT: # %bb.5:
+; CHECK-RV64V-NEXT: mv a0, s0
+; CHECK-RV64V-NEXT: .LBB918_6:
+; CHECK-RV64V-NEXT: vl8re64.v v16, (a7)
+; CHECK-RV64V-NEXT: csrr a7, vlenb
+; CHECK-RV64V-NEXT: slli a7, a7, 3
+; CHECK-RV64V-NEXT: add a7, sp, a7
+; CHECK-RV64V-NEXT: addi a7, a7, 16
+; CHECK-RV64V-NEXT: vs8r.v v16, (a7) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: vl8re64.v v16, (a5)
+; CHECK-RV64V-NEXT: addi a5, sp, 16
+; CHECK-RV64V-NEXT: vs8r.v v16, (a5) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: vl8re64.v v0, (a6)
+; CHECK-RV64V-NEXT: csrr a5, vlenb
+; CHECK-RV64V-NEXT: slli a5, a5, 4
+; CHECK-RV64V-NEXT: mv a6, a5
+; CHECK-RV64V-NEXT: slli a5, a5, 1
+; CHECK-RV64V-NEXT: add a5, a5, a6
+; CHECK-RV64V-NEXT: add a5, sp, a5
+; CHECK-RV64V-NEXT: addi a5, a5, 16
+; CHECK-RV64V-NEXT: vl8r.v v24, (a5) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: csrr a5, vlenb
+; CHECK-RV64V-NEXT: slli a5, a5, 4
+; CHECK-RV64V-NEXT: add a5, sp, a5
+; CHECK-RV64V-NEXT: addi a5, a5, 16
+; CHECK-RV64V-NEXT: vl8r.v v16, (a5) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v24, (zero), v16
+; CHECK-RV64V-NEXT: sub a0, a4, s0
+; CHECK-RV64V-NEXT: sub a5, a3, a1
+; CHECK-RV64V-NEXT: sltu a4, a4, a0
+; CHECK-RV64V-NEXT: sltu a3, a3, a5
+; CHECK-RV64V-NEXT: addi a4, a4, -1
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a4, a4, a0
+; CHECK-RV64V-NEXT: and a0, a3, a5
+; CHECK-RV64V-NEXT: vsetvli zero, a4, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v25, (zero), v8
+; CHECK-RV64V-NEXT: mv a3, a0
+; CHECK-RV64V-NEXT: bltu a0, s0, .LBB918_8
+; CHECK-RV64V-NEXT: # %bb.7:
+; CHECK-RV64V-NEXT: mv a3, s0
+; CHECK-RV64V-NEXT: .LBB918_8:
+; CHECK-RV64V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v26, (zero), v0
+; CHECK-RV64V-NEXT: sub a3, a0, s0
+; CHECK-RV64V-NEXT: sub a2, s1, a2
+; CHECK-RV64V-NEXT: sltu a0, a0, a3
+; CHECK-RV64V-NEXT: sltu a4, s1, a2
+; CHECK-RV64V-NEXT: addi a0, a0, -1
+; CHECK-RV64V-NEXT: addi a4, a4, -1
+; CHECK-RV64V-NEXT: and a3, a0, a3
+; CHECK-RV64V-NEXT: and a0, a4, a2
+; CHECK-RV64V-NEXT: addi a2, sp, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v27, (zero), v8
+; CHECK-RV64V-NEXT: mv a2, a0
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB918_10
+; CHECK-RV64V-NEXT: # %bb.9:
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: .LBB918_10:
+; CHECK-RV64V-NEXT: mv a3, a2
+; CHECK-RV64V-NEXT: bltu a2, s0, .LBB918_12
+; CHECK-RV64V-NEXT: # %bb.11:
+; CHECK-RV64V-NEXT: mv a3, s0
+; CHECK-RV64V-NEXT: .LBB918_12:
+; CHECK-RV64V-NEXT: csrr a4, vlenb
+; CHECK-RV64V-NEXT: slli a4, a4, 3
+; CHECK-RV64V-NEXT: add a4, sp, a4
+; CHECK-RV64V-NEXT: addi a4, a4, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a4) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v28, (zero), v8
+; CHECK-RV64V-NEXT: sub a3, a2, s0
+; CHECK-RV64V-NEXT: sub a1, a0, a1
+; CHECK-RV64V-NEXT: sltu a2, a2, a3
+; CHECK-RV64V-NEXT: sltu a0, a0, a1
+; CHECK-RV64V-NEXT: addi a2, a2, -1
+; CHECK-RV64V-NEXT: addi a0, a0, -1
+; CHECK-RV64V-NEXT: and a2, a2, a3
+; CHECK-RV64V-NEXT: and a0, a0, a1
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 3
+; CHECK-RV64V-NEXT: mv a3, a1
+; CHECK-RV64V-NEXT: slli a1, a1, 1
+; CHECK-RV64V-NEXT: add a1, a1, a3
+; CHECK-RV64V-NEXT: add a1, sp, a1
+; CHECK-RV64V-NEXT: addi a1, a1, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v29, (zero), v8
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: bltu a0, s0, .LBB918_14
+; CHECK-RV64V-NEXT: # %bb.13:
+; CHECK-RV64V-NEXT: mv a1, s0
+; CHECK-RV64V-NEXT: .LBB918_14:
+; CHECK-RV64V-NEXT: csrr a2, vlenb
+; CHECK-RV64V-NEXT: slli a2, a2, 5
+; CHECK-RV64V-NEXT: add a2, sp, a2
+; CHECK-RV64V-NEXT: addi a2, a2, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v30, (zero), v8
+; CHECK-RV64V-NEXT: sub a1, a0, s0
+; CHECK-RV64V-NEXT: sltu a0, a0, a1
+; CHECK-RV64V-NEXT: addi a0, a0, -1
+; CHECK-RV64V-NEXT: and a0, a0, a1
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 3
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: slli a1, a1, 2
+; CHECK-RV64V-NEXT: add a1, a1, a2
+; CHECK-RV64V-NEXT: add a1, sp, a1
+; CHECK-RV64V-NEXT: addi a1, a1, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v31, (zero), v8
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 1
+; CHECK-RV64V-NEXT: add a1, a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 1
+; CHECK-RV64V-NEXT: add a0, a0, a1
+; CHECK-RV64V-NEXT: add sp, sp, a0
+; CHECK-RV64V-NEXT: .cfi_def_cfa sp, 48
+; CHECK-RV64V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-RV64V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-RV64V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; CHECK-RV64V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; CHECK-RV64V-NEXT: .cfi_restore ra
+; CHECK-RV64V-NEXT: .cfi_restore s0
+; CHECK-RV64V-NEXT: .cfi_restore s1
+; CHECK-RV64V-NEXT: .cfi_restore s2
+; CHECK-RV64V-NEXT: addi sp, sp, 48
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv64i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: csrr a1, vlenb
+; CHECK-RV32V-NEXT: slli a5, a1, 4
+; CHECK-RV32V-NEXT: slli a2, a1, 2
+; CHECK-RV32V-NEXT: slli a6, a1, 3
+; CHECK-RV32V-NEXT: mv a4, a3
+; CHECK-RV32V-NEXT: bltu a3, a2, .LBB918_2
+; CHECK-RV32V-NEXT: # %bb.1:
+; CHECK-RV32V-NEXT: mv a4, a2
+; CHECK-RV32V-NEXT: .LBB918_2:
+; CHECK-RV32V-NEXT: vl8re32.v v0, (a0)
+; CHECK-RV32V-NEXT: add a5, a0, a5
+; CHECK-RV32V-NEXT: slli a1, a1, 1
+; CHECK-RV32V-NEXT: add a0, a0, a6
+; CHECK-RV32V-NEXT: mv a6, a4
+; CHECK-RV32V-NEXT: bltu a4, a1, .LBB918_4
+; CHECK-RV32V-NEXT: # %bb.3:
+; CHECK-RV32V-NEXT: mv a6, a1
+; CHECK-RV32V-NEXT: .LBB918_4:
+; CHECK-RV32V-NEXT: addi sp, sp, -16
+; CHECK-RV32V-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV32V-NEXT: csrr a7, vlenb
+; CHECK-RV32V-NEXT: slli a7, a7, 3
+; CHECK-RV32V-NEXT: sub sp, sp, a7
+; CHECK-RV32V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV32V-NEXT: vl8re32.v v24, (a5)
+; CHECK-RV32V-NEXT: addi a5, sp, 16
+; CHECK-RV32V-NEXT: vs8r.v v24, (a5) # vscale x 64-byte Folded Spill
+; CHECK-RV32V-NEXT: vl8re32.v v24, (a0)
+; CHECK-RV32V-NEXT: vsetvli zero, a6, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: sub a0, a4, a1
+; CHECK-RV32V-NEXT: sub a2, a3, a2
+; CHECK-RV32V-NEXT: sltu a4, a4, a0
+; CHECK-RV32V-NEXT: sltu a3, a3, a2
+; CHECK-RV32V-NEXT: addi a4, a4, -1
+; CHECK-RV32V-NEXT: addi a3, a3, -1
+; CHECK-RV32V-NEXT: and a4, a4, a0
+; CHECK-RV32V-NEXT: and a0, a3, a2
+; CHECK-RV32V-NEXT: vsetvli zero, a4, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v10, (zero), v0
+; CHECK-RV32V-NEXT: mv a2, a0
+; CHECK-RV32V-NEXT: bltu a0, a1, .LBB918_6
+; CHECK-RV32V-NEXT: # %bb.5:
+; CHECK-RV32V-NEXT: mv a2, a1
+; CHECK-RV32V-NEXT: .LBB918_6:
+; CHECK-RV32V-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v12, (zero), v24
+; CHECK-RV32V-NEXT: sub a1, a0, a1
+; CHECK-RV32V-NEXT: sltu a0, a0, a1
+; CHECK-RV32V-NEXT: addi a0, a0, -1
+; CHECK-RV32V-NEXT: and a0, a0, a1
+; CHECK-RV32V-NEXT: addi a1, sp, 16
+; CHECK-RV32V-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v14, (zero), v16
+; CHECK-RV32V-NEXT: csrr a0, vlenb
+; CHECK-RV32V-NEXT: slli a0, a0, 3
+; CHECK-RV32V-NEXT: add sp, sp, a0
+; CHECK-RV32V-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV32V-NEXT: addi sp, sp, 16
+; CHECK-RV32V-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv64i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: addi sp, sp, -48
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 48
+; CHECK-RV64VC-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-RV64VC-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-RV64VC-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; CHECK-RV64VC-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; CHECK-RV64VC-NEXT: .cfi_offset ra, -8
+; CHECK-RV64VC-NEXT: .cfi_offset s0, -16
+; CHECK-RV64VC-NEXT: .cfi_offset s1, -24
+; CHECK-RV64VC-NEXT: .cfi_offset s2, -32
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 3
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: slli a1, a1, 1
+; CHECK-RV64VC-NEXT: add a2, a2, a1
+; CHECK-RV64VC-NEXT: slli a1, a1, 1
+; CHECK-RV64VC-NEXT: add a1, a1, a2
+; CHECK-RV64VC-NEXT: sub sp, sp, a1
+; CHECK-RV64VC-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb
+; CHECK-RV64VC-NEXT: mv s2, a7
+; CHECK-RV64VC-NEXT: mv s1, a0
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 4
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 4
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 1
+; CHECK-RV64VC-NEXT: add a0, a0, a1
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: csrr s0, vlenb
+; CHECK-RV64VC-NEXT: li a1, 48
+; CHECK-RV64VC-NEXT: mv a0, s0
+; CHECK-RV64VC-NEXT: call __muldi3
+; CHECK-RV64VC-NEXT: add a0, a0, s1
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 2
+; CHECK-RV64VC-NEXT: add a0, a0, a1
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: li a1, 40
+; CHECK-RV64VC-NEXT: mv a0, s0
+; CHECK-RV64VC-NEXT: call __muldi3
+; CHECK-RV64VC-NEXT: add a0, a0, s1
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 5
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: slli a0, s0, 5
+; CHECK-RV64VC-NEXT: add a0, a0, s1
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 1
+; CHECK-RV64VC-NEXT: add a0, a0, a1
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: li a1, 24
+; CHECK-RV64VC-NEXT: mv a0, s0
+; CHECK-RV64VC-NEXT: call __muldi3
+; CHECK-RV64VC-NEXT: slli a6, s0, 2
+; CHECK-RV64VC-NEXT: mv a3, s2
+; CHECK-RV64VC-NEXT: bltu s2, a6, .LBB918_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a3, a6
+; CHECK-RV64VC-NEXT: .LBB918_2:
+; CHECK-RV64VC-NEXT: slli a5, s0, 4
+; CHECK-RV64VC-NEXT: slli a7, s0, 1
+; CHECK-RV64VC-NEXT: slli a2, s0, 3
+; CHECK-RV64VC-NEXT: mv a4, a3
+; CHECK-RV64VC-NEXT: bltu a3, a7, .LBB918_4
+; CHECK-RV64VC-NEXT: # %bb.3:
+; CHECK-RV64VC-NEXT: mv a4, a7
+; CHECK-RV64VC-NEXT: .LBB918_4:
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (s1)
+; CHECK-RV64VC-NEXT: add a1, s1, a0
+; CHECK-RV64VC-NEXT: add a5, a5, s1
+; CHECK-RV64VC-NEXT: add a2, a2, s1
+; CHECK-RV64VC-NEXT: mv a0, a4
+; CHECK-RV64VC-NEXT: bltu a4, s0, .LBB918_6
+; CHECK-RV64VC-NEXT: # %bb.5:
+; CHECK-RV64VC-NEXT: mv a0, s0
+; CHECK-RV64VC-NEXT: .LBB918_6:
+; CHECK-RV64VC-NEXT: vl8re64.v v16, (a1)
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 3
+; CHECK-RV64VC-NEXT: add a1, a1, sp
+; CHECK-RV64VC-NEXT: addi a1, a1, 16
+; CHECK-RV64VC-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: vl8re64.v v16, (a5)
+; CHECK-RV64VC-NEXT: addi a1, sp, 16
+; CHECK-RV64VC-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: vl8re64.v v0, (a2)
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 4
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: slli a1, a1, 1
+; CHECK-RV64VC-NEXT: add a1, a1, a2
+; CHECK-RV64VC-NEXT: add a1, a1, sp
+; CHECK-RV64VC-NEXT: addi a1, a1, 16
+; CHECK-RV64VC-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 4
+; CHECK-RV64VC-NEXT: add a1, a1, sp
+; CHECK-RV64VC-NEXT: addi a1, a1, 16
+; CHECK-RV64VC-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v24, (zero), v16
+; CHECK-RV64VC-NEXT: sub a0, a4, s0
+; CHECK-RV64VC-NEXT: sub a1, a3, a7
+; CHECK-RV64VC-NEXT: sltu a2, a4, a0
+; CHECK-RV64VC-NEXT: sltu a3, a3, a1
+; CHECK-RV64VC-NEXT: addi a2, a2, -1
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a2, a2, a0
+; CHECK-RV64VC-NEXT: and a0, a3, a1
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v25, (zero), v8
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: bltu a0, s0, .LBB918_8
+; CHECK-RV64VC-NEXT: # %bb.7:
+; CHECK-RV64VC-NEXT: mv a1, s0
+; CHECK-RV64VC-NEXT: .LBB918_8:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v26, (zero), v0
+; CHECK-RV64VC-NEXT: sub a1, a0, s0
+; CHECK-RV64VC-NEXT: sub a2, s2, a6
+; CHECK-RV64VC-NEXT: sltu a0, a0, a1
+; CHECK-RV64VC-NEXT: sltu a3, s2, a2
+; CHECK-RV64VC-NEXT: addi a0, a0, -1
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a1, a1, a0
+; CHECK-RV64VC-NEXT: and a0, a3, a2
+; CHECK-RV64VC-NEXT: addi a2, sp, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v27, (zero), v8
+; CHECK-RV64VC-NEXT: mv a2, a0
+; CHECK-RV64VC-NEXT: bltu a0, a7, .LBB918_10
+; CHECK-RV64VC-NEXT: # %bb.9:
+; CHECK-RV64VC-NEXT: mv a2, a7
+; CHECK-RV64VC-NEXT: .LBB918_10:
+; CHECK-RV64VC-NEXT: mv a1, a2
+; CHECK-RV64VC-NEXT: bltu a2, s0, .LBB918_12
+; CHECK-RV64VC-NEXT: # %bb.11:
+; CHECK-RV64VC-NEXT: mv a1, s0
+; CHECK-RV64VC-NEXT: .LBB918_12:
+; CHECK-RV64VC-NEXT: csrr a3, vlenb
+; CHECK-RV64VC-NEXT: slli a3, a3, 3
+; CHECK-RV64VC-NEXT: add a3, a3, sp
+; CHECK-RV64VC-NEXT: addi a3, a3, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a3) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v28, (zero), v8
+; CHECK-RV64VC-NEXT: sub a1, a2, s0
+; CHECK-RV64VC-NEXT: sub a3, a0, a7
+; CHECK-RV64VC-NEXT: sltu a2, a2, a1
+; CHECK-RV64VC-NEXT: sltu a0, a0, a3
+; CHECK-RV64VC-NEXT: addi a2, a2, -1
+; CHECK-RV64VC-NEXT: addi a0, a0, -1
+; CHECK-RV64VC-NEXT: and a1, a1, a2
+; CHECK-RV64VC-NEXT: and a0, a0, a3
+; CHECK-RV64VC-NEXT: csrr a2, vlenb
+; CHECK-RV64VC-NEXT: slli a2, a2, 3
+; CHECK-RV64VC-NEXT: mv a3, a2
+; CHECK-RV64VC-NEXT: slli a2, a2, 1
+; CHECK-RV64VC-NEXT: add a2, a2, a3
+; CHECK-RV64VC-NEXT: add a2, a2, sp
+; CHECK-RV64VC-NEXT: addi a2, a2, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v29, (zero), v8
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: bltu a0, s0, .LBB918_14
+; CHECK-RV64VC-NEXT: # %bb.13:
+; CHECK-RV64VC-NEXT: mv a1, s0
+; CHECK-RV64VC-NEXT: .LBB918_14:
+; CHECK-RV64VC-NEXT: csrr a2, vlenb
+; CHECK-RV64VC-NEXT: slli a2, a2, 5
+; CHECK-RV64VC-NEXT: add a2, a2, sp
+; CHECK-RV64VC-NEXT: addi a2, a2, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v30, (zero), v8
+; CHECK-RV64VC-NEXT: sub a1, a0, s0
+; CHECK-RV64VC-NEXT: sltu a0, a0, a1
+; CHECK-RV64VC-NEXT: addi a0, a0, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a1
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 3
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: slli a1, a1, 2
+; CHECK-RV64VC-NEXT: add a1, a1, a2
+; CHECK-RV64VC-NEXT: add a1, a1, sp
+; CHECK-RV64VC-NEXT: addi a1, a1, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v31, (zero), v8
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 1
+; CHECK-RV64VC-NEXT: add a1, a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 1
+; CHECK-RV64VC-NEXT: add a0, a0, a1
+; CHECK-RV64VC-NEXT: add sp, sp, a0
+; CHECK-RV64VC-NEXT: .cfi_def_cfa sp, 48
+; CHECK-RV64VC-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-RV64VC-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-RV64VC-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; CHECK-RV64VC-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; CHECK-RV64VC-NEXT: .cfi_restore ra
+; CHECK-RV64VC-NEXT: .cfi_restore s0
+; CHECK-RV64VC-NEXT: .cfi_restore s1
+; CHECK-RV64VC-NEXT: .cfi_restore s2
+; CHECK-RV64VC-NEXT: addi sp, sp, 48
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv64i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: csrr a1, vlenb
+; CHECK-RV32VC-NEXT: slli a5, a1, 4
+; CHECK-RV32VC-NEXT: slli a2, a1, 2
+; CHECK-RV32VC-NEXT: slli a6, a1, 3
+; CHECK-RV32VC-NEXT: mv a4, a3
+; CHECK-RV32VC-NEXT: bltu a3, a2, .LBB918_2
+; CHECK-RV32VC-NEXT: # %bb.1:
+; CHECK-RV32VC-NEXT: mv a4, a2
+; CHECK-RV32VC-NEXT: .LBB918_2:
+; CHECK-RV32VC-NEXT: vl8re32.v v0, (a0)
+; CHECK-RV32VC-NEXT: add a7, a0, a5
+; CHECK-RV32VC-NEXT: slli a1, a1, 1
+; CHECK-RV32VC-NEXT: add a0, a0, a6
+; CHECK-RV32VC-NEXT: mv a5, a4
+; CHECK-RV32VC-NEXT: bltu a4, a1, .LBB918_4
+; CHECK-RV32VC-NEXT: # %bb.3:
+; CHECK-RV32VC-NEXT: mv a5, a1
+; CHECK-RV32VC-NEXT: .LBB918_4:
+; CHECK-RV32VC-NEXT: addi sp, sp, -16
+; CHECK-RV32VC-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV32VC-NEXT: csrr a6, vlenb
+; CHECK-RV32VC-NEXT: slli a6, a6, 3
+; CHECK-RV32VC-NEXT: sub sp, sp, a6
+; CHECK-RV32VC-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV32VC-NEXT: vl8re32.v v24, (a7)
+; CHECK-RV32VC-NEXT: addi a6, sp, 16
+; CHECK-RV32VC-NEXT: vs8r.v v24, (a6) # vscale x 64-byte Folded Spill
+; CHECK-RV32VC-NEXT: vl8re32.v v24, (a0)
+; CHECK-RV32VC-NEXT: vsetvli zero, a5, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: sub a0, a4, a1
+; CHECK-RV32VC-NEXT: sub a2, a3, a2
+; CHECK-RV32VC-NEXT: sltu a4, a4, a0
+; CHECK-RV32VC-NEXT: sltu a3, a3, a2
+; CHECK-RV32VC-NEXT: addi a4, a4, -1
+; CHECK-RV32VC-NEXT: addi a3, a3, -1
+; CHECK-RV32VC-NEXT: and a4, a4, a0
+; CHECK-RV32VC-NEXT: and a0, a3, a2
+; CHECK-RV32VC-NEXT: vsetvli zero, a4, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v10, (zero), v0
+; CHECK-RV32VC-NEXT: mv a2, a0
+; CHECK-RV32VC-NEXT: bltu a0, a1, .LBB918_6
+; CHECK-RV32VC-NEXT: # %bb.5:
+; CHECK-RV32VC-NEXT: mv a2, a1
+; CHECK-RV32VC-NEXT: .LBB918_6:
+; CHECK-RV32VC-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v12, (zero), v24
+; CHECK-RV32VC-NEXT: sub a1, a0, a1
+; CHECK-RV32VC-NEXT: sltu a0, a0, a1
+; CHECK-RV32VC-NEXT: addi a0, a0, -1
+; CHECK-RV32VC-NEXT: and a0, a0, a1
+; CHECK-RV32VC-NEXT: addi a1, sp, 16
+; CHECK-RV32VC-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v14, (zero), v16
+; CHECK-RV32VC-NEXT: csrr a0, vlenb
+; CHECK-RV32VC-NEXT: slli a0, a0, 3
+; CHECK-RV32VC-NEXT: add sp, sp, a0
+; CHECK-RV32VC-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV32VC-NEXT: addi sp, sp, 16
+; CHECK-RV32VC-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv64i8.nxv64p0(<vscale x 64 x i8> %val, <vscale x 64 x ptr> %ptrs, <vscale x 64 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_scatter_nxv64i8_DEFAULT(<vscale x 64 x i8> %val, <vscale x 64 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_nxv64i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: addi sp, sp, -48
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 48
+; CHECK-RV64V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-RV64V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-RV64V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; CHECK-RV64V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; CHECK-RV64V-NEXT: .cfi_offset ra, -8
+; CHECK-RV64V-NEXT: .cfi_offset s0, -16
+; CHECK-RV64V-NEXT: .cfi_offset s1, -24
+; CHECK-RV64V-NEXT: .cfi_offset s2, -32
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 3
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: slli a1, a1, 1
+; CHECK-RV64V-NEXT: add a2, a2, a1
+; CHECK-RV64V-NEXT: slli a1, a1, 1
+; CHECK-RV64V-NEXT: add a1, a1, a2
+; CHECK-RV64V-NEXT: sub sp, sp, a1
+; CHECK-RV64V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb
+; CHECK-RV64V-NEXT: mv s1, a7
+; CHECK-RV64V-NEXT: mv s2, a0
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 4
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 4
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 1
+; CHECK-RV64V-NEXT: add a0, a0, a1
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: csrr s0, vlenb
+; CHECK-RV64V-NEXT: li a1, 48
+; CHECK-RV64V-NEXT: mv a0, s0
+; CHECK-RV64V-NEXT: call __muldi3
+; CHECK-RV64V-NEXT: add a0, s2, a0
+; CHECK-RV64V-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 2
+; CHECK-RV64V-NEXT: add a0, a0, a1
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: li a1, 40
+; CHECK-RV64V-NEXT: mv a0, s0
+; CHECK-RV64V-NEXT: call __muldi3
+; CHECK-RV64V-NEXT: add a0, s2, a0
+; CHECK-RV64V-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 5
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: slli a0, s0, 5
+; CHECK-RV64V-NEXT: add a0, s2, a0
+; CHECK-RV64V-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 1
+; CHECK-RV64V-NEXT: add a0, a0, a1
+; CHECK-RV64V-NEXT: add a0, sp, a0
+; CHECK-RV64V-NEXT: addi a0, a0, 16
+; CHECK-RV64V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: li a1, 24
+; CHECK-RV64V-NEXT: mv a0, s0
+; CHECK-RV64V-NEXT: call __muldi3
+; CHECK-RV64V-NEXT: slli a2, s0, 2
+; CHECK-RV64V-NEXT: mv a3, s1
+; CHECK-RV64V-NEXT: bltu s1, a2, .LBB919_2
+; CHECK-RV64V-NEXT: # %bb.1:
+; CHECK-RV64V-NEXT: mv a3, a2
+; CHECK-RV64V-NEXT: .LBB919_2:
+; CHECK-RV64V-NEXT: slli a5, s0, 4
+; CHECK-RV64V-NEXT: slli a1, s0, 1
+; CHECK-RV64V-NEXT: slli a6, s0, 3
+; CHECK-RV64V-NEXT: mv a4, a3
+; CHECK-RV64V-NEXT: bltu a3, a1, .LBB919_4
+; CHECK-RV64V-NEXT: # %bb.3:
+; CHECK-RV64V-NEXT: mv a4, a1
+; CHECK-RV64V-NEXT: .LBB919_4:
+; CHECK-RV64V-NEXT: vl8re64.v v8, (s2)
+; CHECK-RV64V-NEXT: add a7, s2, a0
+; CHECK-RV64V-NEXT: add a5, s2, a5
+; CHECK-RV64V-NEXT: add a6, s2, a6
+; CHECK-RV64V-NEXT: mv a0, a4
+; CHECK-RV64V-NEXT: bltu a4, s0, .LBB919_6
+; CHECK-RV64V-NEXT: # %bb.5:
+; CHECK-RV64V-NEXT: mv a0, s0
+; CHECK-RV64V-NEXT: .LBB919_6:
+; CHECK-RV64V-NEXT: vl8re64.v v16, (a7)
+; CHECK-RV64V-NEXT: csrr a7, vlenb
+; CHECK-RV64V-NEXT: slli a7, a7, 3
+; CHECK-RV64V-NEXT: add a7, sp, a7
+; CHECK-RV64V-NEXT: addi a7, a7, 16
+; CHECK-RV64V-NEXT: vs8r.v v16, (a7) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: vl8re64.v v16, (a5)
+; CHECK-RV64V-NEXT: addi a5, sp, 16
+; CHECK-RV64V-NEXT: vs8r.v v16, (a5) # vscale x 64-byte Folded Spill
+; CHECK-RV64V-NEXT: vl8re64.v v0, (a6)
+; CHECK-RV64V-NEXT: csrr a5, vlenb
+; CHECK-RV64V-NEXT: slli a5, a5, 4
+; CHECK-RV64V-NEXT: mv a6, a5
+; CHECK-RV64V-NEXT: slli a5, a5, 1
+; CHECK-RV64V-NEXT: add a5, a5, a6
+; CHECK-RV64V-NEXT: add a5, sp, a5
+; CHECK-RV64V-NEXT: addi a5, a5, 16
+; CHECK-RV64V-NEXT: vl8r.v v24, (a5) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: csrr a5, vlenb
+; CHECK-RV64V-NEXT: slli a5, a5, 4
+; CHECK-RV64V-NEXT: add a5, sp, a5
+; CHECK-RV64V-NEXT: addi a5, a5, 16
+; CHECK-RV64V-NEXT: vl8r.v v16, (a5) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v24, (zero), v16
+; CHECK-RV64V-NEXT: sub a0, a4, s0
+; CHECK-RV64V-NEXT: sub a5, a3, a1
+; CHECK-RV64V-NEXT: sltu a4, a4, a0
+; CHECK-RV64V-NEXT: sltu a3, a3, a5
+; CHECK-RV64V-NEXT: addi a4, a4, -1
+; CHECK-RV64V-NEXT: addi a3, a3, -1
+; CHECK-RV64V-NEXT: and a4, a4, a0
+; CHECK-RV64V-NEXT: and a0, a3, a5
+; CHECK-RV64V-NEXT: vsetvli zero, a4, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v25, (zero), v8
+; CHECK-RV64V-NEXT: mv a3, a0
+; CHECK-RV64V-NEXT: bltu a0, s0, .LBB919_8
+; CHECK-RV64V-NEXT: # %bb.7:
+; CHECK-RV64V-NEXT: mv a3, s0
+; CHECK-RV64V-NEXT: .LBB919_8:
+; CHECK-RV64V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v26, (zero), v0
+; CHECK-RV64V-NEXT: sub a3, a0, s0
+; CHECK-RV64V-NEXT: sub a2, s1, a2
+; CHECK-RV64V-NEXT: sltu a0, a0, a3
+; CHECK-RV64V-NEXT: sltu a4, s1, a2
+; CHECK-RV64V-NEXT: addi a0, a0, -1
+; CHECK-RV64V-NEXT: addi a4, a4, -1
+; CHECK-RV64V-NEXT: and a3, a0, a3
+; CHECK-RV64V-NEXT: and a0, a4, a2
+; CHECK-RV64V-NEXT: addi a2, sp, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v27, (zero), v8
+; CHECK-RV64V-NEXT: mv a2, a0
+; CHECK-RV64V-NEXT: bltu a0, a1, .LBB919_10
+; CHECK-RV64V-NEXT: # %bb.9:
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: .LBB919_10:
+; CHECK-RV64V-NEXT: mv a3, a2
+; CHECK-RV64V-NEXT: bltu a2, s0, .LBB919_12
+; CHECK-RV64V-NEXT: # %bb.11:
+; CHECK-RV64V-NEXT: mv a3, s0
+; CHECK-RV64V-NEXT: .LBB919_12:
+; CHECK-RV64V-NEXT: csrr a4, vlenb
+; CHECK-RV64V-NEXT: slli a4, a4, 3
+; CHECK-RV64V-NEXT: add a4, sp, a4
+; CHECK-RV64V-NEXT: addi a4, a4, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a4) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v28, (zero), v8
+; CHECK-RV64V-NEXT: sub a3, a2, s0
+; CHECK-RV64V-NEXT: sub a1, a0, a1
+; CHECK-RV64V-NEXT: sltu a2, a2, a3
+; CHECK-RV64V-NEXT: sltu a0, a0, a1
+; CHECK-RV64V-NEXT: addi a2, a2, -1
+; CHECK-RV64V-NEXT: addi a0, a0, -1
+; CHECK-RV64V-NEXT: and a2, a2, a3
+; CHECK-RV64V-NEXT: and a0, a0, a1
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 3
+; CHECK-RV64V-NEXT: mv a3, a1
+; CHECK-RV64V-NEXT: slli a1, a1, 1
+; CHECK-RV64V-NEXT: add a1, a1, a3
+; CHECK-RV64V-NEXT: add a1, sp, a1
+; CHECK-RV64V-NEXT: addi a1, a1, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v29, (zero), v8
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: bltu a0, s0, .LBB919_14
+; CHECK-RV64V-NEXT: # %bb.13:
+; CHECK-RV64V-NEXT: mv a1, s0
+; CHECK-RV64V-NEXT: .LBB919_14:
+; CHECK-RV64V-NEXT: csrr a2, vlenb
+; CHECK-RV64V-NEXT: slli a2, a2, 5
+; CHECK-RV64V-NEXT: add a2, sp, a2
+; CHECK-RV64V-NEXT: addi a2, a2, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v30, (zero), v8
+; CHECK-RV64V-NEXT: sub a1, a0, s0
+; CHECK-RV64V-NEXT: sltu a0, a0, a1
+; CHECK-RV64V-NEXT: addi a0, a0, -1
+; CHECK-RV64V-NEXT: and a0, a0, a1
+; CHECK-RV64V-NEXT: csrr a1, vlenb
+; CHECK-RV64V-NEXT: slli a1, a1, 3
+; CHECK-RV64V-NEXT: mv a2, a1
+; CHECK-RV64V-NEXT: slli a1, a1, 2
+; CHECK-RV64V-NEXT: add a1, a1, a2
+; CHECK-RV64V-NEXT: add a1, sp, a1
+; CHECK-RV64V-NEXT: addi a1, a1, 16
+; CHECK-RV64V-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v31, (zero), v8
+; CHECK-RV64V-NEXT: csrr a0, vlenb
+; CHECK-RV64V-NEXT: slli a0, a0, 3
+; CHECK-RV64V-NEXT: mv a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 1
+; CHECK-RV64V-NEXT: add a1, a1, a0
+; CHECK-RV64V-NEXT: slli a0, a0, 1
+; CHECK-RV64V-NEXT: add a0, a0, a1
+; CHECK-RV64V-NEXT: add sp, sp, a0
+; CHECK-RV64V-NEXT: .cfi_def_cfa sp, 48
+; CHECK-RV64V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-RV64V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-RV64V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; CHECK-RV64V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; CHECK-RV64V-NEXT: .cfi_restore ra
+; CHECK-RV64V-NEXT: .cfi_restore s0
+; CHECK-RV64V-NEXT: .cfi_restore s1
+; CHECK-RV64V-NEXT: .cfi_restore s2
+; CHECK-RV64V-NEXT: addi sp, sp, 48
+; CHECK-RV64V-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_nxv64i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: csrr a1, vlenb
+; CHECK-RV32V-NEXT: slli a5, a1, 4
+; CHECK-RV32V-NEXT: slli a2, a1, 2
+; CHECK-RV32V-NEXT: slli a6, a1, 3
+; CHECK-RV32V-NEXT: mv a4, a3
+; CHECK-RV32V-NEXT: bltu a3, a2, .LBB919_2
+; CHECK-RV32V-NEXT: # %bb.1:
+; CHECK-RV32V-NEXT: mv a4, a2
+; CHECK-RV32V-NEXT: .LBB919_2:
+; CHECK-RV32V-NEXT: vl8re32.v v0, (a0)
+; CHECK-RV32V-NEXT: add a5, a0, a5
+; CHECK-RV32V-NEXT: slli a1, a1, 1
+; CHECK-RV32V-NEXT: add a0, a0, a6
+; CHECK-RV32V-NEXT: mv a6, a4
+; CHECK-RV32V-NEXT: bltu a4, a1, .LBB919_4
+; CHECK-RV32V-NEXT: # %bb.3:
+; CHECK-RV32V-NEXT: mv a6, a1
+; CHECK-RV32V-NEXT: .LBB919_4:
+; CHECK-RV32V-NEXT: addi sp, sp, -16
+; CHECK-RV32V-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV32V-NEXT: csrr a7, vlenb
+; CHECK-RV32V-NEXT: slli a7, a7, 3
+; CHECK-RV32V-NEXT: sub sp, sp, a7
+; CHECK-RV32V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV32V-NEXT: vl8re32.v v24, (a5)
+; CHECK-RV32V-NEXT: addi a5, sp, 16
+; CHECK-RV32V-NEXT: vs8r.v v24, (a5) # vscale x 64-byte Folded Spill
+; CHECK-RV32V-NEXT: vl8re32.v v24, (a0)
+; CHECK-RV32V-NEXT: vsetvli zero, a6, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32V-NEXT: sub a0, a4, a1
+; CHECK-RV32V-NEXT: sub a2, a3, a2
+; CHECK-RV32V-NEXT: sltu a4, a4, a0
+; CHECK-RV32V-NEXT: sltu a3, a3, a2
+; CHECK-RV32V-NEXT: addi a4, a4, -1
+; CHECK-RV32V-NEXT: addi a3, a3, -1
+; CHECK-RV32V-NEXT: and a4, a4, a0
+; CHECK-RV32V-NEXT: and a0, a3, a2
+; CHECK-RV32V-NEXT: vsetvli zero, a4, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v10, (zero), v0
+; CHECK-RV32V-NEXT: mv a2, a0
+; CHECK-RV32V-NEXT: bltu a0, a1, .LBB919_6
+; CHECK-RV32V-NEXT: # %bb.5:
+; CHECK-RV32V-NEXT: mv a2, a1
+; CHECK-RV32V-NEXT: .LBB919_6:
+; CHECK-RV32V-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v12, (zero), v24
+; CHECK-RV32V-NEXT: sub a1, a0, a1
+; CHECK-RV32V-NEXT: sltu a0, a0, a1
+; CHECK-RV32V-NEXT: addi a0, a0, -1
+; CHECK-RV32V-NEXT: and a0, a0, a1
+; CHECK-RV32V-NEXT: addi a1, sp, 16
+; CHECK-RV32V-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v14, (zero), v16
+; CHECK-RV32V-NEXT: csrr a0, vlenb
+; CHECK-RV32V-NEXT: slli a0, a0, 3
+; CHECK-RV32V-NEXT: add sp, sp, a0
+; CHECK-RV32V-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV32V-NEXT: addi sp, sp, 16
+; CHECK-RV32V-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_nxv64i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: addi sp, sp, -48
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 48
+; CHECK-RV64VC-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-RV64VC-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-RV64VC-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; CHECK-RV64VC-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; CHECK-RV64VC-NEXT: .cfi_offset ra, -8
+; CHECK-RV64VC-NEXT: .cfi_offset s0, -16
+; CHECK-RV64VC-NEXT: .cfi_offset s1, -24
+; CHECK-RV64VC-NEXT: .cfi_offset s2, -32
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 3
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: slli a1, a1, 1
+; CHECK-RV64VC-NEXT: add a2, a2, a1
+; CHECK-RV64VC-NEXT: slli a1, a1, 1
+; CHECK-RV64VC-NEXT: add a1, a1, a2
+; CHECK-RV64VC-NEXT: sub sp, sp, a1
+; CHECK-RV64VC-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb
+; CHECK-RV64VC-NEXT: mv s2, a7
+; CHECK-RV64VC-NEXT: mv s1, a0
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 4
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 4
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 1
+; CHECK-RV64VC-NEXT: add a0, a0, a1
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: csrr s0, vlenb
+; CHECK-RV64VC-NEXT: li a1, 48
+; CHECK-RV64VC-NEXT: mv a0, s0
+; CHECK-RV64VC-NEXT: call __muldi3
+; CHECK-RV64VC-NEXT: add a0, a0, s1
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 2
+; CHECK-RV64VC-NEXT: add a0, a0, a1
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: li a1, 40
+; CHECK-RV64VC-NEXT: mv a0, s0
+; CHECK-RV64VC-NEXT: call __muldi3
+; CHECK-RV64VC-NEXT: add a0, a0, s1
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 5
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: slli a0, s0, 5
+; CHECK-RV64VC-NEXT: add a0, a0, s1
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (a0)
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 1
+; CHECK-RV64VC-NEXT: add a0, a0, a1
+; CHECK-RV64VC-NEXT: add a0, a0, sp
+; CHECK-RV64VC-NEXT: addi a0, a0, 16
+; CHECK-RV64VC-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: li a1, 24
+; CHECK-RV64VC-NEXT: mv a0, s0
+; CHECK-RV64VC-NEXT: call __muldi3
+; CHECK-RV64VC-NEXT: slli a6, s0, 2
+; CHECK-RV64VC-NEXT: mv a3, s2
+; CHECK-RV64VC-NEXT: bltu s2, a6, .LBB919_2
+; CHECK-RV64VC-NEXT: # %bb.1:
+; CHECK-RV64VC-NEXT: mv a3, a6
+; CHECK-RV64VC-NEXT: .LBB919_2:
+; CHECK-RV64VC-NEXT: slli a5, s0, 4
+; CHECK-RV64VC-NEXT: slli a7, s0, 1
+; CHECK-RV64VC-NEXT: slli a2, s0, 3
+; CHECK-RV64VC-NEXT: mv a4, a3
+; CHECK-RV64VC-NEXT: bltu a3, a7, .LBB919_4
+; CHECK-RV64VC-NEXT: # %bb.3:
+; CHECK-RV64VC-NEXT: mv a4, a7
+; CHECK-RV64VC-NEXT: .LBB919_4:
+; CHECK-RV64VC-NEXT: vl8re64.v v8, (s1)
+; CHECK-RV64VC-NEXT: add a1, s1, a0
+; CHECK-RV64VC-NEXT: add a5, a5, s1
+; CHECK-RV64VC-NEXT: add a2, a2, s1
+; CHECK-RV64VC-NEXT: mv a0, a4
+; CHECK-RV64VC-NEXT: bltu a4, s0, .LBB919_6
+; CHECK-RV64VC-NEXT: # %bb.5:
+; CHECK-RV64VC-NEXT: mv a0, s0
+; CHECK-RV64VC-NEXT: .LBB919_6:
+; CHECK-RV64VC-NEXT: vl8re64.v v16, (a1)
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 3
+; CHECK-RV64VC-NEXT: add a1, a1, sp
+; CHECK-RV64VC-NEXT: addi a1, a1, 16
+; CHECK-RV64VC-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: vl8re64.v v16, (a5)
+; CHECK-RV64VC-NEXT: addi a1, sp, 16
+; CHECK-RV64VC-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; CHECK-RV64VC-NEXT: vl8re64.v v0, (a2)
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 4
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: slli a1, a1, 1
+; CHECK-RV64VC-NEXT: add a1, a1, a2
+; CHECK-RV64VC-NEXT: add a1, a1, sp
+; CHECK-RV64VC-NEXT: addi a1, a1, 16
+; CHECK-RV64VC-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 4
+; CHECK-RV64VC-NEXT: add a1, a1, sp
+; CHECK-RV64VC-NEXT: addi a1, a1, 16
+; CHECK-RV64VC-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v24, (zero), v16
+; CHECK-RV64VC-NEXT: sub a0, a4, s0
+; CHECK-RV64VC-NEXT: sub a1, a3, a7
+; CHECK-RV64VC-NEXT: sltu a2, a4, a0
+; CHECK-RV64VC-NEXT: sltu a3, a3, a1
+; CHECK-RV64VC-NEXT: addi a2, a2, -1
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a2, a2, a0
+; CHECK-RV64VC-NEXT: and a0, a3, a1
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v25, (zero), v8
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: bltu a0, s0, .LBB919_8
+; CHECK-RV64VC-NEXT: # %bb.7:
+; CHECK-RV64VC-NEXT: mv a1, s0
+; CHECK-RV64VC-NEXT: .LBB919_8:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v26, (zero), v0
+; CHECK-RV64VC-NEXT: sub a1, a0, s0
+; CHECK-RV64VC-NEXT: sub a2, s2, a6
+; CHECK-RV64VC-NEXT: sltu a0, a0, a1
+; CHECK-RV64VC-NEXT: sltu a3, s2, a2
+; CHECK-RV64VC-NEXT: addi a0, a0, -1
+; CHECK-RV64VC-NEXT: addi a3, a3, -1
+; CHECK-RV64VC-NEXT: and a1, a1, a0
+; CHECK-RV64VC-NEXT: and a0, a3, a2
+; CHECK-RV64VC-NEXT: addi a2, sp, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v27, (zero), v8
+; CHECK-RV64VC-NEXT: mv a2, a0
+; CHECK-RV64VC-NEXT: bltu a0, a7, .LBB919_10
+; CHECK-RV64VC-NEXT: # %bb.9:
+; CHECK-RV64VC-NEXT: mv a2, a7
+; CHECK-RV64VC-NEXT: .LBB919_10:
+; CHECK-RV64VC-NEXT: mv a1, a2
+; CHECK-RV64VC-NEXT: bltu a2, s0, .LBB919_12
+; CHECK-RV64VC-NEXT: # %bb.11:
+; CHECK-RV64VC-NEXT: mv a1, s0
+; CHECK-RV64VC-NEXT: .LBB919_12:
+; CHECK-RV64VC-NEXT: csrr a3, vlenb
+; CHECK-RV64VC-NEXT: slli a3, a3, 3
+; CHECK-RV64VC-NEXT: add a3, a3, sp
+; CHECK-RV64VC-NEXT: addi a3, a3, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a3) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v28, (zero), v8
+; CHECK-RV64VC-NEXT: sub a1, a2, s0
+; CHECK-RV64VC-NEXT: sub a3, a0, a7
+; CHECK-RV64VC-NEXT: sltu a2, a2, a1
+; CHECK-RV64VC-NEXT: sltu a0, a0, a3
+; CHECK-RV64VC-NEXT: addi a2, a2, -1
+; CHECK-RV64VC-NEXT: addi a0, a0, -1
+; CHECK-RV64VC-NEXT: and a1, a1, a2
+; CHECK-RV64VC-NEXT: and a0, a0, a3
+; CHECK-RV64VC-NEXT: csrr a2, vlenb
+; CHECK-RV64VC-NEXT: slli a2, a2, 3
+; CHECK-RV64VC-NEXT: mv a3, a2
+; CHECK-RV64VC-NEXT: slli a2, a2, 1
+; CHECK-RV64VC-NEXT: add a2, a2, a3
+; CHECK-RV64VC-NEXT: add a2, a2, sp
+; CHECK-RV64VC-NEXT: addi a2, a2, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v29, (zero), v8
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: bltu a0, s0, .LBB919_14
+; CHECK-RV64VC-NEXT: # %bb.13:
+; CHECK-RV64VC-NEXT: mv a1, s0
+; CHECK-RV64VC-NEXT: .LBB919_14:
+; CHECK-RV64VC-NEXT: csrr a2, vlenb
+; CHECK-RV64VC-NEXT: slli a2, a2, 5
+; CHECK-RV64VC-NEXT: add a2, a2, sp
+; CHECK-RV64VC-NEXT: addi a2, a2, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v30, (zero), v8
+; CHECK-RV64VC-NEXT: sub a1, a0, s0
+; CHECK-RV64VC-NEXT: sltu a0, a0, a1
+; CHECK-RV64VC-NEXT: addi a0, a0, -1
+; CHECK-RV64VC-NEXT: and a0, a0, a1
+; CHECK-RV64VC-NEXT: csrr a1, vlenb
+; CHECK-RV64VC-NEXT: slli a1, a1, 3
+; CHECK-RV64VC-NEXT: mv a2, a1
+; CHECK-RV64VC-NEXT: slli a1, a1, 2
+; CHECK-RV64VC-NEXT: add a1, a1, a2
+; CHECK-RV64VC-NEXT: add a1, a1, sp
+; CHECK-RV64VC-NEXT: addi a1, a1, 16
+; CHECK-RV64VC-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v31, (zero), v8
+; CHECK-RV64VC-NEXT: csrr a0, vlenb
+; CHECK-RV64VC-NEXT: slli a0, a0, 3
+; CHECK-RV64VC-NEXT: mv a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 1
+; CHECK-RV64VC-NEXT: add a1, a1, a0
+; CHECK-RV64VC-NEXT: slli a0, a0, 1
+; CHECK-RV64VC-NEXT: add a0, a0, a1
+; CHECK-RV64VC-NEXT: add sp, sp, a0
+; CHECK-RV64VC-NEXT: .cfi_def_cfa sp, 48
+; CHECK-RV64VC-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-RV64VC-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-RV64VC-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; CHECK-RV64VC-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; CHECK-RV64VC-NEXT: .cfi_restore ra
+; CHECK-RV64VC-NEXT: .cfi_restore s0
+; CHECK-RV64VC-NEXT: .cfi_restore s1
+; CHECK-RV64VC-NEXT: .cfi_restore s2
+; CHECK-RV64VC-NEXT: addi sp, sp, 48
+; CHECK-RV64VC-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_nxv64i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: csrr a1, vlenb
+; CHECK-RV32VC-NEXT: slli a5, a1, 4
+; CHECK-RV32VC-NEXT: slli a2, a1, 2
+; CHECK-RV32VC-NEXT: slli a6, a1, 3
+; CHECK-RV32VC-NEXT: mv a4, a3
+; CHECK-RV32VC-NEXT: bltu a3, a2, .LBB919_2
+; CHECK-RV32VC-NEXT: # %bb.1:
+; CHECK-RV32VC-NEXT: mv a4, a2
+; CHECK-RV32VC-NEXT: .LBB919_2:
+; CHECK-RV32VC-NEXT: vl8re32.v v0, (a0)
+; CHECK-RV32VC-NEXT: add a7, a0, a5
+; CHECK-RV32VC-NEXT: slli a1, a1, 1
+; CHECK-RV32VC-NEXT: add a0, a0, a6
+; CHECK-RV32VC-NEXT: mv a5, a4
+; CHECK-RV32VC-NEXT: bltu a4, a1, .LBB919_4
+; CHECK-RV32VC-NEXT: # %bb.3:
+; CHECK-RV32VC-NEXT: mv a5, a1
+; CHECK-RV32VC-NEXT: .LBB919_4:
+; CHECK-RV32VC-NEXT: addi sp, sp, -16
+; CHECK-RV32VC-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV32VC-NEXT: csrr a6, vlenb
+; CHECK-RV32VC-NEXT: slli a6, a6, 3
+; CHECK-RV32VC-NEXT: sub sp, sp, a6
+; CHECK-RV32VC-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV32VC-NEXT: vl8re32.v v24, (a7)
+; CHECK-RV32VC-NEXT: addi a6, sp, 16
+; CHECK-RV32VC-NEXT: vs8r.v v24, (a6) # vscale x 64-byte Folded Spill
+; CHECK-RV32VC-NEXT: vl8re32.v v24, (a0)
+; CHECK-RV32VC-NEXT: vsetvli zero, a5, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v16
+; CHECK-RV32VC-NEXT: sub a0, a4, a1
+; CHECK-RV32VC-NEXT: sub a2, a3, a2
+; CHECK-RV32VC-NEXT: sltu a4, a4, a0
+; CHECK-RV32VC-NEXT: sltu a3, a3, a2
+; CHECK-RV32VC-NEXT: addi a4, a4, -1
+; CHECK-RV32VC-NEXT: addi a3, a3, -1
+; CHECK-RV32VC-NEXT: and a4, a4, a0
+; CHECK-RV32VC-NEXT: and a0, a3, a2
+; CHECK-RV32VC-NEXT: vsetvli zero, a4, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v10, (zero), v0
+; CHECK-RV32VC-NEXT: mv a2, a0
+; CHECK-RV32VC-NEXT: bltu a0, a1, .LBB919_6
+; CHECK-RV32VC-NEXT: # %bb.5:
+; CHECK-RV32VC-NEXT: mv a2, a1
+; CHECK-RV32VC-NEXT: .LBB919_6:
+; CHECK-RV32VC-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v12, (zero), v24
+; CHECK-RV32VC-NEXT: sub a1, a0, a1
+; CHECK-RV32VC-NEXT: sltu a0, a0, a1
+; CHECK-RV32VC-NEXT: addi a0, a0, -1
+; CHECK-RV32VC-NEXT: and a0, a0, a1
+; CHECK-RV32VC-NEXT: addi a1, sp, 16
+; CHECK-RV32VC-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v14, (zero), v16
+; CHECK-RV32VC-NEXT: csrr a0, vlenb
+; CHECK-RV32VC-NEXT: slli a0, a0, 3
+; CHECK-RV32VC-NEXT: add sp, sp, a0
+; CHECK-RV32VC-NEXT: .cfi_def_cfa sp, 16
+; CHECK-RV32VC-NEXT: addi sp, sp, 16
+; CHECK-RV32VC-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.nxv64i8.nxv64p0(<vscale x 64 x i8> %val, <vscale x 64 x ptr> %ptrs, <vscale x 64 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <vscale x 64 x i8> @test_nontemporal_vp_strided.load_nxv64i8_P1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv64i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv64i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv64i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv64i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 64 x i8> @llvm.experimental.vp.strided.load.nxv64i8.i64(ptr %p, i64 %stride, <vscale x 64 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 64 x i8> %x
+}
+
+
+define <vscale x 64 x i8> @test_nontemporal_vp_strided.load_nxv64i8_PALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv64i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv64i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv64i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv64i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 64 x i8> @llvm.experimental.vp.strided.load.nxv64i8.i64(ptr %p, i64 %stride, <vscale x 64 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 64 x i8> %x
+}
+
+
+define <vscale x 64 x i8> @test_nontemporal_vp_strided.load_nxv64i8_S1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv64i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv64i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv64i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv64i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 64 x i8> @llvm.experimental.vp.strided.load.nxv64i8.i64(ptr %p, i64 %stride, <vscale x 64 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 64 x i8> %x
+}
+
+
+define <vscale x 64 x i8> @test_nontemporal_vp_strided.load_nxv64i8_ALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv64i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv64i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv64i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv64i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 64 x i8> @llvm.experimental.vp.strided.load.nxv64i8.i64(ptr %p, i64 %stride, <vscale x 64 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 64 x i8> %x
+}
+
+define <vscale x 64 x i8> @test_nontemporal_vp_strided.load_nxv64i8_DEFAULT(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_nxv64i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_nxv64i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_nxv64i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_nxv64i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 64 x i8> @llvm.experimental.vp.strided.load.nxv64i8.i64(ptr %p, i64 %stride, <vscale x 64 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 64 x i8> %x
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv64i8_P1(<vscale x 64 x i8> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv64i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv64i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv64i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv64i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv64i8.i64(<vscale x 64 x i8> %val, ptr %p, i64 %stride, <vscale x 64 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv64i8_PALL(<vscale x 64 x i8> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv64i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv64i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv64i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv64i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv64i8.i64(<vscale x 64 x i8> %val, ptr %p, i64 %stride, <vscale x 64 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv64i8_S1(<vscale x 64 x i8> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv64i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv64i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv64i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv64i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv64i8.i64(<vscale x 64 x i8> %val, ptr %p, i64 %stride, <vscale x 64 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_nxv64i8_ALL(<vscale x 64 x i8> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv64i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv64i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv64i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv64i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv64i8.i64(<vscale x 64 x i8> %val, ptr %p, i64 %stride, <vscale x 64 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_strided.store_nxv64i8_DEFAULT(<vscale x 64 x i8> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_nxv64i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_nxv64i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_nxv64i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_nxv64i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.nxv64i8.i64(<vscale x 64 x i8> %val, ptr %p, i64 %stride, <vscale x 64 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+!0 = !{i32 1}
+!1 = !{i32 2}
+!2 = !{i32 3}
+!3 = !{i32 4}
+!4 = !{i32 5}
+
diff --git a/llvm/test/CodeGen/RISCV/nontemporal-vp.ll b/llvm/test/CodeGen/RISCV/nontemporal-vp.ll
new file mode 100644
index 0000000000000..51337bf6a3443
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/nontemporal-vp.ll
@@ -0,0 +1,4009 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv64 -mattr=+zihintntl,+f,+d,+zfh,+v < %s | FileCheck %s -check-prefix=CHECK-RV64V
+; RUN: llc -mtriple=riscv32 -mattr=+zihintntl,+f,+d,+zfh,+v < %s | FileCheck %s -check-prefix=CHECK-RV32V
+; RUN: llc -mtriple=riscv64 -mattr=+zihintntl,+f,+d,+zfh,+v,+c < %s | FileCheck %s -check-prefix=CHECK-RV64VC
+; RUN: llc -mtriple=riscv32 -mattr=+zihintntl,+f,+d,+zfh,+v,+c < %s | FileCheck %s -check-prefix=CHECK-RV32VC
+
+
+define <16 x i8> @test_nontemporal_vp_load_v16i8_P1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_v16i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_v16i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_v16i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_v16i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <16 x i8> @llvm.vp.load.v16i8.p0(ptr %p, <16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <16 x i8> %x
+}
+
+
+define <16 x i8> @test_nontemporal_vp_load_v16i8_PALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_v16i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_v16i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_v16i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_v16i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <16 x i8> @llvm.vp.load.v16i8.p0(ptr %p, <16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <16 x i8> %x
+}
+
+
+define <16 x i8> @test_nontemporal_vp_load_v16i8_S1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_v16i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_v16i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_v16i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_v16i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <16 x i8> @llvm.vp.load.v16i8.p0(ptr %p, <16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <16 x i8> %x
+}
+
+
+define <16 x i8> @test_nontemporal_vp_load_v16i8_ALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_v16i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_v16i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_v16i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_v16i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <16 x i8> @llvm.vp.load.v16i8.p0(ptr %p, <16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <16 x i8> %x
+}
+
+define <16 x i8> @test_nontemporal_vp_load_v16i8_DEFAULT(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_v16i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_v16i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_v16i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_v16i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <16 x i8> @llvm.vp.load.v16i8.p0(ptr %p, <16 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <16 x i8> %x
+}
+
+
+define void @test_nontemporal_vp_store_v16i8_P1(<16 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_v16i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vse8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_v16i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vse8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_v16i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_v16i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.v16i8.p0(<16 x i8> %val, ptr %p, <16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_v16i8_PALL(<16 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_v16i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vse8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_v16i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vse8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_v16i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_v16i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.v16i8.p0(<16 x i8> %val, ptr %p, <16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_v16i8_S1(<16 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_v16i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vse8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_v16i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vse8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_v16i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_v16i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.v16i8.p0(<16 x i8> %val, ptr %p, <16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_v16i8_ALL(<16 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_v16i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_v16i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_v16i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_v16i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.v16i8.p0(<16 x i8> %val, ptr %p, <16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_store_v16i8_DEFAULT(<16 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_v16i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_v16i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_v16i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_v16i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.v16i8.p0(<16 x i8> %val, ptr %p, <16 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <16 x i8> @test_nontemporal_vp_gather_v16i8_P1(<16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_v16i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_v16i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vluxei32.v v12, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_v16i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_v16i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vluxei32.v v12, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v12
+; CHECK-RV32VC-NEXT: ret
+ %x = call <16 x i8> @llvm.vp.gather.v16i8.v16p0(<16 x ptr> %ptrs, <16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <16 x i8> %x
+}
+
+
+define <16 x i8> @test_nontemporal_vp_gather_v16i8_PALL(<16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_v16i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_v16i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vluxei32.v v12, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_v16i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_v16i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vluxei32.v v12, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v12
+; CHECK-RV32VC-NEXT: ret
+ %x = call <16 x i8> @llvm.vp.gather.v16i8.v16p0(<16 x ptr> %ptrs, <16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <16 x i8> %x
+}
+
+
+define <16 x i8> @test_nontemporal_vp_gather_v16i8_S1(<16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_v16i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_v16i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vluxei32.v v12, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_v16i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_v16i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vluxei32.v v12, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v12
+; CHECK-RV32VC-NEXT: ret
+ %x = call <16 x i8> @llvm.vp.gather.v16i8.v16p0(<16 x ptr> %ptrs, <16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <16 x i8> %x
+}
+
+
+define <16 x i8> @test_nontemporal_vp_gather_v16i8_ALL(<16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_v16i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_v16i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v12, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_v16i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_v16i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v12, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v12
+; CHECK-RV32VC-NEXT: ret
+ %x = call <16 x i8> @llvm.vp.gather.v16i8.v16p0(<16 x ptr> %ptrs, <16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <16 x i8> %x
+}
+
+define <16 x i8> @test_nontemporal_vp_gather_v16i8_DEFAULT(<16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_v16i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_v16i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v12, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_v16i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v16, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_v16i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v12, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v12
+; CHECK-RV32VC-NEXT: ret
+ %x = call <16 x i8> @llvm.vp.gather.v16i8.v16p0(<16 x ptr> %ptrs, <16 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <16 x i8> %x
+}
+
+
+define void @test_nontemporal_vp_scatter_v16i8_P1(<16 x i8> %val, <16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_v16i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_v16i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_v16i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_v16i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.v16i8.v16p0(<16 x i8> %val, <16 x ptr> %ptrs, <16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_v16i8_PALL(<16 x i8> %val, <16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_v16i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_v16i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_v16i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_v16i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.v16i8.v16p0(<16 x i8> %val, <16 x ptr> %ptrs, <16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_v16i8_S1(<16 x i8> %val, <16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_v16i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_v16i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_v16i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_v16i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.v16i8.v16p0(<16 x i8> %val, <16 x ptr> %ptrs, <16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_v16i8_ALL(<16 x i8> %val, <16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_v16i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_v16i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_v16i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_v16i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.v16i8.v16p0(<16 x i8> %val, <16 x ptr> %ptrs, <16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_scatter_v16i8_DEFAULT(<16 x i8> %val, <16 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_v16i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_v16i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_v16i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v16
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_v16i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v12
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.v16i8.v16p0(<16 x i8> %val, <16 x ptr> %ptrs, <16 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <16 x i8> @test_nontemporal_vp_strided.load_v16i8_P1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_v16i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_v16i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_v16i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_v16i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <16 x i8> @llvm.experimental.vp.strided.load.v16i8.i64(ptr %p, i64 %stride, <16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <16 x i8> %x
+}
+
+
+define <16 x i8> @test_nontemporal_vp_strided.load_v16i8_PALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_v16i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_v16i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_v16i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_v16i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <16 x i8> @llvm.experimental.vp.strided.load.v16i8.i64(ptr %p, i64 %stride, <16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <16 x i8> %x
+}
+
+
+define <16 x i8> @test_nontemporal_vp_strided.load_v16i8_S1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_v16i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_v16i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_v16i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_v16i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <16 x i8> @llvm.experimental.vp.strided.load.v16i8.i64(ptr %p, i64 %stride, <16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <16 x i8> %x
+}
+
+
+define <16 x i8> @test_nontemporal_vp_strided.load_v16i8_ALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_v16i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_v16i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_v16i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_v16i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <16 x i8> @llvm.experimental.vp.strided.load.v16i8.i64(ptr %p, i64 %stride, <16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <16 x i8> %x
+}
+
+define <16 x i8> @test_nontemporal_vp_strided.load_v16i8_DEFAULT(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_v16i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_v16i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_v16i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_v16i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <16 x i8> @llvm.experimental.vp.strided.load.v16i8.i64(ptr %p, i64 %stride, <16 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <16 x i8> %x
+}
+
+
+define void @test_nontemporal_vp_strided.store_v16i8_P1(<16 x i8> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_v16i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_v16i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_v16i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_v16i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.v16i8.i64(<16 x i8> %val, ptr %p, i64 %stride, <16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_v16i8_PALL(<16 x i8> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_v16i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_v16i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_v16i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_v16i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.v16i8.i64(<16 x i8> %val, ptr %p, i64 %stride, <16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_v16i8_S1(<16 x i8> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_v16i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_v16i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_v16i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_v16i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.v16i8.i64(<16 x i8> %val, ptr %p, i64 %stride, <16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_v16i8_ALL(<16 x i8> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_v16i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_v16i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_v16i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_v16i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.v16i8.i64(<16 x i8> %val, ptr %p, i64 %stride, <16 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_strided.store_v16i8_DEFAULT(<16 x i8> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_v16i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_v16i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_v16i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_v16i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse8.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.v16i8.i64(<16 x i8> %val, ptr %p, i64 %stride, <16 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <8 x i16> @test_nontemporal_vp_load_v8i16_P1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_v8i16_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vle16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_v8i16_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vle16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_v8i16_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_v8i16_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <8 x i16> @llvm.vp.load.v8i16.p0(ptr %p, <8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <8 x i16> %x
+}
+
+
+define <8 x i16> @test_nontemporal_vp_load_v8i16_PALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_v8i16_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vle16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_v8i16_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vle16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_v8i16_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_v8i16_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <8 x i16> @llvm.vp.load.v8i16.p0(ptr %p, <8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <8 x i16> %x
+}
+
+
+define <8 x i16> @test_nontemporal_vp_load_v8i16_S1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_v8i16_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vle16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_v8i16_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vle16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_v8i16_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_v8i16_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <8 x i16> @llvm.vp.load.v8i16.p0(ptr %p, <8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <8 x i16> %x
+}
+
+
+define <8 x i16> @test_nontemporal_vp_load_v8i16_ALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_v8i16_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_v8i16_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_v8i16_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_v8i16_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <8 x i16> @llvm.vp.load.v8i16.p0(ptr %p, <8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <8 x i16> %x
+}
+
+define <8 x i16> @test_nontemporal_vp_load_v8i16_DEFAULT(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_v8i16_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_v8i16_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_v8i16_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_v8i16_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <8 x i16> @llvm.vp.load.v8i16.p0(ptr %p, <8 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <8 x i16> %x
+}
+
+
+define void @test_nontemporal_vp_store_v8i16_P1(<8 x i16> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_v8i16_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vse16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_v8i16_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vse16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_v8i16_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_v8i16_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.v8i16.p0(<8 x i16> %val, ptr %p, <8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_v8i16_PALL(<8 x i16> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_v8i16_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vse16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_v8i16_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vse16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_v8i16_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_v8i16_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.v8i16.p0(<8 x i16> %val, ptr %p, <8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_v8i16_S1(<8 x i16> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_v8i16_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vse16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_v8i16_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vse16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_v8i16_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_v8i16_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.v8i16.p0(<8 x i16> %val, ptr %p, <8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_v8i16_ALL(<8 x i16> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_v8i16_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_v8i16_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_v8i16_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_v8i16_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.v8i16.p0(<8 x i16> %val, ptr %p, <8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_store_v8i16_DEFAULT(<8 x i16> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_v8i16_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse16.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_v8i16_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse16.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_v8i16_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_v8i16_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse16.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.v8i16.p0(<8 x i16> %val, ptr %p, <8 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <8 x i16> @test_nontemporal_vp_gather_v8i16_P1(<8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_v8i16_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_v8i16_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vluxei32.v v10, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_v8i16_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_v8i16_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vluxei32.v v10, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v10
+; CHECK-RV32VC-NEXT: ret
+ %x = call <8 x i16> @llvm.vp.gather.v8i16.v8p0(<8 x ptr> %ptrs, <8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <8 x i16> %x
+}
+
+
+define <8 x i16> @test_nontemporal_vp_gather_v8i16_PALL(<8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_v8i16_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_v8i16_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vluxei32.v v10, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_v8i16_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_v8i16_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vluxei32.v v10, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v10
+; CHECK-RV32VC-NEXT: ret
+ %x = call <8 x i16> @llvm.vp.gather.v8i16.v8p0(<8 x ptr> %ptrs, <8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <8 x i16> %x
+}
+
+
+define <8 x i16> @test_nontemporal_vp_gather_v8i16_S1(<8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_v8i16_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_v8i16_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vluxei32.v v10, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_v8i16_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_v8i16_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vluxei32.v v10, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v10
+; CHECK-RV32VC-NEXT: ret
+ %x = call <8 x i16> @llvm.vp.gather.v8i16.v8p0(<8 x ptr> %ptrs, <8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <8 x i16> %x
+}
+
+
+define <8 x i16> @test_nontemporal_vp_gather_v8i16_ALL(<8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_v8i16_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_v8i16_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v10, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_v8i16_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_v8i16_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v10, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v10
+; CHECK-RV32VC-NEXT: ret
+ %x = call <8 x i16> @llvm.vp.gather.v8i16.v8p0(<8 x ptr> %ptrs, <8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <8 x i16> %x
+}
+
+define <8 x i16> @test_nontemporal_vp_gather_v8i16_DEFAULT(<8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_v8i16_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_v8i16_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v10, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_v8i16_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v12, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_v8i16_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v10, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v10
+; CHECK-RV32VC-NEXT: ret
+ %x = call <8 x i16> @llvm.vp.gather.v8i16.v8p0(<8 x ptr> %ptrs, <8 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <8 x i16> %x
+}
+
+
+define void @test_nontemporal_vp_scatter_v8i16_P1(<8 x i16> %val, <8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_v8i16_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_v8i16_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_v8i16_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_v8i16_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, <8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_v8i16_PALL(<8 x i16> %val, <8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_v8i16_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_v8i16_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_v8i16_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_v8i16_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, <8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_v8i16_S1(<8 x i16> %val, <8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_v8i16_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_v8i16_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_v8i16_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_v8i16_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, <8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_v8i16_ALL(<8 x i16> %val, <8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_v8i16_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_v8i16_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_v8i16_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_v8i16_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, <8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_scatter_v8i16_DEFAULT(<8 x i16> %val, <8 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_v8i16_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_v8i16_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_v8i16_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v12
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_v8i16_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v10
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, <8 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <8 x i16> @test_nontemporal_vp_strided.load_v8i16_P1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_v8i16_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_v8i16_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_v8i16_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_v8i16_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <8 x i16> @llvm.experimental.vp.strided.load.v8i16.i64(ptr %p, i64 %stride, <8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <8 x i16> %x
+}
+
+
+define <8 x i16> @test_nontemporal_vp_strided.load_v8i16_PALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_v8i16_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_v8i16_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_v8i16_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_v8i16_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <8 x i16> @llvm.experimental.vp.strided.load.v8i16.i64(ptr %p, i64 %stride, <8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <8 x i16> %x
+}
+
+
+define <8 x i16> @test_nontemporal_vp_strided.load_v8i16_S1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_v8i16_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_v8i16_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_v8i16_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_v8i16_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <8 x i16> @llvm.experimental.vp.strided.load.v8i16.i64(ptr %p, i64 %stride, <8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <8 x i16> %x
+}
+
+
+define <8 x i16> @test_nontemporal_vp_strided.load_v8i16_ALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_v8i16_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_v8i16_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_v8i16_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_v8i16_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <8 x i16> @llvm.experimental.vp.strided.load.v8i16.i64(ptr %p, i64 %stride, <8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <8 x i16> %x
+}
+
+define <8 x i16> @test_nontemporal_vp_strided.load_v8i16_DEFAULT(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_v8i16_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_v8i16_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_v8i16_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_v8i16_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <8 x i16> @llvm.experimental.vp.strided.load.v8i16.i64(ptr %p, i64 %stride, <8 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <8 x i16> %x
+}
+
+
+define void @test_nontemporal_vp_strided.store_v8i16_P1(<8 x i16> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_v8i16_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_v8i16_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_v8i16_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_v8i16_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.v8i16.i64(<8 x i16> %val, ptr %p, i64 %stride, <8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_v8i16_PALL(<8 x i16> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_v8i16_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_v8i16_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_v8i16_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_v8i16_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.v8i16.i64(<8 x i16> %val, ptr %p, i64 %stride, <8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_v8i16_S1(<8 x i16> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_v8i16_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_v8i16_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_v8i16_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_v8i16_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.v8i16.i64(<8 x i16> %val, ptr %p, i64 %stride, <8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_v8i16_ALL(<8 x i16> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_v8i16_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_v8i16_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_v8i16_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_v8i16_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.v8i16.i64(<8 x i16> %val, ptr %p, i64 %stride, <8 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_strided.store_v8i16_DEFAULT(<8 x i16> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_v8i16_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_v8i16_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e16, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_v8i16_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_v8i16_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e16, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse16.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.v8i16.i64(<8 x i16> %val, ptr %p, i64 %stride, <8 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <4 x i32> @test_nontemporal_vp_load_v4i32_P1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_v4i32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_v4i32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_v4i32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_v4i32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <4 x i32> @llvm.vp.load.v4i32.p0(ptr %p, <4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <4 x i32> %x
+}
+
+
+define <4 x i32> @test_nontemporal_vp_load_v4i32_PALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_v4i32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_v4i32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_v4i32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_v4i32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <4 x i32> @llvm.vp.load.v4i32.p0(ptr %p, <4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <4 x i32> %x
+}
+
+
+define <4 x i32> @test_nontemporal_vp_load_v4i32_S1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_v4i32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_v4i32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_v4i32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_v4i32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <4 x i32> @llvm.vp.load.v4i32.p0(ptr %p, <4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <4 x i32> %x
+}
+
+
+define <4 x i32> @test_nontemporal_vp_load_v4i32_ALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_v4i32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_v4i32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_v4i32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_v4i32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <4 x i32> @llvm.vp.load.v4i32.p0(ptr %p, <4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <4 x i32> %x
+}
+
+define <4 x i32> @test_nontemporal_vp_load_v4i32_DEFAULT(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_v4i32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_v4i32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_v4i32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_v4i32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <4 x i32> @llvm.vp.load.v4i32.p0(ptr %p, <4 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <4 x i32> %x
+}
+
+
+define void @test_nontemporal_vp_store_v4i32_P1(<4 x i32> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_v4i32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_v4i32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_v4i32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_v4i32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.v4i32.p0(<4 x i32> %val, ptr %p, <4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_v4i32_PALL(<4 x i32> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_v4i32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_v4i32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_v4i32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_v4i32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.v4i32.p0(<4 x i32> %val, ptr %p, <4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_v4i32_S1(<4 x i32> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_v4i32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_v4i32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_v4i32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_v4i32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.v4i32.p0(<4 x i32> %val, ptr %p, <4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_v4i32_ALL(<4 x i32> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_v4i32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_v4i32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_v4i32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_v4i32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.v4i32.p0(<4 x i32> %val, ptr %p, <4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_store_v4i32_DEFAULT(<4 x i32> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_v4i32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse32.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_v4i32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse32.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_v4i32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_v4i32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse32.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.v4i32.p0(<4 x i32> %val, ptr %p, <4 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <4 x i32> @test_nontemporal_vp_gather_v4i32_P1(<4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_v4i32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_v4i32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_v4i32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_v4i32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <4 x i32> @llvm.vp.gather.v4i32.v4p0(<4 x ptr> %ptrs, <4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <4 x i32> %x
+}
+
+
+define <4 x i32> @test_nontemporal_vp_gather_v4i32_PALL(<4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_v4i32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_v4i32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_v4i32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_v4i32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <4 x i32> @llvm.vp.gather.v4i32.v4p0(<4 x ptr> %ptrs, <4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <4 x i32> %x
+}
+
+
+define <4 x i32> @test_nontemporal_vp_gather_v4i32_S1(<4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_v4i32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_v4i32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_v4i32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_v4i32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <4 x i32> @llvm.vp.gather.v4i32.v4p0(<4 x ptr> %ptrs, <4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <4 x i32> %x
+}
+
+
+define <4 x i32> @test_nontemporal_vp_gather_v4i32_ALL(<4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_v4i32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_v4i32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_v4i32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_v4i32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <4 x i32> @llvm.vp.gather.v4i32.v4p0(<4 x ptr> %ptrs, <4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <4 x i32> %x
+}
+
+define <4 x i32> @test_nontemporal_vp_gather_v4i32_DEFAULT(<4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_v4i32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64V-NEXT: vmv.v.v v8, v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_v4i32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_v4i32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v10, (zero), v8
+; CHECK-RV64VC-NEXT: vmv.v.v v8, v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_v4i32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v8, (zero), v8
+; CHECK-RV32VC-NEXT: ret
+ %x = call <4 x i32> @llvm.vp.gather.v4i32.v4p0(<4 x ptr> %ptrs, <4 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <4 x i32> %x
+}
+
+
+define void @test_nontemporal_vp_scatter_v4i32_P1(<4 x i32> %val, <4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_v4i32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_v4i32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_v4i32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_v4i32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, <4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_v4i32_PALL(<4 x i32> %val, <4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_v4i32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_v4i32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_v4i32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_v4i32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, <4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_v4i32_S1(<4 x i32> %val, <4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_v4i32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_v4i32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_v4i32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_v4i32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, <4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_v4i32_ALL(<4 x i32> %val, <4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_v4i32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_v4i32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_v4i32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_v4i32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, <4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_scatter_v4i32_DEFAULT(<4 x i32> %val, <4 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_v4i32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_v4i32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_v4i32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v10
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_v4i32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, <4 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <4 x i32> @test_nontemporal_vp_strided.load_v4i32_P1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_v4i32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_v4i32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_v4i32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_v4i32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <4 x i32> @llvm.experimental.vp.strided.load.v4i32.i64(ptr %p, i64 %stride, <4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <4 x i32> %x
+}
+
+
+define <4 x i32> @test_nontemporal_vp_strided.load_v4i32_PALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_v4i32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_v4i32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_v4i32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_v4i32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <4 x i32> @llvm.experimental.vp.strided.load.v4i32.i64(ptr %p, i64 %stride, <4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <4 x i32> %x
+}
+
+
+define <4 x i32> @test_nontemporal_vp_strided.load_v4i32_S1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_v4i32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_v4i32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_v4i32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_v4i32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <4 x i32> @llvm.experimental.vp.strided.load.v4i32.i64(ptr %p, i64 %stride, <4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <4 x i32> %x
+}
+
+
+define <4 x i32> @test_nontemporal_vp_strided.load_v4i32_ALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_v4i32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_v4i32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_v4i32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_v4i32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <4 x i32> @llvm.experimental.vp.strided.load.v4i32.i64(ptr %p, i64 %stride, <4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <4 x i32> %x
+}
+
+define <4 x i32> @test_nontemporal_vp_strided.load_v4i32_DEFAULT(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_v4i32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_v4i32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_v4i32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_v4i32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <4 x i32> @llvm.experimental.vp.strided.load.v4i32.i64(ptr %p, i64 %stride, <4 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <4 x i32> %x
+}
+
+
+define void @test_nontemporal_vp_strided.store_v4i32_P1(<4 x i32> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_v4i32_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_v4i32_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_v4i32_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_v4i32_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.v4i32.i64(<4 x i32> %val, ptr %p, i64 %stride, <4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_v4i32_PALL(<4 x i32> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_v4i32_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_v4i32_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_v4i32_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_v4i32_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.v4i32.i64(<4 x i32> %val, ptr %p, i64 %stride, <4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_v4i32_S1(<4 x i32> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_v4i32_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_v4i32_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_v4i32_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_v4i32_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.v4i32.i64(<4 x i32> %val, ptr %p, i64 %stride, <4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_v4i32_ALL(<4 x i32> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_v4i32_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_v4i32_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_v4i32_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_v4i32_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.v4i32.i64(<4 x i32> %val, ptr %p, i64 %stride, <4 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_strided.store_v4i32_DEFAULT(<4 x i32> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_v4i32_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_v4i32_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_v4i32_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_v4i32_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse32.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.v4i32.i64(<4 x i32> %val, ptr %p, i64 %stride, <4 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <2 x i64> @test_nontemporal_vp_load_v2i64_P1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_v2i64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_v2i64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_v2i64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_v2i64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <2 x i64> @llvm.vp.load.v2i64.p0(ptr %p, <2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <2 x i64> %x
+}
+
+
+define <2 x i64> @test_nontemporal_vp_load_v2i64_PALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_v2i64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_v2i64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_v2i64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_v2i64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <2 x i64> @llvm.vp.load.v2i64.p0(ptr %p, <2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <2 x i64> %x
+}
+
+
+define <2 x i64> @test_nontemporal_vp_load_v2i64_S1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_v2i64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_v2i64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_v2i64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_v2i64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <2 x i64> @llvm.vp.load.v2i64.p0(ptr %p, <2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <2 x i64> %x
+}
+
+
+define <2 x i64> @test_nontemporal_vp_load_v2i64_ALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_v2i64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_v2i64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_v2i64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_v2i64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <2 x i64> @llvm.vp.load.v2i64.p0(ptr %p, <2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <2 x i64> %x
+}
+
+define <2 x i64> @test_nontemporal_vp_load_v2i64_DEFAULT(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_v2i64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_v2i64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_v2i64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_v2i64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <2 x i64> @llvm.vp.load.v2i64.p0(ptr %p, <2 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <2 x i64> %x
+}
+
+
+define void @test_nontemporal_vp_store_v2i64_P1(<2 x i64> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_v2i64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_v2i64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_v2i64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_v2i64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.v2i64.p0(<2 x i64> %val, ptr %p, <2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_v2i64_PALL(<2 x i64> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_v2i64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_v2i64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_v2i64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_v2i64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.v2i64.p0(<2 x i64> %val, ptr %p, <2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_v2i64_S1(<2 x i64> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_v2i64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_v2i64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_v2i64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_v2i64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.v2i64.p0(<2 x i64> %val, ptr %p, <2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_v2i64_ALL(<2 x i64> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_v2i64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_v2i64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_v2i64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_v2i64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.v2i64.p0(<2 x i64> %val, ptr %p, <2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_store_v2i64_DEFAULT(<2 x i64> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_v2i64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vse64.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_v2i64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vse64.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_v2i64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_v2i64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vse64.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.v2i64.p0(<2 x i64> %val, ptr %p, <2 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <2 x i64> @test_nontemporal_vp_gather_v2i64_P1(<2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_v2i64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_v2i64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_v2i64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_v2i64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v9
+; CHECK-RV32VC-NEXT: ret
+ %x = call <2 x i64> @llvm.vp.gather.v2i64.v2p0(<2 x ptr> %ptrs, <2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <2 x i64> %x
+}
+
+
+define <2 x i64> @test_nontemporal_vp_gather_v2i64_PALL(<2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_v2i64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_v2i64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_v2i64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_v2i64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v9
+; CHECK-RV32VC-NEXT: ret
+ %x = call <2 x i64> @llvm.vp.gather.v2i64.v2p0(<2 x ptr> %ptrs, <2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <2 x i64> %x
+}
+
+
+define <2 x i64> @test_nontemporal_vp_gather_v2i64_S1(<2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_v2i64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_v2i64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_v2i64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_v2i64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v9
+; CHECK-RV32VC-NEXT: ret
+ %x = call <2 x i64> @llvm.vp.gather.v2i64.v2p0(<2 x ptr> %ptrs, <2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <2 x i64> %x
+}
+
+
+define <2 x i64> @test_nontemporal_vp_gather_v2i64_ALL(<2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_v2i64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_v2i64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_v2i64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_v2i64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v9
+; CHECK-RV32VC-NEXT: ret
+ %x = call <2 x i64> @llvm.vp.gather.v2i64.v2p0(<2 x ptr> %ptrs, <2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <2 x i64> %x
+}
+
+define <2 x i64> @test_nontemporal_vp_gather_v2i64_DEFAULT(<2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_gather_v2i64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_gather_v2i64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32V-NEXT: vmv.v.v v8, v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_gather_v2i64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vluxei64.v v8, (zero), v8
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_gather_v2i64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vluxei32.v v9, (zero), v8
+; CHECK-RV32VC-NEXT: vmv.v.v v8, v9
+; CHECK-RV32VC-NEXT: ret
+ %x = call <2 x i64> @llvm.vp.gather.v2i64.v2p0(<2 x ptr> %ptrs, <2 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <2 x i64> %x
+}
+
+
+define void @test_nontemporal_vp_scatter_v2i64_P1(<2 x i64> %val, <2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_v2i64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_v2i64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_v2i64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_v2i64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.v2i64.v2p0(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_v2i64_PALL(<2 x i64> %val, <2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_v2i64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_v2i64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_v2i64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_v2i64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.v2i64.v2p0(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_v2i64_S1(<2 x i64> %val, <2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_v2i64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_v2i64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_v2i64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_v2i64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.v2i64.v2p0(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_scatter_v2i64_ALL(<2 x i64> %val, <2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_v2i64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_v2i64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_v2i64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_v2i64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.v2i64.v2p0(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_scatter_v2i64_DEFAULT(<2 x i64> %val, <2 x ptr> %ptrs, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_scatter_v2i64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_scatter_v2i64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_scatter_v2i64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsoxei64.v v8, (zero), v9
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_scatter_v2i64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsoxei32.v v8, (zero), v9
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.scatter.v2i64.v2p0(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
+define <2 x i64> @test_nontemporal_vp_strided.load_v2i64_P1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_v2i64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_v2i64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_v2i64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_v2i64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <2 x i64> @llvm.experimental.vp.strided.load.v2i64.i64(ptr %p, i64 %stride, <2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <2 x i64> %x
+}
+
+
+define <2 x i64> @test_nontemporal_vp_strided.load_v2i64_PALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_v2i64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_v2i64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_v2i64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_v2i64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <2 x i64> @llvm.experimental.vp.strided.load.v2i64.i64(ptr %p, i64 %stride, <2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <2 x i64> %x
+}
+
+
+define <2 x i64> @test_nontemporal_vp_strided.load_v2i64_S1(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_v2i64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_v2i64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_v2i64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_v2i64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <2 x i64> @llvm.experimental.vp.strided.load.v2i64.i64(ptr %p, i64 %stride, <2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <2 x i64> %x
+}
+
+
+define <2 x i64> @test_nontemporal_vp_strided.load_v2i64_ALL(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_v2i64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_v2i64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_v2i64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_v2i64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <2 x i64> @llvm.experimental.vp.strided.load.v2i64.i64(ptr %p, i64 %stride, <2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <2 x i64> %x
+}
+
+define <2 x i64> @test_nontemporal_vp_strided.load_v2i64_DEFAULT(ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.load_v2i64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.load_v2i64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.load_v2i64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.load_v2i64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vlse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ %x = call <2 x i64> @llvm.experimental.vp.strided.load.v2i64.i64(ptr %p, i64 %stride, <2 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <2 x i64> %x
+}
+
+
+define void @test_nontemporal_vp_strided.store_v2i64_P1(<2 x i64> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_v2i64_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_v2i64_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_v2i64_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_v2i64_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.v2i64.i64(<2 x i64> %val, ptr %p, i64 %stride, <2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_v2i64_PALL(<2 x i64> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_v2i64_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_v2i64_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_v2i64_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_v2i64_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.v2i64.i64(<2 x i64> %val, ptr %p, i64 %stride, <2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_v2i64_S1(<2 x i64> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_v2i64_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_v2i64_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_v2i64_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_v2i64_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.v2i64.i64(<2 x i64> %val, ptr %p, i64 %stride, <2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret void
+}
+
+
+define void @test_nontemporal_vp_strided.store_v2i64_ALL(<2 x i64> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_v2i64_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_v2i64_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_v2i64_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_v2i64_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.v2i64.i64(<2 x i64> %val, ptr %p, i64 %stride, <2 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret void
+}
+
+define void @test_nontemporal_vp_strided.store_v2i64_DEFAULT(<2 x i64> %val, ptr %p, i64 %stride, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_strided.store_v2i64_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_strided.store_v2i64_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_strided.store_v2i64_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_strided.store_v2i64_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vsse64.v v8, (a0), a1
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.experimental.vp.strided.store.v2i64.i64(<2 x i64> %val, ptr %p, i64 %stride, <2 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret void
+}
+
+
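+; Metadata used by the tests above: !0 attaches the !nontemporal flag, while
+; !1-!4 select the !riscv-nontemporal-domain hint that the CHECK lines expect:
+; !1 -> ntl.p1, !2 -> ntl.pall, !3 -> ntl.s1, !4 -> ntl.all. With !nontemporal
+; alone (the _DEFAULT tests), ntl.all is emitted.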
+!0 = !{i32 1}
+!1 = !{i32 2}
+!2 = !{i32 3}
+!3 = !{i32 4}
+!4 = !{i32 5}
+