[llvm] [CodeGen][RISCV] Add support of RISCV nontemporal to vector predication instructions. (PR #153033)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 11 08:24:32 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: None (daniel-trujillo-bsc)
<details>
<summary>Changes</summary>
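This PR makes the VP (vector predication) memory intrinsics aware of `!nontemporal` metadata: when these intrinsics are lowered, the nontemporal flag is now set on the memory operands created for them.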
First-time contributor here. I hope these changes are simple enough not to be much of a pain to review, and I'm looking forward to hearing your feedback!
I'm not a GitHub user, so I had to create a throwaway account for this, but you can write to my BSC email (listed in the commit and on the web: https://www.bsc.es/trujillo-daniel) to verify my identity.
---
Patch is 1.80 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/153033.diff
6 Files Affected:
- (modified) llvm/include/llvm/CodeGen/SelectionDAG.h (+6)
- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp (+12-6)
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (+14)
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+32-10)
- (added) llvm/test/CodeGen/RISCV/nontemporal-vp-scalable.ll (+40677)
- (added) llvm/test/CodeGen/RISCV/nontemporal-vp.ll (+4009)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index dc00db9daa3b6..3dab1b1e8712d 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -91,6 +91,7 @@ class TargetLowering;
class TargetMachine;
class TargetSubtargetInfo;
class Value;
+class VPIntrinsic;
template <typename T> class GenericSSAContext;
using SSAContext = GenericSSAContext<Function>;
@@ -1007,6 +1008,11 @@ class SelectionDAG {
llvm_unreachable("Unknown opcode");
}
+ static MachineMemOperand::Flags
+ getNonTemporalMemFlag(const VPIntrinsic &VPIntrin);
+
+ static MachineMemOperand::Flags getNonTemporalMemFlag(const MemSDNode &N);
+
/// Convert Op, which must be of integer type, to the
/// integer type VT, by either any-extending or truncating it.
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index bc2dbfb4cbaae..a21a9b518fcde 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -2476,10 +2476,13 @@ void DAGTypeLegalizer::SplitVecRes_Gather(MemSDNode *N, SDValue &Lo,
else
std::tie(IndexLo, IndexHi) = DAG.SplitVector(Ops.Index, dl);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MachineMemOperand::Flags MMOFlags = MachineMemOperand::MOLoad |
+ TLI.getTargetMMOFlags(*N) |
+ SelectionDAG::getNonTemporalMemFlag(*N);
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- N->getPointerInfo(), MachineMemOperand::MOLoad,
- LocationSize::beforeOrAfterPointer(), Alignment, N->getAAInfo(),
- N->getRanges());
+ N->getPointerInfo(), MMOFlags, LocationSize::beforeOrAfterPointer(),
+ Alignment, N->getAAInfo(), N->getRanges());
if (auto *MGT = dyn_cast<MaskedGatherSDNode>(N)) {
SDValue PassThru = MGT->getPassThru();
@@ -4248,10 +4251,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_Scatter(MemSDNode *N, unsigned OpNo) {
std::tie(IndexLo, IndexHi) = DAG.SplitVector(Ops.Index, DL);
SDValue Lo;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MachineMemOperand::Flags MMOFlags = MachineMemOperand::MOStore |
+ TLI.getTargetMMOFlags(*N) |
+ SelectionDAG::getNonTemporalMemFlag(*N);
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- N->getPointerInfo(), MachineMemOperand::MOStore,
- LocationSize::beforeOrAfterPointer(), Alignment, N->getAAInfo(),
- N->getRanges());
+ N->getPointerInfo(), MMOFlags, LocationSize::beforeOrAfterPointer(),
+ Alignment, N->getAAInfo(), N->getRanges());
if (auto *MSC = dyn_cast<MaskedScatterSDNode>(N)) {
SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Ops.Scale};
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 5ef1746333040..4e6d52846ae44 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -56,6 +56,7 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Casting.h"
@@ -14055,6 +14056,19 @@ void SelectionDAG::copyExtraInfo(SDNode *From, SDNode *To) {
SDEI[To] = std::move(NEI);
}
+MachineMemOperand::Flags
+SelectionDAG::getNonTemporalMemFlag(const VPIntrinsic &VPIntrin) {
+ return VPIntrin.hasMetadata(LLVMContext::MD_nontemporal)
+ ? MachineMemOperand::MONonTemporal
+ : MachineMemOperand::MONone;
+}
+
+MachineMemOperand::Flags
+SelectionDAG::getNonTemporalMemFlag(const MemSDNode &N) {
+ return N.isNonTemporal() ? MachineMemOperand::MONonTemporal
+ : MachineMemOperand::MONone;
+}
+
#ifndef NDEBUG
static void checkForCyclesHelper(const SDNode *N,
SmallPtrSetImpl<const SDNode*> &Visited,
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 7aa1fadd10dfc..a21992af3ce42 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -8438,8 +8438,12 @@ void SelectionDAGBuilder::visitVPLoad(
MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo);
bool AddToChain = !BatchAA || !BatchAA->pointsToConstantMemory(ML);
SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MachineMemOperand::Flags MMOFlags =
+ MachineMemOperand::MOLoad | TLI.getTargetMMOFlags(VPIntrin) |
+ SelectionDAG::getNonTemporalMemFlag(VPIntrin);
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
+ MachinePointerInfo(PtrOperand), MMOFlags,
LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo, Ranges);
LD = DAG.getLoadVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2],
MMO, false /*IsExpanding */);
@@ -8490,9 +8494,12 @@ void SelectionDAGBuilder::visitVPGather(
Alignment = DAG.getEVTAlign(VT.getScalarType());
unsigned AS =
PtrOperand->getType()->getScalarType()->getPointerAddressSpace();
+ MachineMemOperand::Flags MMOFlags =
+ MachineMemOperand::MOLoad | TLI.getTargetMMOFlags(VPIntrin) |
+ SelectionDAG::getNonTemporalMemFlag(VPIntrin);
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- MachinePointerInfo(AS), MachineMemOperand::MOLoad,
- LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo, Ranges);
+ MachinePointerInfo(AS), MMOFlags, LocationSize::beforeOrAfterPointer(),
+ *Alignment, AAInfo, Ranges);
SDValue Base, Index, Scale;
ISD::MemIndexType IndexType;
bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale,
@@ -8530,8 +8537,12 @@ void SelectionDAGBuilder::visitVPStore(
Alignment = DAG.getEVTAlign(VT);
SDValue Ptr = OpValues[1];
SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MachineMemOperand::Flags MMOFlags =
+ MachineMemOperand::MOStore | TLI.getTargetMMOFlags(VPIntrin) |
+ SelectionDAG::getNonTemporalMemFlag(VPIntrin);
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
+ MachinePointerInfo(PtrOperand), MMOFlags,
LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo);
ST = DAG.getStoreVP(getMemoryRoot(), DL, OpValues[0], Ptr, Offset,
OpValues[2], OpValues[3], VT, MMO, ISD::UNINDEXED,
@@ -8553,9 +8564,12 @@ void SelectionDAGBuilder::visitVPScatter(
Alignment = DAG.getEVTAlign(VT.getScalarType());
unsigned AS =
PtrOperand->getType()->getScalarType()->getPointerAddressSpace();
+ MachineMemOperand::Flags MMOFlags =
+ MachineMemOperand::MOStore | TLI.getTargetMMOFlags(VPIntrin) |
+ SelectionDAG::getNonTemporalMemFlag(VPIntrin);
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- MachinePointerInfo(AS), MachineMemOperand::MOStore,
- LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo);
+ MachinePointerInfo(AS), MMOFlags, LocationSize::beforeOrAfterPointer(),
+ *Alignment, AAInfo);
SDValue Base, Index, Scale;
ISD::MemIndexType IndexType;
bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale,
@@ -8596,9 +8610,13 @@ void SelectionDAGBuilder::visitVPStridedLoad(
bool AddToChain = !BatchAA || !BatchAA->pointsToConstantMemory(ML);
SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
unsigned AS = PtrOperand->getType()->getPointerAddressSpace();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MachineMemOperand::Flags MMOFlags =
+ MachineMemOperand::MOLoad | TLI.getTargetMMOFlags(VPIntrin) |
+ SelectionDAG::getNonTemporalMemFlag(VPIntrin);
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- MachinePointerInfo(AS), MachineMemOperand::MOLoad,
- LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo, Ranges);
+ MachinePointerInfo(AS), MMOFlags, LocationSize::beforeOrAfterPointer(),
+ *Alignment, AAInfo, Ranges);
SDValue LD = DAG.getStridedLoadVP(VT, DL, InChain, OpValues[0], OpValues[1],
OpValues[2], OpValues[3], MMO,
@@ -8619,9 +8637,13 @@ void SelectionDAGBuilder::visitVPStridedStore(
Alignment = DAG.getEVTAlign(VT.getScalarType());
AAMDNodes AAInfo = VPIntrin.getAAMetadata();
unsigned AS = PtrOperand->getType()->getPointerAddressSpace();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MachineMemOperand::Flags MMOFlags =
+ MachineMemOperand::MOStore | TLI.getTargetMMOFlags(VPIntrin) |
+ SelectionDAG::getNonTemporalMemFlag(VPIntrin);
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- MachinePointerInfo(AS), MachineMemOperand::MOStore,
- LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo);
+ MachinePointerInfo(AS), MMOFlags, LocationSize::beforeOrAfterPointer(),
+ *Alignment, AAInfo);
SDValue ST = DAG.getStridedStoreVP(
getMemoryRoot(), DL, OpValues[0], OpValues[1],
diff --git a/llvm/test/CodeGen/RISCV/nontemporal-vp-scalable.ll b/llvm/test/CodeGen/RISCV/nontemporal-vp-scalable.ll
new file mode 100644
index 0000000000000..4bc6313494d41
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/nontemporal-vp-scalable.ll
@@ -0,0 +1,40677 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv64 -mattr=+zihintntl,+f,+d,+zfh,+v < %s | FileCheck %s -check-prefix=CHECK-RV64V
+; RUN: llc -mtriple=riscv32 -mattr=+zihintntl,+f,+d,+zfh,+v < %s | FileCheck %s -check-prefix=CHECK-RV32V
+; RUN: llc -mtriple=riscv64 -mattr=+zihintntl,+f,+d,+zfh,+v,+c < %s | FileCheck %s -check-prefix=CHECK-RV64VC
+; RUN: llc -mtriple=riscv32 -mattr=+zihintntl,+f,+d,+zfh,+v,+c < %s | FileCheck %s -check-prefix=CHECK-RV32VC
+
+
+define <vscale x 1 x i8> @test_nontemporal_vp_load_nxv1i8_P1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv1i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv1i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv1i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv1i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i8> @llvm.vp.load.nxv1i8.p0(ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret <vscale x 1 x i8> %x
+}
+
+
+define <vscale x 1 x i8> @test_nontemporal_vp_load_nxv1i8_PALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv1i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv1i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv1i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv1i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i8> @llvm.vp.load.nxv1i8.p0(ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret <vscale x 1 x i8> %x
+}
+
+
+define <vscale x 1 x i8> @test_nontemporal_vp_load_nxv1i8_S1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv1i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv1i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv1i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv1i8_S1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.s1
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i8> @llvm.vp.load.nxv1i8.p0(ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+ ret <vscale x 1 x i8> %x
+}
+
+
+define <vscale x 1 x i8> @test_nontemporal_vp_load_nxv1i8_ALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv1i8_ALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv1i8_ALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv1i8_ALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv1i8_ALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i8> @llvm.vp.load.nxv1i8.p0(ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+ ret <vscale x 1 x i8> %x
+}
+
+define <vscale x 1 x i8> @test_nontemporal_vp_load_nxv1i8_DEFAULT(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv1i8_DEFAULT:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT: ntl.all
+; CHECK-RV64V-NEXT: vle8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv1i8_DEFAULT:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: vle8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv1i8_DEFAULT:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.all
+; CHECK-RV64VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv1i8_DEFAULT:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.all
+; CHECK-RV32VC-NEXT: vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ %x = call <vscale x 1 x i8> @llvm.vp.load.nxv1i8.p0(ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+ ret <vscale x 1 x i8> %x
+}
+
+
+define void @test_nontemporal_vp_store_nxv1i8_P1(<vscale x 1 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv1i8_P1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT: ntl.p1
+; CHECK-RV64V-NEXT: vse8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv1i8_P1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: vse8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv1i8_P1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.p1
+; CHECK-RV64VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv1i8_P1:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.p1
+; CHECK-RV32VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv1i8.p0(<vscale x 1 x i8> %val, ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv1i8_PALL(<vscale x 1 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv1i8_PALL:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT: ntl.pall
+; CHECK-RV64V-NEXT: vse8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv1i8_PALL:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: vse8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv1i8_PALL:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.pall
+; CHECK-RV64VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv1i8_PALL:
+; CHECK-RV32VC: # %bb.0:
+; CHECK-RV32VC-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32VC-NEXT: c.ntl.pall
+; CHECK-RV32VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV32VC-NEXT: ret
+ call void @llvm.vp.store.nxv1i8.p0(<vscale x 1 x i8> %val, ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+ ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv1i8_S1(<vscale x 1 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv1i8_S1:
+; CHECK-RV64V: # %bb.0:
+; CHECK-RV64V-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT: ntl.s1
+; CHECK-RV64V-NEXT: vse8.v v8, (a0)
+; CHECK-RV64V-NEXT: ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv1i8_S1:
+; CHECK-RV32V: # %bb.0:
+; CHECK-RV32V-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: vse8.v v8, (a0)
+; CHECK-RV32V-NEXT: ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv1i8_S1:
+; CHECK-RV64VC: # %bb.0:
+; CHECK-RV64VC-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT: c.ntl.s1
+; CHECK-RV64VC-NEXT: vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT: ret
+;
+; CHECK-RV32VC-LABEL:...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/153033
More information about the llvm-commits
mailing list