[llvm] [CodeGen][RISCV] Add support of RISCV nontemporal to vector predication instructions. (PR #153033)

via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 11 08:24:32 PDT 2025


llvmbot wrote:



@llvm/pr-subscribers-backend-risc-v

Author: None (daniel-trujillo-bsc)

<details>
<summary>Changes</summary>

This PR adds support for VP intrinsics to honor nontemporal metadata: the MachineMemOperands created for VP loads, stores, gathers, scatters, and strided accesses now carry the nontemporal flag, so RISC-V can emit the corresponding ntl.* hints.

First-time contributor here. I hope these changes are simple enough not to be much of a pain to review, and I'm looking forward to hearing your feedback!

I'm not a GitHub user, so I had to create a throwaway account for this, but you can write to my BSC email (in the commit and on the web: https://www.bsc.es/trujillo-daniel) to verify my identity.
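
For context, this is the IR shape the patch handles, as a minimal sketch based on the added tests (the function name is illustrative): a VP load carrying `!nontemporal` metadata, which with `+zihintntl` now lowers to an ntl.*-hinted vector access. The tests additionally attach `!riscv-nontemporal-domain` metadata to select a specific hint (ntl.p1, ntl.pall, ntl.s1, ntl.all); without it, ntl.all is used.

```llvm
declare <vscale x 1 x i8> @llvm.vp.load.nxv1i8.p0(ptr, <vscale x 1 x i1>, i32)

; With this patch, the !nontemporal metadata propagates to the MachineMemOperand,
; so an ntl.all hint is emitted before the vle8.v produced for this vp.load.
define <vscale x 1 x i8> @vp_load_nontemporal(ptr %p, i32 zeroext %vl) {
  %x = call <vscale x 1 x i8> @llvm.vp.load.nxv1i8.p0(ptr %p, <vscale x 1 x i1> splat (i1 true), i32 %vl), !nontemporal !0
  ret <vscale x 1 x i8> %x
}

!0 = !{i32 1}
```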

---

Patch is 1.80 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/153033.diff


6 Files Affected:

- (modified) llvm/include/llvm/CodeGen/SelectionDAG.h (+6) 
- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp (+12-6) 
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (+14) 
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+32-10) 
- (added) llvm/test/CodeGen/RISCV/nontemporal-vp-scalable.ll (+40677) 
- (added) llvm/test/CodeGen/RISCV/nontemporal-vp.ll (+4009) 


``````````diff
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index dc00db9daa3b6..3dab1b1e8712d 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -91,6 +91,7 @@ class TargetLowering;
 class TargetMachine;
 class TargetSubtargetInfo;
 class Value;
+class VPIntrinsic;
 
 template <typename T> class GenericSSAContext;
 using SSAContext = GenericSSAContext<Function>;
@@ -1007,6 +1008,11 @@ class SelectionDAG {
     llvm_unreachable("Unknown opcode");
   }
 
+  static MachineMemOperand::Flags
+  getNonTemporalMemFlag(const VPIntrinsic &VPIntrin);
+
+  static MachineMemOperand::Flags getNonTemporalMemFlag(const MemSDNode &N);
+
   /// Convert Op, which must be of integer type, to the
   /// integer type VT, by either any-extending or truncating it.
   LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index bc2dbfb4cbaae..a21a9b518fcde 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -2476,10 +2476,13 @@ void DAGTypeLegalizer::SplitVecRes_Gather(MemSDNode *N, SDValue &Lo,
   else
     std::tie(IndexLo, IndexHi) = DAG.SplitVector(Ops.Index, dl);
 
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  MachineMemOperand::Flags MMOFlags = MachineMemOperand::MOLoad |
+                                      TLI.getTargetMMOFlags(*N) |
+                                      SelectionDAG::getNonTemporalMemFlag(*N);
   MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
-      N->getPointerInfo(), MachineMemOperand::MOLoad,
-      LocationSize::beforeOrAfterPointer(), Alignment, N->getAAInfo(),
-      N->getRanges());
+      N->getPointerInfo(), MMOFlags, LocationSize::beforeOrAfterPointer(),
+      Alignment, N->getAAInfo(), N->getRanges());
 
   if (auto *MGT = dyn_cast<MaskedGatherSDNode>(N)) {
     SDValue PassThru = MGT->getPassThru();
@@ -4248,10 +4251,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_Scatter(MemSDNode *N, unsigned OpNo) {
     std::tie(IndexLo, IndexHi) = DAG.SplitVector(Ops.Index, DL);
 
   SDValue Lo;
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  MachineMemOperand::Flags MMOFlags = MachineMemOperand::MOStore |
+                                      TLI.getTargetMMOFlags(*N) |
+                                      SelectionDAG::getNonTemporalMemFlag(*N);
   MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
-      N->getPointerInfo(), MachineMemOperand::MOStore,
-      LocationSize::beforeOrAfterPointer(), Alignment, N->getAAInfo(),
-      N->getRanges());
+      N->getPointerInfo(), MMOFlags, LocationSize::beforeOrAfterPointer(),
+      Alignment, N->getAAInfo(), N->getRanges());
 
   if (auto *MSC = dyn_cast<MaskedScatterSDNode>(N)) {
     SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Ops.Scale};
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 5ef1746333040..4e6d52846ae44 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -56,6 +56,7 @@
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Type.h"
 #include "llvm/Support/Casting.h"
@@ -14055,6 +14056,19 @@ void SelectionDAG::copyExtraInfo(SDNode *From, SDNode *To) {
   SDEI[To] = std::move(NEI);
 }
 
+MachineMemOperand::Flags
+SelectionDAG::getNonTemporalMemFlag(const VPIntrinsic &VPIntrin) {
+  return VPIntrin.hasMetadata(LLVMContext::MD_nontemporal)
+             ? MachineMemOperand::MONonTemporal
+             : MachineMemOperand::MONone;
+}
+
+MachineMemOperand::Flags
+SelectionDAG::getNonTemporalMemFlag(const MemSDNode &N) {
+  return N.isNonTemporal() ? MachineMemOperand::MONonTemporal
+                           : MachineMemOperand::MONone;
+}
+
 #ifndef NDEBUG
 static void checkForCyclesHelper(const SDNode *N,
                                  SmallPtrSetImpl<const SDNode*> &Visited,
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 7aa1fadd10dfc..a21992af3ce42 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -8438,8 +8438,12 @@ void SelectionDAGBuilder::visitVPLoad(
   MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo);
   bool AddToChain = !BatchAA || !BatchAA->pointsToConstantMemory(ML);
   SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  MachineMemOperand::Flags MMOFlags =
+      MachineMemOperand::MOLoad | TLI.getTargetMMOFlags(VPIntrin) |
+      SelectionDAG::getNonTemporalMemFlag(VPIntrin);
   MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
-      MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
+      MachinePointerInfo(PtrOperand), MMOFlags,
       LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo, Ranges);
   LD = DAG.getLoadVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2],
                      MMO, false /*IsExpanding */);
@@ -8490,9 +8494,12 @@ void SelectionDAGBuilder::visitVPGather(
     Alignment = DAG.getEVTAlign(VT.getScalarType());
   unsigned AS =
     PtrOperand->getType()->getScalarType()->getPointerAddressSpace();
+  MachineMemOperand::Flags MMOFlags =
+      MachineMemOperand::MOLoad | TLI.getTargetMMOFlags(VPIntrin) |
+      SelectionDAG::getNonTemporalMemFlag(VPIntrin);
   MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
-      MachinePointerInfo(AS), MachineMemOperand::MOLoad,
-      LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo, Ranges);
+      MachinePointerInfo(AS), MMOFlags, LocationSize::beforeOrAfterPointer(),
+      *Alignment, AAInfo, Ranges);
   SDValue Base, Index, Scale;
   ISD::MemIndexType IndexType;
   bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale,
@@ -8530,8 +8537,12 @@ void SelectionDAGBuilder::visitVPStore(
     Alignment = DAG.getEVTAlign(VT);
   SDValue Ptr = OpValues[1];
   SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  MachineMemOperand::Flags MMOFlags =
+      MachineMemOperand::MOStore | TLI.getTargetMMOFlags(VPIntrin) |
+      SelectionDAG::getNonTemporalMemFlag(VPIntrin);
   MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
-      MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
+      MachinePointerInfo(PtrOperand), MMOFlags,
       LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo);
   ST = DAG.getStoreVP(getMemoryRoot(), DL, OpValues[0], Ptr, Offset,
                       OpValues[2], OpValues[3], VT, MMO, ISD::UNINDEXED,
@@ -8553,9 +8564,12 @@ void SelectionDAGBuilder::visitVPScatter(
     Alignment = DAG.getEVTAlign(VT.getScalarType());
   unsigned AS =
       PtrOperand->getType()->getScalarType()->getPointerAddressSpace();
+  MachineMemOperand::Flags MMOFlags =
+      MachineMemOperand::MOStore | TLI.getTargetMMOFlags(VPIntrin) |
+      SelectionDAG::getNonTemporalMemFlag(VPIntrin);
   MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
-      MachinePointerInfo(AS), MachineMemOperand::MOStore,
-      LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo);
+      MachinePointerInfo(AS), MMOFlags, LocationSize::beforeOrAfterPointer(),
+      *Alignment, AAInfo);
   SDValue Base, Index, Scale;
   ISD::MemIndexType IndexType;
   bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale,
@@ -8596,9 +8610,13 @@ void SelectionDAGBuilder::visitVPStridedLoad(
   bool AddToChain = !BatchAA || !BatchAA->pointsToConstantMemory(ML);
   SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
   unsigned AS = PtrOperand->getType()->getPointerAddressSpace();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  MachineMemOperand::Flags MMOFlags =
+      MachineMemOperand::MOLoad | TLI.getTargetMMOFlags(VPIntrin) |
+      SelectionDAG::getNonTemporalMemFlag(VPIntrin);
   MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
-      MachinePointerInfo(AS), MachineMemOperand::MOLoad,
-      LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo, Ranges);
+      MachinePointerInfo(AS), MMOFlags, LocationSize::beforeOrAfterPointer(),
+      *Alignment, AAInfo, Ranges);
 
   SDValue LD = DAG.getStridedLoadVP(VT, DL, InChain, OpValues[0], OpValues[1],
                                     OpValues[2], OpValues[3], MMO,
@@ -8619,9 +8637,13 @@ void SelectionDAGBuilder::visitVPStridedStore(
     Alignment = DAG.getEVTAlign(VT.getScalarType());
   AAMDNodes AAInfo = VPIntrin.getAAMetadata();
   unsigned AS = PtrOperand->getType()->getPointerAddressSpace();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  MachineMemOperand::Flags MMOFlags =
+      MachineMemOperand::MOStore | TLI.getTargetMMOFlags(VPIntrin) |
+      SelectionDAG::getNonTemporalMemFlag(VPIntrin);
   MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
-      MachinePointerInfo(AS), MachineMemOperand::MOStore,
-      LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo);
+      MachinePointerInfo(AS), MMOFlags, LocationSize::beforeOrAfterPointer(),
+      *Alignment, AAInfo);
 
   SDValue ST = DAG.getStridedStoreVP(
       getMemoryRoot(), DL, OpValues[0], OpValues[1],
diff --git a/llvm/test/CodeGen/RISCV/nontemporal-vp-scalable.ll b/llvm/test/CodeGen/RISCV/nontemporal-vp-scalable.ll
new file mode 100644
index 0000000000000..4bc6313494d41
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/nontemporal-vp-scalable.ll
@@ -0,0 +1,40677 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv64 -mattr=+zihintntl,+f,+d,+zfh,+v < %s | FileCheck %s -check-prefix=CHECK-RV64V
+; RUN: llc -mtriple=riscv32 -mattr=+zihintntl,+f,+d,+zfh,+v < %s | FileCheck %s -check-prefix=CHECK-RV32V
+; RUN: llc -mtriple=riscv64 -mattr=+zihintntl,+f,+d,+zfh,+v,+c < %s | FileCheck %s -check-prefix=CHECK-RV64VC
+; RUN: llc -mtriple=riscv32 -mattr=+zihintntl,+f,+d,+zfh,+v,+c < %s | FileCheck %s -check-prefix=CHECK-RV32VC
+
+
+define <vscale x 1 x i8> @test_nontemporal_vp_load_nxv1i8_P1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv1i8_P1:
+; CHECK-RV64V:       # %bb.0:
+; CHECK-RV64V-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT:    ntl.p1
+; CHECK-RV64V-NEXT:    vle8.v v8, (a0)
+; CHECK-RV64V-NEXT:    ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv1i8_P1:
+; CHECK-RV32V:       # %bb.0:
+; CHECK-RV32V-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT:    ntl.p1
+; CHECK-RV32V-NEXT:    vle8.v v8, (a0)
+; CHECK-RV32V-NEXT:    ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv1i8_P1:
+; CHECK-RV64VC:       # %bb.0:
+; CHECK-RV64VC-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT:    c.ntl.p1
+; CHECK-RV64VC-NEXT:    vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT:    ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv1i8_P1:
+; CHECK-RV32VC:       # %bb.0:
+; CHECK-RV32VC-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32VC-NEXT:    c.ntl.p1
+; CHECK-RV32VC-NEXT:    vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT:    ret
+  %x = call <vscale x 1 x i8> @llvm.vp.load.nxv1i8.p0(ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+  ret <vscale x 1 x i8> %x
+}
+
+
+define <vscale x 1 x i8> @test_nontemporal_vp_load_nxv1i8_PALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv1i8_PALL:
+; CHECK-RV64V:       # %bb.0:
+; CHECK-RV64V-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT:    ntl.pall
+; CHECK-RV64V-NEXT:    vle8.v v8, (a0)
+; CHECK-RV64V-NEXT:    ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv1i8_PALL:
+; CHECK-RV32V:       # %bb.0:
+; CHECK-RV32V-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT:    ntl.pall
+; CHECK-RV32V-NEXT:    vle8.v v8, (a0)
+; CHECK-RV32V-NEXT:    ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv1i8_PALL:
+; CHECK-RV64VC:       # %bb.0:
+; CHECK-RV64VC-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT:    c.ntl.pall
+; CHECK-RV64VC-NEXT:    vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT:    ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv1i8_PALL:
+; CHECK-RV32VC:       # %bb.0:
+; CHECK-RV32VC-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32VC-NEXT:    c.ntl.pall
+; CHECK-RV32VC-NEXT:    vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT:    ret
+  %x = call <vscale x 1 x i8> @llvm.vp.load.nxv1i8.p0(ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+  ret <vscale x 1 x i8> %x
+}
+
+
+define <vscale x 1 x i8> @test_nontemporal_vp_load_nxv1i8_S1(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv1i8_S1:
+; CHECK-RV64V:       # %bb.0:
+; CHECK-RV64V-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT:    ntl.s1
+; CHECK-RV64V-NEXT:    vle8.v v8, (a0)
+; CHECK-RV64V-NEXT:    ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv1i8_S1:
+; CHECK-RV32V:       # %bb.0:
+; CHECK-RV32V-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT:    ntl.s1
+; CHECK-RV32V-NEXT:    vle8.v v8, (a0)
+; CHECK-RV32V-NEXT:    ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv1i8_S1:
+; CHECK-RV64VC:       # %bb.0:
+; CHECK-RV64VC-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT:    c.ntl.s1
+; CHECK-RV64VC-NEXT:    vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT:    ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv1i8_S1:
+; CHECK-RV32VC:       # %bb.0:
+; CHECK-RV32VC-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32VC-NEXT:    c.ntl.s1
+; CHECK-RV32VC-NEXT:    vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT:    ret
+  %x = call <vscale x 1 x i8> @llvm.vp.load.nxv1i8.p0(ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !3
+  ret <vscale x 1 x i8> %x
+}
+
+
+define <vscale x 1 x i8> @test_nontemporal_vp_load_nxv1i8_ALL(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv1i8_ALL:
+; CHECK-RV64V:       # %bb.0:
+; CHECK-RV64V-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT:    ntl.all
+; CHECK-RV64V-NEXT:    vle8.v v8, (a0)
+; CHECK-RV64V-NEXT:    ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv1i8_ALL:
+; CHECK-RV32V:       # %bb.0:
+; CHECK-RV32V-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT:    ntl.all
+; CHECK-RV32V-NEXT:    vle8.v v8, (a0)
+; CHECK-RV32V-NEXT:    ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv1i8_ALL:
+; CHECK-RV64VC:       # %bb.0:
+; CHECK-RV64VC-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT:    c.ntl.all
+; CHECK-RV64VC-NEXT:    vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT:    ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv1i8_ALL:
+; CHECK-RV32VC:       # %bb.0:
+; CHECK-RV32VC-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32VC-NEXT:    c.ntl.all
+; CHECK-RV32VC-NEXT:    vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT:    ret
+  %x = call <vscale x 1 x i8> @llvm.vp.load.nxv1i8.p0(ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !4
+  ret <vscale x 1 x i8> %x
+}
+
+define <vscale x 1 x i8> @test_nontemporal_vp_load_nxv1i8_DEFAULT(ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_load_nxv1i8_DEFAULT:
+; CHECK-RV64V:       # %bb.0:
+; CHECK-RV64V-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT:    ntl.all
+; CHECK-RV64V-NEXT:    vle8.v v8, (a0)
+; CHECK-RV64V-NEXT:    ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_load_nxv1i8_DEFAULT:
+; CHECK-RV32V:       # %bb.0:
+; CHECK-RV32V-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT:    ntl.all
+; CHECK-RV32V-NEXT:    vle8.v v8, (a0)
+; CHECK-RV32V-NEXT:    ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_load_nxv1i8_DEFAULT:
+; CHECK-RV64VC:       # %bb.0:
+; CHECK-RV64VC-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT:    c.ntl.all
+; CHECK-RV64VC-NEXT:    vle8.v v8, (a0)
+; CHECK-RV64VC-NEXT:    ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_load_nxv1i8_DEFAULT:
+; CHECK-RV32VC:       # %bb.0:
+; CHECK-RV32VC-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32VC-NEXT:    c.ntl.all
+; CHECK-RV32VC-NEXT:    vle8.v v8, (a0)
+; CHECK-RV32VC-NEXT:    ret
+  %x = call <vscale x 1 x i8> @llvm.vp.load.nxv1i8.p0(ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0
+  ret <vscale x 1 x i8> %x
+}
+
+
+define void @test_nontemporal_vp_store_nxv1i8_P1(<vscale x 1 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv1i8_P1:
+; CHECK-RV64V:       # %bb.0:
+; CHECK-RV64V-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT:    ntl.p1
+; CHECK-RV64V-NEXT:    vse8.v v8, (a0)
+; CHECK-RV64V-NEXT:    ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv1i8_P1:
+; CHECK-RV32V:       # %bb.0:
+; CHECK-RV32V-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT:    ntl.p1
+; CHECK-RV32V-NEXT:    vse8.v v8, (a0)
+; CHECK-RV32V-NEXT:    ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv1i8_P1:
+; CHECK-RV64VC:       # %bb.0:
+; CHECK-RV64VC-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT:    c.ntl.p1
+; CHECK-RV64VC-NEXT:    vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT:    ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv1i8_P1:
+; CHECK-RV32VC:       # %bb.0:
+; CHECK-RV32VC-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32VC-NEXT:    c.ntl.p1
+; CHECK-RV32VC-NEXT:    vse8.v v8, (a0)
+; CHECK-RV32VC-NEXT:    ret
+  call void @llvm.vp.store.nxv1i8.p0(<vscale x 1 x i8> %val, ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !1
+  ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv1i8_PALL(<vscale x 1 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv1i8_PALL:
+; CHECK-RV64V:       # %bb.0:
+; CHECK-RV64V-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT:    ntl.pall
+; CHECK-RV64V-NEXT:    vse8.v v8, (a0)
+; CHECK-RV64V-NEXT:    ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv1i8_PALL:
+; CHECK-RV32V:       # %bb.0:
+; CHECK-RV32V-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT:    ntl.pall
+; CHECK-RV32V-NEXT:    vse8.v v8, (a0)
+; CHECK-RV32V-NEXT:    ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv1i8_PALL:
+; CHECK-RV64VC:       # %bb.0:
+; CHECK-RV64VC-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT:    c.ntl.pall
+; CHECK-RV64VC-NEXT:    vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT:    ret
+;
+; CHECK-RV32VC-LABEL: test_nontemporal_vp_store_nxv1i8_PALL:
+; CHECK-RV32VC:       # %bb.0:
+; CHECK-RV32VC-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32VC-NEXT:    c.ntl.pall
+; CHECK-RV32VC-NEXT:    vse8.v v8, (a0)
+; CHECK-RV32VC-NEXT:    ret
+  call void @llvm.vp.store.nxv1i8.p0(<vscale x 1 x i8> %val, ptr %p, <vscale x 1 x i1> splat(i1 true), i32 %vl), !nontemporal !0, !riscv-nontemporal-domain !2
+  ret void
+}
+
+
+define void @test_nontemporal_vp_store_nxv1i8_S1(<vscale x 1 x i8> %val, ptr %p, i32 zeroext %vl) {
+; CHECK-RV64V-LABEL: test_nontemporal_vp_store_nxv1i8_S1:
+; CHECK-RV64V:       # %bb.0:
+; CHECK-RV64V-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64V-NEXT:    ntl.s1
+; CHECK-RV64V-NEXT:    vse8.v v8, (a0)
+; CHECK-RV64V-NEXT:    ret
+;
+; CHECK-RV32V-LABEL: test_nontemporal_vp_store_nxv1i8_S1:
+; CHECK-RV32V:       # %bb.0:
+; CHECK-RV32V-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV32V-NEXT:    ntl.s1
+; CHECK-RV32V-NEXT:    vse8.v v8, (a0)
+; CHECK-RV32V-NEXT:    ret
+;
+; CHECK-RV64VC-LABEL: test_nontemporal_vp_store_nxv1i8_S1:
+; CHECK-RV64VC:       # %bb.0:
+; CHECK-RV64VC-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-RV64VC-NEXT:    c.ntl.s1
+; CHECK-RV64VC-NEXT:    vse8.v v8, (a0)
+; CHECK-RV64VC-NEXT:    ret
+;
+; CHECK-RV32VC-LABEL:...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/153033


More information about the llvm-commits mailing list