[Mlir-commits] [mlir] [MLIR][NVVM] Add prefetch Ops (PR #141737)

Thu May 29 05:44:26 PDT 2025

================
@@ -2333,6 +2334,90 @@ def NVVM_CpAsyncBulkTensorSharedCTAToGlobalOp :
   let hasVerifier = 1;
 }
 
+//===----------------------------------------------------------------------===//
+// NVVM Prefetch Ops
+//===----------------------------------------------------------------------===//
+
+def NVVM_PrefetchL1Op : NVVM_Op<"prefetch.L1"> {
+  let description = [{
+    Brings the cache line containing the specified address into L1 cache.
+
+    Operand `ptr` can be a global, local or generic address pointer.
+    No operation is performed if `ptr` maps to a `shared` memory location.
+
+    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#data-movement-and-conversion-instructions-prefetch-prefetchu)
+  }];
+  let arguments = (ins AnyTypeOf<[LLVM_PointerGlobal,
+                                  LLVM_PointerLocal,
+                                  LLVM_PointerGeneric]>:$ptr);
+  let assemblyFormat = "$ptr attr-dict `:` type($ptr)";
+
+  let extraClassDeclaration = [{
+    static llvm::Intrinsic::ID getIntrinsicID(llvm::Type *ptrType);
+  }];
+  let llvmBuilder = [{
+    auto intId = NVVM::PrefetchL1Op::getIntrinsicID($ptr->getType());
+    createIntrinsicCall(builder, intId, $ptr);
+  }];
+}
+
+def EvictLast : I32EnumAttrCase<"EvictLast", 0, "evict_last">;
+def EvictNormal : I32EnumAttrCase<"EvictNormal", 1, "evict_normal">;
+
+def EvictionPriority : I32EnumAttr<"EvictionPriority", "NVVM Eviction Priority",
+    [EvictLast, EvictNormal]> {
+  let genSpecializedAttr = 0;
+  let cppNamespace = "::mlir::NVVM";
+}
+
+def EvictionPriorityAttr : EnumAttr<NVVM_Dialect, EvictionPriority, "eviction_priority"> {
+  let assemblyFormat = "$value";
+}
+
+def NVVM_PrefetchL2Op : NVVM_Op<"prefetch.L2"> {
+  let description = [{
+    Brings the cache line containing the specified address into L2 cache.
+
+    Operand `ptr` can be a global, local or generic address pointer.
+    No operation is performed if `ptr` maps to a `shared` memory location.
+
+    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#data-movement-and-conversion-instructions-prefetch-prefetchu)
+  }];
+  let arguments = (ins AnyTypeOf<[LLVM_PointerGlobal,
+                                  LLVM_PointerLocal,
+                                  LLVM_PointerGeneric]>:$ptr,
+                       OptionalAttr<EvictionPriorityAttr>:$evictionPriority);
+  let assemblyFormat = "$ptr (`,` `evict_priority` `=` $evictionPriority^)? attr-dict `:` type($ptr)";
+  let hasVerifier = 1;
+
+  let extraClassDeclaration = [{
+    static llvm::Intrinsic::ID getIntrinsicID(llvm::Type *ptrType, std::optional<NVVM::EvictionPriority> evictionPriority);
+  }];
+  let llvmBuilder = [{
+    auto intId = NVVM::PrefetchL2Op::getIntrinsicID($ptr->getType(), $evictionPriority);
+    createIntrinsicCall(builder, intId, $ptr);
+  }];
+}
+
+def NVVM_PrefetchL1UniformOp : NVVM_Op<"prefetch.L1.uniform"> {
+  let description = [{
+    Brings the cache line containing the specified address into L1 uniform 
+    cache.
+
+    Operand `ptr` is a generic address pointer.
----------------
durga4github wrote:

nit: 
```
Operand `ptr` must be a generic address pointer. No operation is performed if `ptr` maps to a `const`, `local`, or `shared` memory location.
```

https://github.com/llvm/llvm-project/pull/141737