[Mlir-commits] [mlir] [MLIR][NVVM] Add prefetch Ops (PR #141737)
Srinivasa Ravi
llvmlistbot at llvm.org
Tue Jun 3 04:40:41 PDT 2025
================
@@ -2333,6 +2353,75 @@ def NVVM_CpAsyncBulkTensorSharedCTAToGlobalOp :
let hasVerifier = 1;
}
+//===----------------------------------------------------------------------===//
+// NVVM Prefetch Ops
+//===----------------------------------------------------------------------===//
+
+// Enum cases naming the cache levels a prefetch can target:
+// L1 is encoded as 0 and L2 as 1.
+def PrefetchCacheLevelL1 : I32EnumCase<"L1", 0, "L1">;
+def PrefetchCacheLevelL2 : I32EnumCase<"L2", 1, "L2">;
+
+// 32-bit enum grouping the two cases above; the generated C++ enum is placed
+// in the ::mlir::NVVM namespace.
+def PrefetchCacheLevel : I32Enum<"PrefetchCacheLevel",
+ "NVVM Prefetch Cache Level",
+ [PrefetchCacheLevelL1, PrefetchCacheLevelL2]> {
+ let cppNamespace = "::mlir::NVVM";
+}
+
+// Attribute wrapper for PrefetchCacheLevel in the NVVM dialect, registered
+// under the mnemonic `prefetch_cache_level`. The assembly format is just the
+// enum value itself.
+def PrefetchCacheLevelAttr : EnumAttr<NVVM_Dialect, PrefetchCacheLevel, "prefetch_cache_level"> {
+ let assemblyFormat = "$value";
+}
+
+// `nvvm.prefetch`: takes a cache level, an address and an optional eviction
+// priority, and lowers to a single intrinsic call on the address.
+def NVVM_PrefetchOp : NVVM_Op<"prefetch"> {
+ let summary = "Brings the cache line containing an address into the specified cache level";
+ let description = [{
+ Operand `addr` can be a global, local or generic address pointer. No
+ operation is performed if `addr` maps to a `shared` memory location.
+
+ The `cacheLevel` attribute specifies the cache level to which the cache line
+ containing the specified address is brought.
+
+ The `evictPriority` attribute is optional and specifies the cache eviction
+ priority when `cacheLevel` is L2.
+
+ [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#data-movement-and-conversion-instructions-prefetch-prefetchu)
+ }];
+ // `addr` is constrained to one of three pointer types (global, local,
+ // generic); `evictPriority` may be omitted.
+ let arguments = (ins PrefetchCacheLevelAttr:$cacheLevel,
+ AnyTypeOf<[LLVM_PointerGlobal,
+ LLVM_PointerLocal,
+ LLVM_PointerGeneric]>:$addr,
+ OptionalAttr<CacheEvictionPriorityAttr>:$evictPriority);
+ // Textual form: `level = <cacheLevel>, <addr>`, optionally followed by
+ // `, evict_priority = <evictPriority>`; that optional group is anchored on
+ // $evictPriority. The attribute dictionary and the type of `addr` come last.
+ let assemblyFormat = "`level` `=` $cacheLevel `,` $addr (`,` `evict_priority` `=` $evictPriority^)? attr-dict `:` type($addr)";
+ // The verifier body is not part of this hunk.
+ // NOTE(review): presumably it checks that `evictPriority` is only used with
+ // level L2, as the description implies -- confirm against the C++ verifier.
+ let hasVerifier = 1;
+
+ // Declares the static helper used by the LLVM builder below to choose the
+ // intrinsic for a given op instance; its definition is outside this hunk.
+ let extraClassDeclaration = [{
+ static llvm::Intrinsic::ID getIntrinsicID(Operation &op);
+ }];
+ // Translation to LLVM IR: look up the intrinsic ID for this op and emit a
+ // call to it with `addr` as the only argument.
+ let llvmBuilder = [{
+ auto intId = NVVM::PrefetchOp::getIntrinsicID(*op);
+ createIntrinsicCall(builder, intId, $addr);
+ }];
+}
+
+// `nvvm.prefetch.uniform`: takes a cache level and a generic pointer, and
+// lowers to the `nvvm_prefetchu_L1` intrinsic.
+def NVVM_PrefetchUniformOp : NVVM_Op<"prefetch.uniform"> {
+ let summary = "Brings the cache line containing an address into the specified uniform cache level";
+ let description = [{
+ Operand `addr` must be a generic address pointer and no operation is
+ performed if `addr` maps to a `const`, `local`, or `shared` memory location.
+
+ The `cacheLevel` attribute specifies the cache level to which the cache line
+ containing the specified address is brought. The only supported level is L1.
+
+ [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#data-movement-and-conversion-instructions-prefetch-prefetchu)
+ }];
+ // Unlike `nvvm.prefetch`, `addr` accepts only the generic pointer type and
+ // there is no eviction-priority attribute.
+ let arguments = (ins PrefetchCacheLevelAttr:$cacheLevel,
+ LLVM_PointerGeneric:$addr);
+ // Textual form: `level = <cacheLevel>, <addr>`, then the attribute
+ // dictionary and the type of `addr`.
+ let assemblyFormat = "`level` `=` $cacheLevel `,` $addr attr-dict `:` type($addr)";
+ // The verifier body is not part of this hunk.
+ // NOTE(review): presumably it rejects any `cacheLevel` other than L1, since
+ // the builder below ignores the attribute -- confirm against the C++ verifier.
+ let hasVerifier = 1;
+
+ // Translation to LLVM IR: always emits the L1 intrinsic with `addr` as the
+ // only argument; `cacheLevel` is not consulted here.
+ let llvmBuilder = [{
+ createIntrinsicCall(builder, llvm::Intrinsic::nvvm_prefetchu_L1, $addr);
+ }];
+}
+
----------------
Wolfram70 wrote:
I attempted this: the verifier gets a bit bigger because the two ops have different argument types, which need to be checked. If you think it's worth merging them (perhaps with a `UnitAttr` called `uniform` after the level is specified?), I will go ahead and merge this one too.
https://github.com/llvm/llvm-project/pull/141737
More information about the Mlir-commits
mailing list