[Mlir-commits] [mlir] [MLIR][AMDGPU] Added l2-prefetch op to AMDGPU (PR #188457)

Krzysztof Drewniak llvmlistbot at llvm.org
Thu Mar 26 11:48:34 PDT 2026


================
@@ -3950,6 +3950,57 @@ struct AMDGPUTensorLoadStoreOpLowering
   }
 };
 
+struct GlobalPrefetchOpLowering
+    : public ConvertOpToLLVMPattern<GlobalPrefetchOp> {
+  GlobalPrefetchOpLowering(const LLVMTypeConverter &converter, Chipset chipset)
+      : ConvertOpToLLVMPattern<GlobalPrefetchOp>(converter), chipset(chipset) {}
+
+  LogicalResult
+  matchAndRewrite(GlobalPrefetchOp op, GlobalPrefetchOpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    if (chipset < kGfx1250)
+      return op->emitOpError("is only supported on gfx1250+");
+
+    const TemporalHint hint = op.getTemporalHint();
+    const bool isSpeculative = op.getSpeculative();
+
+    int32_t immArgValue = static_cast<int32_t>(hint);
+
+    // Note that only RT and HT can operate in both speculative and
+    // non-speculative modes. The other variants (NT_RT, RT_NT, NT_HT, etc.)
+    // operate only in the speculative mode and, therefore, do not require
+    // toggling the least significant bit for mode changes
+    // Temporal hint is encoded in lower bits - i.e. [2:0]
+    if (llvm::is_contained({TemporalHint::RT, TemporalHint::HT}, hint))
+      immArgValue = isSpeculative ? immArgValue : immArgValue | 1;
+
+    // Prefetch scope level is encoded in upper bits - i.e., [4:3]
+    immArgValue = static_cast<int32_t>(op.getCacheScope()) << 3 | immArgValue;
+
+    IntegerAttr immArgAttr = rewriter.getI32IntegerAttr(immArgValue);
+
+    ValueRange indices = adaptor.getIndices();
+    Value memRef = adaptor.getSrc();
+    MemRefDescriptor descriptor(memRef);
+    MemRefType memRefType = op.getSrc().getType();
+    Location loc = op->getLoc();
+    auto inboundsFlags = isSpeculative ? LLVM::GEPNoWrapFlags::none
+                                       : LLVM::GEPNoWrapFlags::inbounds |
+                                             LLVM::GEPNoWrapFlags::nuw;
+    Value prefetchPtr = getStridedElementPtr(
+        rewriter, loc, memRefType, descriptor, indices, inboundsFlags);
+
+    Operation *newOp = ROCDL::GlobalPrefetchOp::create(
+        rewriter, loc, prefetchPtr, immArgAttr, {}, {}, {});
+
+    rewriter.replaceOp(op, newOp);
----------------
krzysz00 wrote:

Nit: there's `rewriter.replaceOpWithNewOp`, which would fold the `create` + `replaceOp` pair into a single call, but that's a triviality.

https://github.com/llvm/llvm-project/pull/188457


More information about the Mlir-commits mailing list