[Mlir-commits] [mlir] [MLIR][AMDGPU] Add amdgpu.global_transpose_load op for RDNA4 global memory transpose loads (PR #195287)

Krzysztof Drewniak llvmlistbot at llvm.org
Fri May 1 09:51:02 PDT 2026


================
@@ -2226,6 +2226,66 @@ struct TransposeLoadOpLowering
   }
 };
 
+struct GlobalTransposeLoadOpLowering
+    : public ConvertOpToLLVMPattern<GlobalTransposeLoadOp> {
+  GlobalTransposeLoadOpLowering(const LLVMTypeConverter &converter,
+                                Chipset chipset)
+      : ConvertOpToLLVMPattern<GlobalTransposeLoadOp>(converter),
+        chipset(chipset) {}
+
+  Chipset chipset;
+
+  LogicalResult
+  matchAndRewrite(GlobalTransposeLoadOp op,
+                  GlobalTransposeLoadOpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    if (chipset < kGfx1250)
+      return op.emitOpError(
+          "global_transpose_load is only supported on gfx1250+");
+
+    Location loc = op.getLoc();
+    auto srcMemRefType = cast<MemRefType>(op.getSrc().getType());
+    auto resultType = cast<VectorType>(op.getResult().getType());
+
+    Value srcPtr =
+        getStridedElementPtr(rewriter, loc, srcMemRefType, adaptor.getSrc(),
+                             adaptor.getSrcIndices());
----------------
krzysz00 wrote:

Add the noise for "this is inbounds" here, so that we get `gep nuw inbounds`.

https://github.com/llvm/llvm-project/pull/195287


More information about the Mlir-commits mailing list