[Mlir-commits] [mlir] [mlir][AMDGPU] Add scaled wmma ops for gfx1250 (PR #169854)
Justin Rosner
llvmlistbot at llvm.org
Mon Dec 8 10:34:33 PST 2025
================
@@ -1363,6 +1397,110 @@ struct WMMAOpLowering : public ConvertOpToLLVMPattern<WMMAOp> {
}
};
+struct ScaledWMMAOpLowering : public ConvertOpToLLVMPattern<ScaledWMMAOp> {
+ ScaledWMMAOpLowering(const LLVMTypeConverter &converter, Chipset chipset)
+ : ConvertOpToLLVMPattern<ScaledWMMAOp>(converter), chipset(chipset) {}
+
+ Chipset chipset;
+
+ LogicalResult
+ matchAndRewrite(ScaledWMMAOp op, ScaledWMMAOpAdaptor adaptor,
+ ConversionPatternRewriter &rewriter) const override {
+ Location loc = op.getLoc();
+ auto outType =
+ typeConverter->convertType<VectorType>(op.getDestD().getType());
+ if (!outType)
+ return rewriter.notifyMatchFailure(op, "type conversion failed");
+
+ if (chipset < Chipset(12, 5, 0))
+ return op->emitOpError("WMMA scale only supported on gfx1250+");
+
+ int64_t m = op.getM();
+ int64_t n = op.getN();
+ int64_t k = op.getK();
+
+ Type aElemType = getElementTypeOrSelf(op.getSourceA().getType());
+ Type bElemType = getElementTypeOrSelf(op.getSourceB().getType());
+
+ std::optional<uint32_t> aFmtCode = smallFloatTypeToFormatCode(aElemType);
+ std::optional<uint32_t> bFmtCode = smallFloatTypeToFormatCode(bElemType);
+
+ if (!aFmtCode || !bFmtCode)
+ return op.emitOpError("unsupported element types for scaled_wmma");
+
+ // Get scale vector types and determine variant (scale vs scale16).
+ auto scaleAVecType = cast<VectorType>(op.getScaleA().getType());
+ auto scaleBVecType = cast<VectorType>(op.getScaleB().getType());
+
+ if (scaleAVecType.getNumElements() != scaleBVecType.getNumElements())
+ return op.emitOpError("scaleA and scaleB must have equal vector length");
+
+ // Extract scale format from element types.
+ Type scaleAElemType = scaleAVecType.getElementType();
+ Type scaleBElemType = scaleBVecType.getElementType();
+
+ std::optional<uint32_t> scaleAFmt = getWmmaScaleFormat(scaleAElemType);
+ std::optional<uint32_t> scaleBFmt = getWmmaScaleFormat(scaleBElemType);
+
+ if (!scaleAFmt || !scaleBFmt)
+ return op.emitOpError("unsupported scale element types");
+
+ // Determine which intrinsic to use based on dimensions.
+ bool isScale16 = (scaleAVecType.getNumElements() == 8);
+ std::optional<StringRef> intrinsicName =
+ getScaledWmmaIntrinsicName(m, n, k, isScale16);
+ if (!intrinsicName)
+ return op.emitOpError("unsupported scaled_wmma dimensions: ")
+ << m << "x" << n << "x" << k;
+
+ SmallVector<NamedAttribute, 8> attrs;
+
+ // The f4 variant does not have fmtA and fmtB attributes.
+ bool is32x16 = (m == 32 && n == 16 && k == 128);
+ if (!is32x16) {
+ attrs.emplace_back("fmtA", rewriter.getI32IntegerAttr(*aFmtCode));
+ attrs.emplace_back("fmtB", rewriter.getI32IntegerAttr(*bFmtCode));
+ }
+
+ // modC uses default value of 0.
+ attrs.emplace_back("modC", rewriter.getI16IntegerAttr(0));
+
+ // Scale attributes.
+ attrs.emplace_back("scaleAType",
+ rewriter.getI32IntegerAttr(op.getScaleAIdx()));
+ attrs.emplace_back("fmtScaleA", rewriter.getI32IntegerAttr(*scaleAFmt));
+ attrs.emplace_back("scaleBType",
+ rewriter.getI32IntegerAttr(op.getScaleBIdx()));
+ attrs.emplace_back("fmtScaleB", rewriter.getI32IntegerAttr(*scaleBFmt));
+
+ // Reuse flags use default value of false.
+ attrs.emplace_back("reuseA", rewriter.getBoolAttr(false));
----------------
justinrosner wrote:
Are you okay with me opening an issue to implement this in a separate PR, or would you prefer that it be included in this one?
https://github.com/llvm/llvm-project/pull/169854
More information about the Mlir-commits mailing list