[Mlir-commits] [mlir] Add Lowerings for GPU WMMA F16/F32 ops to ROCDL dialect (PR #69357)
Krzysztof Drewniak
llvmlistbot at llvm.org
Mon Oct 23 08:03:29 PDT 2023
================
@@ -10,42 +10,91 @@
#include "mlir/Conversion/GPUToROCDL/Runtimes.h"
#include "mlir/Conversion/LLVMCommon/LoweringOptions.h"
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/IR/BuiltinTypes.h"
+#include "mlir/IR/Value.h"
+#include "mlir/Transforms/DialectConversion.h"
#include <memory>
+namespace llvm {
+class StringRef;
+} // namespace llvm
+
namespace mlir {
class LLVMTypeConverter;
class ConversionTarget;
+class OpBuilder;
+class Location;
class RewritePatternSet;
+class Type;
template <typename OpT>
class OperationPass;
namespace gpu {
class GPUModuleOp;
+class MMAMatrixType;
} // namespace gpu
#define GEN_PASS_DECL_CONVERTGPUOPSTOROCDLOPS
#include "mlir/Conversion/Passes.h.inc"
+namespace amd {
+/// Constant representing a wavefront size of 32 workitems.
+const unsigned kWaveFrontSize32 = 32;
+
+/// Constant representing a wavefront size of 64 workitems.
+const unsigned kWaveFrontSize64 = 64;
+
+/// Wavefront sizes that are supported by the GPU to ROCDL lowerings.
+const unsigned kWMMASupportedWaveFrontSizes[] = {kWaveFrontSize32,
+ kWaveFrontSize64};
+
+/// Generate ops to get the laneId of the current lane and return it.
+Value getLaneId(PatternRewriter &rewriter, Location loc,
+ unsigned indexBitwidth);
+
+/// Return the LLVM Type corresponding to the MMAMatrixType.
+Type convertWMMAToROCDLLLVMType(gpu::MMAMatrixType matrixType);
+} // namespace amd
+
/// Collect a set of patterns to convert from the GPU dialect to ROCDL.
-/// If `runtime` is Unknown, gpu.printf will not be lowered
-/// The resulting pattern set should be run over a gpu.module op
-void populateGpuToROCDLConversionPatterns(LLVMTypeConverter &converter,
- RewritePatternSet &patterns,
- gpu::amd::Runtime runtime);
+/// If `runtime` is Unknown, gpu.printf will not be lowered. The resulting
+/// pattern set should be run over a gpu.module op. `chipset` is the chip we are
+/// targeting (default "gfx900"). `indexBitwidth` is the bitwidth used to convert
+/// index types. `warpSize` (default 32) is the warp size for WMMA intrinsics.
+void populateGpuToROCDLConversionPatterns(
+ LLVMTypeConverter &converter, RewritePatternSet &patterns,
+ gpu::amd::Runtime runtime, llvm::StringRef chipset = "gfx900",
+ unsigned indexBitwidth = kDeriveIndexBitwidthFromDataLayout,
+ unsigned warpSize = 32);
----------------
krzysz00 wrote:
Same note about deriving this from the chipset version by default
https://github.com/llvm/llvm-project/pull/69357
More information about the Mlir-commits
mailing list