[Mlir-commits] [mlir] Add Lowerings for GPU WMMA F16/F32 ops to ROCDL dialect (PR #69357)

Mon Oct 23 08:03:28 PDT 2023

================
@@ -10,42 +10,91 @@
 
 #include "mlir/Conversion/GPUToROCDL/Runtimes.h"
 #include "mlir/Conversion/LLVMCommon/LoweringOptions.h"
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/IR/BuiltinTypes.h"
+#include "mlir/IR/Value.h"
+#include "mlir/Transforms/DialectConversion.h"
 #include <memory>
 
+namespace llvm {
+class StringRef;
+} // namespace llvm
+
 namespace mlir {
 class LLVMTypeConverter;
 class ConversionTarget;
+class OpBuilder;
+class Location;
 class RewritePatternSet;
+class Type;
 
 template <typename OpT>
 class OperationPass;
 
 namespace gpu {
 class GPUModuleOp;
+class MMAMatrixType;
 } // namespace gpu
 
 #define GEN_PASS_DECL_CONVERTGPUOPSTOROCDLOPS
 #include "mlir/Conversion/Passes.h.inc"
 
+namespace amd {
+/// Constant representing 32 workitems in a wavefront.
+const unsigned kWaveFrontSize32 = 32;
+
+/// Constant representing 64 workitems in a wavefront.
+const unsigned kWaveFrontSize64 = 64;
+
+/// Wavefront sizes that are supported by the GPU to ROCDL lowerings.
+const unsigned kWMMASupportedWaveFrontSizes[] = {kWaveFrontSize32,
+                                                 kWaveFrontSize64};
+
+/// Generate ops to get the laneId of the current lane and return it.
+Value getLaneId(PatternRewriter &rewriter, Location loc,
+                unsigned indexBitwidth);
+
+/// Return the LLVM Type corresponding to the MMAMatrixType.
+Type convertWMMAToROCDLLLVMType(gpu::MMAMatrixType matrixType);
----------------
krzysz00 wrote:

Should this be here or in GPUToAMDGPU?

https://github.com/llvm/llvm-project/pull/69357