[Mlir-commits] [mlir] Revert "[MLIR][NVVM] Add tcgen05.mma MLIR Ops" (PR #168583)
llvmlistbot at llvm.org
Tue Nov 18 10:16:08 PST 2025
llvmbot wrote:
@llvm/pr-subscribers-mlir-llvm
Author: Mehdi Amini (joker-eph)
<details>
<summary>Changes</summary>
Reverts llvm/llvm-project#164356
The bots are broken.
---
Patch is 472.27 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/168583.diff
15 Files Affected:
- (modified) mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td (-545)
- (modified) mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp (-612)
- (removed) mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-block-scale-shared.mlir (-229)
- (removed) mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-block-scale-tensor.mlir (-229)
- (removed) mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-invalid.mlir (-119)
- (removed) mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-shared.mlir (-442)
- (removed) mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-sp-block-scale-shared.mlir (-229)
- (removed) mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-sp-block-scale-tensor.mlir (-229)
- (removed) mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-sp-shared.mlir (-442)
- (removed) mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-sp-tensor.mlir (-634)
- (removed) mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-tensor.mlir (-633)
- (removed) mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-ws-shared.mlir (-133)
- (removed) mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-ws-sp-shared.mlir (-133)
- (removed) mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-ws-sp-tensor.mlir (-133)
- (removed) mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-ws-tensor.mlir (-133)
``````````diff
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 524b9f820f290..8d5bc7333d47f 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -4598,551 +4598,6 @@ def NVVM_ClusterLaunchControlQueryCancelOp
}];
}
-//===----------------------------------------------------------------------===//
-// NVVM tcgen05.mma Ops
-//===----------------------------------------------------------------------===//
-
-def Tcgen05MMAKindF16 : I32EnumAttrCase<"F16", 0, "f16">;
-def Tcgen05MMAKindTF32 : I32EnumAttrCase<"TF32", 1, "tf32">;
-def Tcgen05MMAKindF8F6F4 : I32EnumAttrCase<"F8F6F4", 2, "f8f6f4">;
-def Tcgen05MMAKindINT8 : I32EnumAttrCase<"I8", 3, "i8">;
-
-def Tcgen05MMAKind : I32EnumAttr<
- "Tcgen05MMAKind",
- "tcgen05 MMA Supported Types",
- [Tcgen05MMAKindF8F6F4, Tcgen05MMAKindINT8, Tcgen05MMAKindF16,
- Tcgen05MMAKindTF32]> {
- let cppNamespace = "::mlir::NVVM";
- let genSpecializedAttr = 0;
-}
-
-def Tcgen05MMAKindAttr : EnumAttr<NVVM_Dialect, Tcgen05MMAKind, "tcgen05_mma_kind"> {
- let description = [{
- The Tcgen05MMAKind attribute describes the allowed set of types for matrix A and B in the tcgen05.mma.{sp} Op. The following are supported types for each kind:
-
- ```
- +-------------+--------------------------------------------+
- | Matrix Kind | supported types for A / B |
- +-------------+--------------------------------------------+
- | f16 | f16, bf16 |
- | tf32 | tf32 |
- | f8f6f4 | e4m3, e5m2, e2m3, e3m2, e2m1 |
- | i8 | unsigned 8b, signed 8b |
- +-------------+--------------------------------------------+
- ```
- }];
- let assemblyFormat = "`<` $value `>`";
-}
-
-def Tcgen05MMACollectorOpDiscard : I32EnumAttrCase<"DISCARD", 0, "discard">;
-def Tcgen05MMACollectorOpLastUse : I32EnumAttrCase<"LASTUSE", 1, "lastuse">;
-def Tcgen05MMACollectorOpFill : I32EnumAttrCase<"FILL", 2, "fill">;
-def Tcgen05MMACollectorOpUse : I32EnumAttrCase<"USE", 3, "use">;
-
-def Tcgen05MMACollectorOp : I32EnumAttr<
- "Tcgen05MMACollectorOp",
- "tcgen05.mma Collector Buffer Operation",
- [Tcgen05MMACollectorOpDiscard,
- Tcgen05MMACollectorOpLastUse,
- Tcgen05MMACollectorOpFill,
- Tcgen05MMACollectorOpUse]> {
- let cppNamespace = "::mlir::NVVM";
- let genSpecializedAttr = 0;
-}
-
-def Tcgen05MMACollectorOpAttr : EnumAttr<NVVM_Dialect, Tcgen05MMACollectorOp, "tcgen05_mma_collectorop"> {
- let description = [{
- The Tcgen05MMACollectorOp attribute specifies the collector buffer operations.
- The following are the supported operations:
- * discard : Release buffer after use (default)
- * lastuse : Mark buffer for last use
- * fill : Fill buffer
- * use : Use buffer without modification
- }];
- let assemblyFormat = "`<` $value `>`";
-}
-
-def NVVM_Tcgen05MMAOp : NVVM_Op<"tcgen05.mma",
- [AttrSizedOperandSegments,
- NVVMRequiresSMa<[100, 110]>]> {
- let summary = "Performs MMA operation on 5th-gen tensor cores";
-
- let description = [{
- The `tcgen05.mma` operation is an asynchronous tensor core instruction that
- performs matrix multiplication and accumulation in a single fused operation. It
- targets 5th-generation tensor cores, providing developers with fine-grained
- control over execution and scheduling.
-
- ```
- D = A * B + (D * 2^ -scaleInputD) // if `scaleInputD` is provided
- D = A * B // if `enableInputD` is false
- D = A * B + D // otherwise
- ```
-
- where:
- - A is an `M x K` matrix in tensor memory or described using shared memory descriptor
- - B is a `K x N` matrix described using shared memory descriptor
- - D is an `M x N` accumulator matrix in tensor memory
-
- The `shared memory descriptor` can be generated using `tcgen05.mma_smem_desc` Op
-
- - idesc is a 32-bit value representing the [Instruction Descriptor](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-instruction-descriptor)
-
- Optional Operands:
- - `scaleInputD` is an immediate value operand used for scaling the D matrix by 2^(-scaleInputD). The valid range is [0, 15]
-
- - `disableOutputLane` is a vector mask for selective output
- * vector<4 x i32> when ctaGroup is CTA_1
- * vector<8 x i32> when ctaGroup is CTA_2
-
- Required Attributes:
- - `kind` is a Tcgen05MMAKind attribute
-
- - `ctaGroup` specifies CTA group configuration
- * cta_1: MMA will be performed on the current thread's CTA
- * cta_2: MMA will be performed on the current thread and its peer CTA
-
- Default Attributes:
- - collectorOp is a Tcgen05MMACollectorOp attribute with matrix A as the collector buffer
-
- - `aShift` shifts the rows of the A matrix down by one row and can only be
- applied if A is in tensor memory
-
- [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-mma-instructions-mma)
- }];
-
- let arguments = (ins
- Tcgen05MMAKindAttr:$kind,
- CTAGroupKindAttr:$ctaGroup,
- DefaultValuedAttr<Tcgen05MMACollectorOpAttr,
- "Tcgen05MMACollectorOp::DISCARD">:$collectorOp,
- UnitAttr:$aShift,
- LLVM_PointerTensor:$matrixD,
- AnyTypeOf<[LLVM_PointerTensor, I64]>:$matrixA,
- I64:$matrixB,
- I32:$idesc,
- I1:$enableInputD,
- Optional<I64>:$scaleInputD,
- Optional<FixedVectorOfLengthAndType<[4, 8], [I32]>>:$disableOutputLane
- );
-
- let assemblyFormat = [{
- $matrixD `,` $matrixA `,` $matrixB `,` $idesc `,` $enableInputD (`scale` `=` $scaleInputD^)?
- (`mask` `=` $disableOutputLane^)? attr-dict `:` `(` type(operands) `)`
- }];
-
- let hasVerifier = true;
-
- let extraClassDeclaration = [{
- static mlir::NVVM::IDArgPair getIntrinsicIDAndArgs(
- Operation &op, LLVM::ModuleTranslation &mt,
- llvm::IRBuilderBase &builder);
- }];
-
- let llvmBuilder = [{
- auto [ID, args] = NVVM::Tcgen05MMAOp::getIntrinsicIDAndArgs(
- *op, moduleTranslation, builder);
- createIntrinsicCall(builder, ID, args);
- }];
-}
-
-def NVVM_Tcgen05MMASparseOp : NVVM_Op<"tcgen05.mma.sp",
- [AttrSizedOperandSegments,
- NVVMRequiresSMa<[100, 110]>]> {
- let summary = "Performs MMA operation with sparse A matrix on 5th-gen tensor cores";
-
- let description = [{
- The `tcgen05.mma.sp` operation is an asynchronous tensor core instruction
- that performs matrix multiplication and accumulation with a sparse `A` matrix in
- a single fused operation. It targets 5th-generation tensor cores, providing
- developers with fine-grained control over execution and scheduling.
-
- ```
- D = A * B + (D * 2^ -scaleInputD) // if `scaleInputD` is provided
- D = A * B // if `enableInputD` is false
- D = A * B + D // otherwise
- ```
-
- where:
- - A is an `M x (K / 2)` matrix in tensor memory or described using shared memory descriptor
- - B is a `K x N` matrix described using shared memory descriptor
- - D is an `M x N` accumulator matrix in tensor memory
- - sparseMetadata located in tensor memory specifies the mapping of the `K / 2`
- non-zero elements to the K elements before performing the MMA operation
-
- Other attributes and operands are similar to those of the tcgen05.mma Op
-
- [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-mma-instructions-mma-sp)
- }];
-
- let arguments = (ins
- Tcgen05MMAKindAttr:$kind,
- CTAGroupKindAttr:$ctaGroup,
- DefaultValuedAttr<Tcgen05MMACollectorOpAttr,
- "Tcgen05MMACollectorOp::DISCARD">:$collectorOp,
- UnitAttr:$aShift,
- LLVM_PointerTensor:$matrixD,
- AnyTypeOf<[LLVM_PointerTensor, I64]>:$matrixA,
- I64:$matrixB,
- I32:$idesc,
- I1:$enableInputD,
- LLVM_PointerTensor:$sparseMetadata,
- Optional<I64>:$scaleInputD,
- Optional<FixedVectorOfLengthAndType<[4, 8], [I32]>>:$disableOutputLane
- );
-
- let assemblyFormat = [{
- $matrixD `,` $matrixA `,` $matrixB `,` $idesc `,` $enableInputD `,` $sparseMetadata (`scale` `=` $scaleInputD^)? (`mask` `=` $disableOutputLane^)? attr-dict `:` `(` type(operands) `)`
- }];
-
- let hasVerifier = true;
-
- let extraClassDeclaration = [{
- static mlir::NVVM::IDArgPair getIntrinsicIDAndArgs(
- Operation &op, LLVM::ModuleTranslation &mt,
- llvm::IRBuilderBase &builder);
- }];
-
- let llvmBuilder = [{
- auto [ID, args] = NVVM::Tcgen05MMASparseOp::getIntrinsicIDAndArgs(
- *op, moduleTranslation, builder);
- createIntrinsicCall(builder, ID, args);
- }];
-}
-
-def Tcgen05MMAKindMXF8F6F4 : I32EnumAttrCase<"MXF8F6F4", 0, "mxf8f6f4">;
-def Tcgen05MMAKindMXF4 : I32EnumAttrCase<"MXF4", 1, "mxf4">;
-def Tcgen05MMAKindMXF4NVF4 : I32EnumAttrCase<"MXF4NVF4", 2, "mxf4nvf4">;
-
-def Tcgen05MMABlockScaleKind : I32EnumAttr<
- "Tcgen05MMABlockScaleKind",
- "tcgen05.mma.block_scale supported types",
- [Tcgen05MMAKindMXF8F6F4, Tcgen05MMAKindMXF4, Tcgen05MMAKindMXF4NVF4]> {
- let cppNamespace = "::mlir::NVVM";
- let genSpecializedAttr = 0;
-}
-
-def Tcgen05MMABlockScaleKindAttr : EnumAttr<NVVM_Dialect, Tcgen05MMABlockScaleKind,
- "tcgen05_mma_block_scale_kind"> {
- let description = [{
- The Tcgen05MMABlockScaleKind attribute describes the allowed set of types for matrix A and B in the tcgen05.mma.{sp}.block_scale Op. The following are supported types for each kind:
-
- ```
- +--------------+-------------------------------------------+
- | Matrix Kind | supported types for A / B |
- +--------------+-------------------------------------------+
- | mxf8f6f4     | e4m3, e5m2, e2m3, e3m2, e2m1              |
- | mxf4 | e2m1 |
- | mxf4nvf4 | e2m1 |
- +--------------+-------------------------------------------+
- ```
- }];
- let assemblyFormat = "`<` $value `>`";
-}
-
-def Tcgen05MMABlockScaleDefault : I32EnumAttrCase<"DEFAULT", 0, "default">;
-def Tcgen05MMABlockScaleBlock16 : I32EnumAttrCase<"BLOCK16", 1, "block16">;
-def Tcgen05MMABlockScaleBlock32 : I32EnumAttrCase<"BLOCK32", 2, "block32">;
-
-def Tcgen05MMABlockScale
- : I32EnumAttr<"Tcgen05MMABlockScale",
- "tcgen05.mma block scale attribute",
- [Tcgen05MMABlockScaleDefault, Tcgen05MMABlockScaleBlock16,
- Tcgen05MMABlockScaleBlock32]> {
- let cppNamespace = "::mlir::NVVM";
- let genSpecializedAttr = 0;
-}
-
-def Tcgen05MMABlockScaleAttr : EnumAttr<NVVM_Dialect, Tcgen05MMABlockScale,
- "tcgen05_mma_block_scale"> {
- let assemblyFormat = "`<` $value `>`";
-}
-
-def NVVM_Tcgen05MMABlockScaleOp : NVVM_Op<"tcgen05.mma.block_scale",
- [NVVMRequiresSMa<[100, 110]>]> {
- let summary = "Performs block scaled MMA operation on 5th-gen tensor cores";
-
- let description = [{
- The `tcgen05.mma.block_scale` operation is an asynchronous tensor core instruction
- that performs matrix multiplication and accumulation with block scaling in a
- single fused operation. It targets 5th-generation tensor cores, providing
- developers with fine-grained control over execution and scheduling.
-
- ```
- D = (A * scale_a) * (B * scale_b) // if `enableInputD` is false
- D = (A * scale_a) * (B * scale_b) + D // otherwise
- ```
-
- where:
- - A is an M x (K / 2) matrix in tensor memory or described using shared memory descriptor
- - B is a K x N matrix described using shared memory descriptor
- - D is an M x N accumulator matrix in tensor memory
- - `scale_a` and `scale_b` are matrices in tensor memory used to scale `A` and `B` respectively
-
- The `shared memory descriptor` can be generated using `tcgen05.mma_smem_desc` Op
-
- - `idesc` is a 32-bit value representing the [Instruction Descriptor](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-instruction-descriptor)
-
- Required Attributes:
- - `kind` is a Tcgen05MMABlockScaleKind attribute
-
- - `ctaGroup` specifies CTA group configuration
- * cta_1: MMA will be performed on the current thread's CTA
- * cta_2: MMA will be performed on the current thread and its peer CTA
-
- Default Attributes:
- - collectorOp is a Tcgen05MMACollectorOp attribute with matrix A as the collector buffer
-
- [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-mma-instructions-mma)
- }];
-
- let arguments = (ins
- Tcgen05MMABlockScaleKindAttr:$kind,
- CTAGroupKindAttr:$ctaGroup,
- DefaultValuedAttr<Tcgen05MMABlockScaleAttr,
- "Tcgen05MMABlockScale::DEFAULT">:$blockScale,
- DefaultValuedAttr<Tcgen05MMACollectorOpAttr,
- "Tcgen05MMACollectorOp::DISCARD">:$collectorOp,
- LLVM_PointerTensor:$matrixD,
- AnyTypeOf<[LLVM_PointerTensor, I64]>:$matrixA,
- I64:$matrixB,
- I32:$idesc, I1:$enableInputD,
- LLVM_PointerTensor:$scaleA,
- LLVM_PointerTensor:$scaleB
- );
-
- let assemblyFormat = [{
- $matrixD `,` $matrixA `,` $matrixB `,` $idesc `,` $enableInputD `,` $scaleA `,` $scaleB
- attr-dict `:` `(` type(operands) `)`
- }];
-
- let hasVerifier = true;
-
- let extraClassDeclaration = [{
- static mlir::NVVM::IDArgPair
- getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
- llvm::IRBuilderBase &builder);
- }];
-
- let llvmBuilder = [{
- auto [ID, args] = NVVM::Tcgen05MMABlockScaleOp::getIntrinsicIDAndArgs(
- *op, moduleTranslation, builder);
- createIntrinsicCall(builder, ID, args);
- }];
-}
-
-def NVVM_Tcgen05MMASparseBlockScaleOp : NVVM_Op<"tcgen05.mma.sp.block_scale",
- [NVVMRequiresSMa<[100, 110]>]> {
- let summary = "Performs block scaled MMA operation with sparse A matrix on 5th-gen tensor cores";
-
- let description = [{
- The `tcgen05.mma.sp.block_scale` operation is an asynchronous tensor core
- instruction that performs matrix multiplication and accumulation with block
- scaling and a sparse `A` matrix in a single fused operation. It targets
- 5th-generation tensor cores, providing developers with fine-grained control
- over execution and scheduling.
-
- ```
- D = (A * scale_a) * (B * scale_b) // if `enableInputD` is false
- D = (A * scale_a) * (B * scale_b) + D // otherwise
- ```
-
- where:
- - A is an M x (K / 2) matrix in tensor memory or described using shared memory descriptor
- - B is a K x N matrix described using shared memory descriptor
- - D is an M x N accumulator matrix in tensor memory
- - `scale_a` and `scale_b` are matrices in tensor memory used to scale `A` and `B` respectively
-
- Other attributes and operands are similar to those of the tcgen05.mma.block_scale Op
-
- [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-mma-instructions-mma-sp)
- }];
-
- let arguments = (ins
- Tcgen05MMABlockScaleKindAttr:$kind,
- CTAGroupKindAttr:$ctaGroup,
- DefaultValuedAttr<Tcgen05MMABlockScaleAttr,
- "Tcgen05MMABlockScale::DEFAULT">:$blockScale,
- DefaultValuedAttr<Tcgen05MMACollectorOpAttr,
- "Tcgen05MMACollectorOp::DISCARD">:$collectorOp,
- LLVM_PointerTensor:$matrixD,
- AnyTypeOf<[LLVM_PointerTensor, I64]>:$matrixA,
- I64:$matrixB,
- I32:$idesc,
- I1:$enableInputD,
- LLVM_PointerTensor:$sparseMetadata,
- LLVM_PointerTensor:$scaleA,
- LLVM_PointerTensor:$scaleB
- );
-
- let assemblyFormat = [{
- $matrixD `,` $matrixA `,` $matrixB `,` $idesc `,` $enableInputD `,` $sparseMetadata `,` $scaleA `,` $scaleB
- attr-dict `:` `(` type(operands) `)`
- }];
-
- let hasVerifier = true;
-
- let extraClassDeclaration = [{
- static mlir::NVVM::IDArgPair
- getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
- llvm::IRBuilderBase &builder);
- }];
-
- let llvmBuilder = [{
- auto [ID, args] = NVVM::Tcgen05MMASparseBlockScaleOp::getIntrinsicIDAndArgs(
- *op, moduleTranslation, builder);
- createIntrinsicCall(builder, ID, args);
- }];
-}
-
-def Tcgen05MMACollectorBBuffer0 : I32EnumAttrCase<"B0", 0, "b0">;
-def Tcgen05MMACollectorBBuffer1 : I32EnumAttrCase<"B1", 1, "b1">;
-def Tcgen05MMACollectorBBuffer2 : I32EnumAttrCase<"B2", 2, "b2">;
-def Tcgen05MMACollectorBBuffer3 : I32EnumAttrCase<"B3", 3, "b3">;
-
-def Tcgen05MMACollectorBBuffer : I32EnumAttr<
- "Tcgen05MMACollectorBBuffer",
- "tcgen05 MMA Collector Buffer B Attribute",
- [Tcgen05MMACollectorBBuffer0, Tcgen05MMACollectorBBuffer1, Tcgen05MMACollectorBBuffer2,
- Tcgen05MMACollectorBBuffer3]> {
- let cppNamespace = "::mlir::NVVM";
- let genSpecializedAttr = 0;
-}
-
-def Tcgen05MMACollectorBBufferAttr : EnumAttr<NVVM_Dialect, Tcgen05MMACollectorBBuffer, "tcgen05_mma_collectorb"> {
- let assemblyFormat = "`<` $value `>`";
-}
-
-def NVVM_Tcgen05MMAWsOp : NVVM_Op<"tcgen05.mma.ws",
- [NVVMRequiresSMa<[100, 110]>]> {
- let summary = "Performs weight stationary convolution MMA operation on 5th-gen tensor cores";
-
- let description = [{
- The `tcgen05.mma.ws` operation is an asynchronous tensor core instruction
- that performs a weight-stationary convolution matrix multiplication and accumulation
- in a single fused operation. It targets 5th-generation tensor cores, providing
- developers with fine-grained control over execution and scheduling.
-
- ```
- D = A * B // if `enableInputD` is false
- D = A * B + D // otherwise
- ```
-
- where:
- - A is an `M x K` matrix in tensor memory or described using shared memory descriptor
- - B is a `K x N` matrix described using shared memory descriptor
- - D is an `M x N` accumulator matrix in tensor memory
-
- The `shared memory descriptor` can be generated using `tcgen05.mma_smem_desc` Op
-
- - idesc is a 32-bit value representing the [Instruction Descriptor](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-instruction-descriptor)
-
- Optional Operands:
- - `zeroColMask` is a 64-bit value representing the [Zero-column mask descriptor](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-zero-column-mask-descriptor)
-
- Required Attributes:
- - `kind` is a Tcgen05MMAKind attribute
-
- Default Valued Attributes:
- - `collectorBBuffer` specifies the collector buffer for matrix B: b0 (default), b1, b2, or b3
-
- - collectorOp is a Tcgen05MMACollectorOp attribute with matrix B as the collector buffer
-
- [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-mma-instructions-mma-ws)
- }];
-
- let arguments = (ins
- Tcgen05MMAKindAttr:$kind,
- DefaultValuedAttr<Tcgen05MMACollectorBBufferAttr,
- "Tcgen05MMACollectorBBuffer::B0">:$collectorBBuffer,
- DefaultValuedAttr<Tcgen05MMACollectorOpAttr,
- "Tcgen05MMACollectorOp::DISCARD">:$collectorOp,
- LLVM_PointerTensor:$matrixD,
- AnyTypeOf<[LLVM_PointerTensor, I64]>:$matrixA,
- I64:$matrixB,
- I32:$idesc,
- I1:$enableInputD,
- Optional<I64>:$zeroColMask
- );
-
- let assemblyFormat = [{
- $matrixD `,` $matrixA `,` $matrixB `,` $idesc `,` $enableInputD (`,` $zeroColMask^)?
- attr-dict `:` `(` type(operands) `)`
- }];
-
- let extraClassDeclaration = [{
- static mlir::NVVM::IDArgPair getIntrinsicIDAndArgs(
- Operation &op, LLVM::ModuleTranslation &mt,
- llvm::IRBuilderBase &builder);
- }];
-
- let llvmBuilder = [{
- auto [ID, args] =
- NVVM::Tcgen05MMAWsOp::getIntrinsicIDAndArgs(*op, moduleTranslation, builder);
- createIntrinsicCall(builder, ID, args);
- }];
-}
-
-def NVVM_Tcgen...
[truncated]
``````````
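For reference, here is a minimal usage sketch of the basic op being removed, reconstructed from the `assemblyFormat` and attribute mnemonics visible in the diff above rather than copied from the deleted `.mlir` tests. The `!llvm.ptr<6>` tensor-memory pointer type and the `#nvvm.cta_group<cta_1>` attribute spelling are assumptions here:

```mlir
// Illustrative only; not taken from the reverted tests. Assumes tensor-memory
// pointers print as !llvm.ptr<6> and the CTA-group attribute prints as
// #nvvm.cta_group<...>.
llvm.func @tcgen05_mma_f16(%d: !llvm.ptr<6>, %a: i64, %b: i64,
                           %idesc: i32, %enable: i1) {
  // A and B are passed as shared-memory descriptors (i64); D is the
  // tensor-memory accumulator. `kind` and `ctaGroup` are the required attrs.
  nvvm.tcgen05.mma %d, %a, %b, %idesc, %enable
    {kind = #nvvm.tcgen05_mma_kind<f16>, ctaGroup = #nvvm.cta_group<cta_1>}
    : (!llvm.ptr<6>, i64, i64, i32, i1)
  llvm.return
}
```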
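And a similar sketch for the block-scaled variant, again reconstructed from the `assemblyFormat` and the `tcgen05_mma_block_scale_kind` / `tcgen05_mma_block_scale` mnemonics in the diff, with the same assumed pointer type and `cta_group` spelling:

```mlir
// Illustrative only; same assumptions as the sketch above.
llvm.func @tcgen05_mma_block_scale(%d: !llvm.ptr<6>, %a: i64, %b: i64,
                                   %idesc: i32, %enable: i1,
                                   %scale_a: !llvm.ptr<6>,
                                   %scale_b: !llvm.ptr<6>) {
  // scale_a / scale_b are tensor-memory matrices that scale A and B; the
  // blockScale attribute picks the granularity (default / block16 / block32).
  nvvm.tcgen05.mma.block_scale %d, %a, %b, %idesc, %enable, %scale_a, %scale_b
    {kind = #nvvm.tcgen05_mma_block_scale_kind<mxf4>,
     ctaGroup = #nvvm.cta_group<cta_1>,
     blockScale = #nvvm.tcgen05_mma_block_scale<block32>}
    : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>)
  llvm.return
}
```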
</details>
https://github.com/llvm/llvm-project/pull/168583