[Mlir-commits] [mlir] [mlir][amdgpu] Add `amdgpu.swizzle_bitmode` op (PR #135513)

Thu Apr 17 14:49:36 PDT 2025

https://github.com/Hardcode84 updated https://github.com/llvm/llvm-project/pull/135513

>From 7bc22f9a02b14f1126f352419ead5686ffd6bc2d Mon Sep 17 00:00:00 2001
From: Ivan Butygin <ivan.butygin at gmail.com>
Date: Sun, 13 Apr 2025 03:45:19 +0200
Subject: [PATCH 1/2] [mlir][amdgpu] Add `amdgpu.swizzle_bitmode` op

High level wrapper on top of `rocdl.ds_swizzle`. Also some DPP op cleanup while I'm at here.
---
 mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td | 43 +++++++++++++++----
 mlir/test/Dialect/AMDGPU/ops.mlir             |  7 +++
 2 files changed, 42 insertions(+), 8 deletions(-)

diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
index 108d7237ff703..a92ebf6d8e108 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
+++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
@@ -35,6 +35,11 @@ def AMDGPU_Dialect : Dialect {
   let useDefaultAttributePrinterParser = 1;
 }
 
+def AnyIntegerOrFloat : AnyTypeOf<[AnySignlessInteger, AnyFloat], "Integer or Float">;
+
+def AnyIntegerOrFloatOr1DVector :
+  AnyTypeOf<[AnyIntegerOrFloat, VectorOfRankAndType<[1], [AnyIntegerOrFloat]>]>;
+
 //===----------------------------------------------------------------------===//
 // AMDGPU general attribute definitions
 //===----------------------------------------------------------------------===//
@@ -533,14 +538,15 @@ def AMDGPU_DPPPerm : I32EnumAttr<"DPPPerm",
 def AMDGPU_DPPPermAttr : EnumAttr<AMDGPU_Dialect, AMDGPU_DPPPerm,
   "dpp_perm">;
 
-def AMDGPU_DPPOp : AMDGPU_Op<"dpp", [SameTypeOperands, AllTypesMatch<["result", "old", "src"]>]>,
+def AMDGPU_DPPOp : AMDGPU_Op<"dpp",
+    [Pure, SameTypeOperands, AllTypesMatch<["result", "old", "src"]>]>,
   Arguments<(ins AnyType:$old,
-                  AnyType:$src,
-                  AMDGPU_DPPPermAttr:$kind,
-                  OptionalAttr<AnyAttrOf<[I32Attr, ArrayAttr, UnitAttr]>>:$permArgument,
-                  DefaultValuedAttr<I32Attr, "0xf">:$row_mask,
-                  DefaultValuedAttr<I32Attr, "0xf">:$bank_mask,
-                  DefaultValuedAttr<BoolAttr, "false">:$bound_ctrl)> {
+                 AnyType:$src,
+                 AMDGPU_DPPPermAttr:$kind,
+                 OptionalAttr<AnyAttrOf<[I32Attr, ArrayAttr, UnitAttr]>>:$permArgument,
+                 DefaultValuedAttr<I32Attr, "0xf">:$row_mask,
+                 DefaultValuedAttr<I32Attr, "0xf">:$bank_mask,
+                 DefaultValuedAttr<BoolAttr, "false">:$bound_ctrl)> {
   let summary = "AMDGPU DPP operation";
   let description = [{
     This operation represents DPP functionality in a GPU program.
@@ -565,6 +571,27 @@ def AMDGPU_DPPOp : AMDGPU_Op<"dpp", [SameTypeOperands, AllTypesMatch<["result",
   let hasVerifier = 1;
 }
 
+def AMDGPU_SwizzleBitModeOp : AMDGPU_Op<"swizzle_bitmode",
+    [Pure, AllTypesMatch<["result", "src"]>]>,
+  Arguments<(ins AnyIntegerOrFloatOr1DVector:$src,
+                 I32Attr:$and_mask,
+                 I32Attr:$or_mask,
+                 I32Attr:$xor_mask
+             )> {
+  let summary = "AMDGPU ds_swizzle op, bitmode variant";
+  let description = [{
+    High-level wrapper on bitmode `rocdl.ds_swizzle` op, masks are represented
+    as separate fields so user won't need to do manual bitpacking.
+
+    Supports arbitrary int/float/vector types, which will be repacked to i32 and
+    one or more `rocdl.ds_swizzle` ops during lowering.
+  }];
+  let results = (outs AnyIntegerOrFloatOr1DVector:$result);
+  let assemblyFormat = [{
+    $src $and_mask $or_mask $xor_mask attr-dict `:` type($result)
+  }];
+}
+
 def AMDGPU_LDSBarrierOp : AMDGPU_Op<"lds_barrier"> {
   let summary = "Barrier that includes a wait for LDS memory operations.";
   let description = [{
@@ -794,7 +821,7 @@ def AMDGPU_GatherToLDSOp :
 
     The `$dst`, along with its indices, points to the memory location the subgroup of this thread
     will write to.
-  
+
     Note: only enabled for gfx942 and later.
   }];
   let assemblyFormat = [{
diff --git a/mlir/test/Dialect/AMDGPU/ops.mlir b/mlir/test/Dialect/AMDGPU/ops.mlir
index 665674f2a7873..16b3193d270cb 100644
--- a/mlir/test/Dialect/AMDGPU/ops.mlir
+++ b/mlir/test/Dialect/AMDGPU/ops.mlir
@@ -157,3 +157,10 @@ func.func @wmma(%arg0 : vector<16xf16>, %arg1 : vector<8xf16>) -> vector<8xf16>
   %0 = amdgpu.wmma %arg0 * %arg0 + %arg1 : vector<16xf16>, vector<16xf16>, vector<8xf16>
   func.return %0 : vector<8xf16>
 }
+
+// CHECK-LABEL: func @swizzle_bitmode
+func.func @swizzle_bitmode(%arg0 : f32) -> f32 {
+  // CHECK: amdgpu.swizzle_bitmode
+  %0 = amdgpu.swizzle_bitmode %arg0 1 2 4 : f32
+  func.return %0 : f32
+}

>From 4b5e10b821fc06ef9feb30c7c7b1c90033272ec1 Mon Sep 17 00:00:00 2001
From: Ivan Butygin <ivan.butygin at gmail.com>
Date: Thu, 17 Apr 2025 23:49:11 +0200
Subject: [PATCH 2/2] invalid type test

---
 mlir/test/Dialect/AMDGPU/invalid.mlir | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/mlir/test/Dialect/AMDGPU/invalid.mlir b/mlir/test/Dialect/AMDGPU/invalid.mlir
index 74a421f6dd50f..40f98ff85688c 100644
--- a/mlir/test/Dialect/AMDGPU/invalid.mlir
+++ b/mlir/test/Dialect/AMDGPU/invalid.mlir
@@ -150,3 +150,11 @@ func.func @fat_raw_buffer_cast_stripping_offset_affine_map(%m: memref<8xi32, aff
   %ret = amdgpu.fat_raw_buffer_cast %m resetOffset : memref<8xi32, affine_map<(d0)[s0] -> (d0 + s0)>> to memref<8xi32, #amdgpu.address_space<fat_raw_buffer>>
   func.return %ret : memref<8xi32, #amdgpu.address_space<fat_raw_buffer>>
 }
+
+// -----
+
+func.func @swizzle_invalid_type(%arg0 : si32) -> si32 {
+  // expected-error at +1 {{amdgpu.swizzle_bitmode' op operand #0 must be Integer or Float or vector of Integer or Float values of ranks 1}}
+  %0 = amdgpu.swizzle_bitmode %arg0 1 2 4 : si32
+  func.return %0 : si32
+}