[Mlir-commits] [mlir] 0abf227 - [mlir][amdgpu] Add `amdgpu.swizzle_bitmode` op (#135513)

Thu Apr 17 15:23:52 PDT 2025

Author: Ivan Butygin
Date: 2025-04-18T01:23:49+03:00
New Revision: 0abf227c13eb80f15659f747094df2db1c34d20d

URL: https://github.com/llvm/llvm-project/commit/0abf227c13eb80f15659f747094df2db1c34d20d
DIFF: https://github.com/llvm/llvm-project/commit/0abf227c13eb80f15659f747094df2db1c34d20d.diff

LOG: [mlir][amdgpu] Add `amdgpu.swizzle_bitmode` op (#135513)

High-level wrapper on top of `rocdl.ds_swizzle`. Also includes some DPP op
cleanup while I'm here.

The lowering will be added in a separate PR.

Added: 
    

Modified: 
    mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
    mlir/test/Dialect/AMDGPU/invalid.mlir
    mlir/test/Dialect/AMDGPU/ops.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
index 108d7237ff703..a92ebf6d8e108 100644

--- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
+++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
@@ -35,6 +35,11 @@ def AMDGPU_Dialect : Dialect {
   let useDefaultAttributePrinterParser = 1;
 }
 
+def AnyIntegerOrFloat : AnyTypeOf<[AnySignlessInteger, AnyFloat], "Integer or Float">;
+
+def AnyIntegerOrFloatOr1DVector :
+  AnyTypeOf<[AnyIntegerOrFloat, VectorOfRankAndType<[1], [AnyIntegerOrFloat]>]>;
+
 //===----------------------------------------------------------------------===//
 // AMDGPU general attribute definitions
 //===----------------------------------------------------------------------===//
@@ -533,14 +538,15 @@ def AMDGPU_DPPPerm : I32EnumAttr<"DPPPerm",
 def AMDGPU_DPPPermAttr : EnumAttr<AMDGPU_Dialect, AMDGPU_DPPPerm,
   "dpp_perm">;
 
-def AMDGPU_DPPOp : AMDGPU_Op<"dpp", [SameTypeOperands, AllTypesMatch<["result", "old", "src"]>]>,
+def AMDGPU_DPPOp : AMDGPU_Op<"dpp",
+    [Pure, SameTypeOperands, AllTypesMatch<["result", "old", "src"]>]>,
   Arguments<(ins AnyType:$old,
-                  AnyType:$src,
-                  AMDGPU_DPPPermAttr:$kind,
-                  OptionalAttr<AnyAttrOf<[I32Attr, ArrayAttr, UnitAttr]>>:$permArgument,
-                  DefaultValuedAttr<I32Attr, "0xf">:$row_mask,
-                  DefaultValuedAttr<I32Attr, "0xf">:$bank_mask,
-                  DefaultValuedAttr<BoolAttr, "false">:$bound_ctrl)> {
+                 AnyType:$src,
+                 AMDGPU_DPPPermAttr:$kind,
+                 OptionalAttr<AnyAttrOf<[I32Attr, ArrayAttr, UnitAttr]>>:$permArgument,
+                 DefaultValuedAttr<I32Attr, "0xf">:$row_mask,
+                 DefaultValuedAttr<I32Attr, "0xf">:$bank_mask,
+                 DefaultValuedAttr<BoolAttr, "false">:$bound_ctrl)> {
   let summary = "AMDGPU DPP operation";
   let description = [{
     This operation represents DPP functionality in a GPU program.
@@ -565,6 +571,27 @@ def AMDGPU_DPPOp : AMDGPU_Op<"dpp", [SameTypeOperands, AllTypesMatch<["result",
   let hasVerifier = 1;
 }
 
+def AMDGPU_SwizzleBitModeOp : AMDGPU_Op<"swizzle_bitmode",
+    [Pure, AllTypesMatch<["result", "src"]>]>,
+  Arguments<(ins AnyIntegerOrFloatOr1DVector:$src,
+                 I32Attr:$and_mask,
+                 I32Attr:$or_mask,
+                 I32Attr:$xor_mask
+             )> {
+  let summary = "AMDGPU ds_swizzle op, bitmode variant";
+  let description = [{
+    High-level wrapper on bitmode `rocdl.ds_swizzle` op, masks are represented
+    as separate fields so user won't need to do manual bitpacking.
+
+    Supports arbitrary int/float/vector types, which will be repacked to i32 and
+    one or more `rocdl.ds_swizzle` ops during lowering.
+  }];
+  let results = (outs AnyIntegerOrFloatOr1DVector:$result);
+  let assemblyFormat = [{
+    $src $and_mask $or_mask $xor_mask attr-dict `:` type($result)
+  }];
+}
+
 def AMDGPU_LDSBarrierOp : AMDGPU_Op<"lds_barrier"> {
   let summary = "Barrier that includes a wait for LDS memory operations.";
   let description = [{
@@ -794,7 +821,7 @@ def AMDGPU_GatherToLDSOp :
 
     The `$dst`, along with its indices, points to the memory location the subgroup of this thread
     will write to.
-  
+
     Note: only enabled for gfx942 and later.
   }];
   let assemblyFormat = [{

diff  --git a/mlir/test/Dialect/AMDGPU/invalid.mlir b/mlir/test/Dialect/AMDGPU/invalid.mlir
index 74a421f6dd50f..40f98ff85688c 100644
--- a/mlir/test/Dialect/AMDGPU/invalid.mlir
+++ b/mlir/test/Dialect/AMDGPU/invalid.mlir
@@ -150,3 +150,11 @@ func.func @fat_raw_buffer_cast_stripping_offset_affine_map(%m: memref<8xi32, aff
   %ret = amdgpu.fat_raw_buffer_cast %m resetOffset : memref<8xi32, affine_map<(d0)[s0] -> (d0 + s0)>> to memref<8xi32, #amdgpu.address_space<fat_raw_buffer>>
   func.return %ret : memref<8xi32, #amdgpu.address_space<fat_raw_buffer>>
 }
+
+// -----
+
+func.func @swizzle_invalid_type(%arg0 : si32) -> si32 {
+  // expected-error @+1 {{amdgpu.swizzle_bitmode' op operand #0 must be Integer or Float or vector of Integer or Float values of ranks 1}}
+  %0 = amdgpu.swizzle_bitmode %arg0 1 2 4 : si32
+  func.return %0 : si32
+}

diff  --git a/mlir/test/Dialect/AMDGPU/ops.mlir b/mlir/test/Dialect/AMDGPU/ops.mlir
index 665674f2a7873..16b3193d270cb 100644
--- a/mlir/test/Dialect/AMDGPU/ops.mlir
+++ b/mlir/test/Dialect/AMDGPU/ops.mlir
@@ -157,3 +157,10 @@ func.func @wmma(%arg0 : vector<16xf16>, %arg1 : vector<8xf16>) -> vector<8xf16>
   %0 = amdgpu.wmma %arg0 * %arg0 + %arg1 : vector<16xf16>, vector<16xf16>, vector<8xf16>
   func.return %0 : vector<8xf16>
 }
+
+// CHECK-LABEL: func @swizzle_bitmode
+func.func @swizzle_bitmode(%arg0 : f32) -> f32 {
+  // CHECK: amdgpu.swizzle_bitmode
+  %0 = amdgpu.swizzle_bitmode %arg0 1 2 4 : f32
+  func.return %0 : f32
+}