[Mlir-commits] [mlir] 552d26e - [mlir][gpu] Add extra value types for gpu::ShuffleOp (#104605)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Tue Aug 20 11:50:29 PDT 2024
Author: Finlay
Date: 2024-08-20T19:50:25+01:00
New Revision: 552d26e275b094f7ceb96ede9cc478b23e2c12cc
URL: https://github.com/llvm/llvm-project/commit/552d26e275b094f7ceb96ede9cc478b23e2c12cc
DIFF: https://github.com/llvm/llvm-project/commit/552d26e275b094f7ceb96ede9cc478b23e2c12cc.diff
LOG: [mlir][gpu] Add extra value types for gpu::ShuffleOp (#104605)
Expand the accepted types for gpu.shuffle to any integer, float or 1d vector of integers or floats.
Also updated the gpu-to-llvm-spv pass to support those types.
Added:
Modified:
mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
mlir/test/Dialect/GPU/invalid.mlir
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 2f6cbc23ae9db7..d2a5e5d77ad843 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -1274,17 +1274,11 @@ def GPU_ShuffleMode : I32EnumAttr<"ShuffleMode",
def GPU_ShuffleModeAttr : EnumAttr<GPU_Dialect, GPU_ShuffleMode,
"shuffle_mode">;
-def I32I64F32OrF64 : TypeConstraint<Or<[I32.predicate,
- I64.predicate,
- F32.predicate,
- F64.predicate]>,
- "i32, i64, f32 or f64">;
-
def GPU_ShuffleOp : GPU_Op<
"shuffle", [Pure, AllTypesMatch<["value", "shuffleResult"]>]>,
- Arguments<(ins I32I64F32OrF64:$value, I32:$offset, I32:$width,
+ Arguments<(ins AnyIntegerOrFloatOr1DVector:$value, I32:$offset, I32:$width,
GPU_ShuffleModeAttr:$mode)>,
- Results<(outs I32I64F32OrF64:$shuffleResult, I1:$valid)> {
+ Results<(outs AnyIntegerOrFloatOr1DVector:$shuffleResult, I1:$valid)> {
let summary = "Shuffles values within a subgroup.";
let description = [{
The "shuffle" op moves values to a across lanes (a.k.a., invocations,
diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
index ced4236402923a..41a3ac76df4b78 100644
--- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
+++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
@@ -241,26 +241,34 @@ struct GPUShuffleConversion final : ConvertOpToLLVMPattern<gpu::ShuffleOp> {
llvm_unreachable("Unhandled shuffle mode");
}
- static StringRef getTypeMangling(Type type) {
- return TypeSwitch<Type, StringRef>(type)
+ static std::optional<StringRef> getTypeMangling(Type type) {
+ return TypeSwitch<Type, std::optional<StringRef>>(type)
+ .Case<Float16Type>([](auto) { return "Dhj"; })
.Case<Float32Type>([](auto) { return "fj"; })
.Case<Float64Type>([](auto) { return "dj"; })
- .Case<IntegerType>([](auto intTy) {
+ .Case<IntegerType>([](auto intTy) -> std::optional<StringRef> {
switch (intTy.getWidth()) {
+ case 8:
+ return "cj";
+ case 16:
+ return "sj";
case 32:
return "ij";
case 64:
return "lj";
}
- llvm_unreachable("Invalid integer width");
- });
+ return std::nullopt;
+ })
+ .Default([](auto) { return std::nullopt; });
}
- static std::string getFuncName(gpu::ShuffleOp op) {
+ static std::optional<std::string> getFuncName(gpu::ShuffleOp op) {
StringRef baseName = getBaseName(op.getMode());
- StringRef typeMangling = getTypeMangling(op.getType(0));
+ std::optional<StringRef> typeMangling = getTypeMangling(op.getType(0));
+ if (!typeMangling)
+ return std::nullopt;
return llvm::formatv("_Z{0}{1}{2}", baseName.size(), baseName,
- typeMangling);
+ typeMangling.value());
}
/// Get the subgroup size from the target or return a default.
@@ -284,7 +292,9 @@ struct GPUShuffleConversion final : ConvertOpToLLVMPattern<gpu::ShuffleOp> {
return rewriter.notifyMatchFailure(
op, "shuffle width and subgroup size mismatch");
- std::string funcName = getFuncName(op);
+ std::optional<std::string> funcName = getFuncName(op);
+ if (!funcName)
+ return rewriter.notifyMatchFailure(op, "unsupported value type");
Operation *moduleOp = op->getParentWithTrait<OpTrait::SymbolTable>();
assert(moduleOp && "Expecting module");
@@ -292,7 +302,7 @@ struct GPUShuffleConversion final : ConvertOpToLLVMPattern<gpu::ShuffleOp> {
Type offsetType = adaptor.getOffset().getType();
Type resultType = valueType;
LLVM::LLVMFuncOp func = lookupOrCreateSPIRVFn(
- moduleOp, funcName, {valueType, offsetType}, resultType,
+ moduleOp, funcName.value(), {valueType, offsetType}, resultType,
/*isMemNone=*/false, /*isConvergent=*/true);
Location loc = op->getLoc();
diff --git a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
index ec4f4a304d5073..860bb60726352d 100644
--- a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
+++ b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
@@ -317,6 +317,12 @@ gpu.module @shuffles attributes {
// CHECK-SAME-DAG: will_return
// CHECK-NOT: memory_effects = #llvm.memory_effects
// CHECK-SAME: }
+ // CHECK: llvm.func spir_funccc @_Z20sub_group_shuffle_upDhj(f16, i32) -> f16 attributes {
+ // CHECK-SAME-DAG: no_unwind
+ // CHECK-SAME-DAG: convergent
+ // CHECK-SAME-DAG: will_return
+ // CHECK-NOT: memory_effects = #llvm.memory_effects
+ // CHECK-SAME: }
// CHECK: llvm.func spir_funccc @_Z21sub_group_shuffle_xorlj(i64, i32) -> i64 attributes {
// CHECK-SAME-DAG: no_unwind
// CHECK-SAME-DAG: convergent
@@ -329,26 +335,54 @@ gpu.module @shuffles attributes {
// CHECK-SAME-DAG: will_return
// CHECK-NOT: memory_effects = #llvm.memory_effects
// CHECK-SAME: }
+ // CHECK: llvm.func spir_funccc @_Z21sub_group_shuffle_xorsj(i16, i32) -> i16 attributes {
+ // CHECK-SAME-DAG: no_unwind
+ // CHECK-SAME-DAG: convergent
+ // CHECK-SAME-DAG: will_return
+ // CHECK-NOT: memory_effects = #llvm.memory_effects
+ // CHECK-SAME: }
+ // CHECK: llvm.func spir_funccc @_Z17sub_group_shufflecj(i8, i32) -> i8 attributes {
+ // CHECK-SAME-DAG: no_unwind
+ // CHECK-SAME-DAG: convergent
+ // CHECK-SAME-DAG: will_return
+ // CHECK-NOT: memory_effects = #llvm.memory_effects
+ // CHECK-SAME: }
// CHECK-LABEL: gpu_shuffles
- // CHECK-SAME: (%[[VAL_0:.*]]: i32, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: i64, %[[VAL_3:.*]]: i32, %[[VAL_4:.*]]: f32, %[[VAL_5:.*]]: i32, %[[VAL_6:.*]]: f64, %[[VAL_7:.*]]: i32)
- func.func @gpu_shuffles(%val0: i32, %id: i32,
- %val1: i64, %mask: i32,
- %val2: f32, %delta_up: i32,
- %val3: f64, %delta_down: i32) {
+ // CHECK-SAME: (%[[I8_VAL:.*]]: i8, %[[I16_VAL:.*]]: i16,
+ // CHECK-SAME: %[[I32_VAL:.*]]: i32, %[[I64_VAL:.*]]: i64,
+ // CHECK-SAME: %[[F16_VAL:.*]]: f16, %[[F32_VAL:.*]]: f32,
+ // CHECK-SAME: %[[F64_VAL:.*]]: f64, %[[OFFSET:.*]]: i32) {
+ func.func @gpu_shuffles(%i8_val: i8,
+ %i16_val: i16,
+ %i32_val: i32,
+ %i64_val: i64,
+ %f16_val: f16,
+ %f32_val: f32,
+ %f64_val: f64,
+ %offset: i32) {
%width = arith.constant 16 : i32
- // CHECK: llvm.call spir_funccc @_Z17sub_group_shuffleij(%[[VAL_0]], %[[VAL_1]])
+ // CHECK: llvm.call spir_funccc @_Z17sub_group_shufflecj(%[[I8_VAL]], %[[OFFSET]])
// CHECK: llvm.mlir.constant(true) : i1
- // CHECK: llvm.call spir_funccc @_Z21sub_group_shuffle_xorlj(%[[VAL_2]], %[[VAL_3]])
+ // CHECK: llvm.call spir_funccc @_Z21sub_group_shuffle_xorsj(%[[I16_VAL]], %[[OFFSET]])
// CHECK: llvm.mlir.constant(true) : i1
- // CHECK: llvm.call spir_funccc @_Z20sub_group_shuffle_upfj(%[[VAL_4]], %[[VAL_5]])
+ // CHECK: llvm.call spir_funccc @_Z17sub_group_shuffleij(%[[I32_VAL]], %[[OFFSET]])
// CHECK: llvm.mlir.constant(true) : i1
- // CHECK: llvm.call spir_funccc @_Z22sub_group_shuffle_downdj(%[[VAL_6]], %[[VAL_7]])
+ // CHECK: llvm.call spir_funccc @_Z21sub_group_shuffle_xorlj(%[[I64_VAL]], %[[OFFSET]])
// CHECK: llvm.mlir.constant(true) : i1
- %shuffleResult0, %valid0 = gpu.shuffle idx %val0, %id, %width : i32
- %shuffleResult1, %valid1 = gpu.shuffle xor %val1, %mask, %width : i64
- %shuffleResult2, %valid2 = gpu.shuffle up %val2, %delta_up, %width : f32
- %shuffleResult3, %valid3 = gpu.shuffle down %val3, %delta_down, %width : f64
+ // CHECK: llvm.call spir_funccc @_Z20sub_group_shuffle_upDhj(%[[F16_VAL]], %[[OFFSET]])
+ // CHECK: llvm.mlir.constant(true) : i1
+ // CHECK: llvm.call spir_funccc @_Z20sub_group_shuffle_upfj(%[[F32_VAL]], %[[OFFSET]])
+ // CHECK: llvm.mlir.constant(true) : i1
+ // CHECK: llvm.call spir_funccc @_Z22sub_group_shuffle_downdj(%[[F64_VAL]], %[[OFFSET]])
+ // CHECK: llvm.mlir.constant(true) : i1
+ %shuffleResult0, %valid0 = gpu.shuffle idx %i8_val, %offset, %width : i8
+ %shuffleResult1, %valid1 = gpu.shuffle xor %i16_val, %offset, %width : i16
+ %shuffleResult2, %valid2 = gpu.shuffle idx %i32_val, %offset, %width : i32
+ %shuffleResult3, %valid3 = gpu.shuffle xor %i64_val, %offset, %width : i64
+ %shuffleResult4, %valid4 = gpu.shuffle up %f16_val, %offset, %width : f16
+ %shuffleResult5, %valid5 = gpu.shuffle up %f32_val, %offset, %width : f32
+ %shuffleResult6, %valid6 = gpu.shuffle down %f64_val, %offset, %width : f64
return
}
}
@@ -378,6 +412,20 @@ gpu.module @shuffles_mismatch {
}
}
+// -----
+
+// Cannot convert due to value type not being supported by the conversion
+
+gpu.module @not_supported_lowering {
+ func.func @gpu_shuffles(%val: i1, %id: i32) {
+ %width = arith.constant 32 : i32
+ // expected-error@below {{failed to legalize operation 'gpu.shuffle' that was explicitly marked illegal}}
+ %shuffleResult, %valid = gpu.shuffle xor %val, %id, %width : i1
+ return
+ }
+}
+
+
// -----
gpu.module @kernels {
diff --git a/mlir/test/Dialect/GPU/invalid.mlir b/mlir/test/Dialect/GPU/invalid.mlir
index ce09190e1b7280..20c1c4cf8a2d0b 100644
--- a/mlir/test/Dialect/GPU/invalid.mlir
+++ b/mlir/test/Dialect/GPU/invalid.mlir
@@ -446,7 +446,7 @@ func.func @shuffle_mismatching_type(%arg0 : f32, %arg1 : i32, %arg2 : i32) {
// -----
func.func @shuffle_unsupported_type(%arg0 : index, %arg1 : i32, %arg2 : i32) {
- // expected-error@+1 {{operand #0 must be i32, i64, f32 or f64}}
+ // expected-error@+1 {{op operand #0 must be Integer or Float or vector of Integer or Float values of ranks 1, but got 'index'}}
%shfl, %pred = gpu.shuffle xor %arg0, %arg1, %arg2 : index
return
}
More information about the Mlir-commits
mailing list