[Mlir-commits] [mlir] [mlir][gpu] Add extra value types for gpu::ShuffleOp (PR #104605)

Tue Aug 20 08:06:02 PDT 2024

https://github.com/FMarno updated https://github.com/llvm/llvm-project/pull/104605

>From 482035c702634278a1df90bf1e28a88a499c515e Mon Sep 17 00:00:00 2001
From: Finlay Marno <finlay.marno at codeplay.com>
Date: Fri, 16 Aug 2024 16:22:13 +0100
Subject: [PATCH 1/3] [mlir] Add extra value types for gpu.shuffle op

Adds the i8, i16, and f16 types.
Also updates the gpu-to-llvm-spv pass to support them.
---
 mlir/include/mlir/Dialect/GPU/IR/GPUOps.td    | 14 ++++----
 .../Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp  |  5 +++
 .../GPUToLLVMSPV/gpu-to-llvm-spv.mlir         | 33 ++++++++++---------
 mlir/test/Dialect/GPU/invalid.mlir            |  2 +-
 4 files changed, 30 insertions(+), 24 deletions(-)

diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index a024c3018eb8d3..47c0a8039d942a 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -1250,17 +1250,17 @@ def GPU_ShuffleMode : I32EnumAttr<"ShuffleMode",
 def GPU_ShuffleModeAttr : EnumAttr<GPU_Dialect, GPU_ShuffleMode,
                                    "shuffle_mode">;
 
-def I32I64F32OrF64 : TypeConstraint<Or<[I32.predicate,
-                                        I64.predicate,
-                                        F32.predicate,
-                                        F64.predicate]>,
-                                       "i32, i64, f32 or f64">;
+def GPUShuffleTypes : TypeConstraint<Or<[
+  SignlessIntOfWidths<[8, 16, 32, 64]>.predicate,
+  FloatOfWidths<[16, 32, 64]>.predicate
+  ]>,
+  "i8, i16, i32, i64, f16, f32 or f64">;
 
 def GPU_ShuffleOp : GPU_Op<
     "shuffle", [Pure, AllTypesMatch<["value", "shuffleResult"]>]>,
-    Arguments<(ins I32I64F32OrF64:$value, I32:$offset, I32:$width,
+    Arguments<(ins GPUShuffleTypes:$value, I32:$offset, I32:$width,
                GPU_ShuffleModeAttr:$mode)>,
-    Results<(outs I32I64F32OrF64:$shuffleResult, I1:$valid)> {
+    Results<(outs GPUShuffleTypes:$shuffleResult, I1:$valid)> {
   let summary = "Shuffles values within a subgroup.";
   let description = [{
     The "shuffle" op moves values to a across lanes (a.k.a., invocations,
diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
index ced4236402923a..62776cae834eb8 100644
--- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
+++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
@@ -243,10 +243,15 @@ struct GPUShuffleConversion final : ConvertOpToLLVMPattern<gpu::ShuffleOp> {
 
   static StringRef getTypeMangling(Type type) {
     return TypeSwitch<Type, StringRef>(type)
+        .Case<Float16Type>([](auto) { return "Dhj"; })
         .Case<Float32Type>([](auto) { return "fj"; })
         .Case<Float64Type>([](auto) { return "dj"; })
         .Case<IntegerType>([](auto intTy) {
           switch (intTy.getWidth()) {
+          case 8:
+            return "cj";
+          case 16:
+            return "sj";
           case 32:
             return "ij";
           case 64:
diff --git a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
index ec4f4a304d5073..eb0da3a95ba4b7 100644
--- a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
+++ b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
@@ -311,19 +311,19 @@ gpu.module @shuffles attributes {
   // CHECK-SAME-DAG:  will_return
   // CHECK-NOT:       memory_effects = #llvm.memory_effects
   // CHECK-SAME:      }
-  // CHECK:           llvm.func spir_funccc @_Z20sub_group_shuffle_upfj(f32, i32) -> f32 attributes {
+  // CHECK:           llvm.func spir_funccc @_Z20sub_group_shuffle_upDhj(f16, i32) -> f16 attributes {
   // CHECK-SAME-DAG:  no_unwind
   // CHECK-SAME-DAG:  convergent
   // CHECK-SAME-DAG:  will_return
   // CHECK-NOT:       memory_effects = #llvm.memory_effects
   // CHECK-SAME:      }
-  // CHECK:           llvm.func spir_funccc @_Z21sub_group_shuffle_xorlj(i64, i32) -> i64 attributes {
+  // CHECK:           llvm.func spir_funccc @_Z21sub_group_shuffle_xorsj(i16, i32) -> i16 attributes {
   // CHECK-SAME-DAG:  no_unwind
   // CHECK-SAME-DAG:  convergent
   // CHECK-SAME-DAG:  will_return
   // CHECK-NOT:       memory_effects = #llvm.memory_effects
   // CHECK-SAME:      }
-  // CHECK:           llvm.func spir_funccc @_Z17sub_group_shuffleij(i32, i32) -> i32 attributes {
+  // CHECK:           llvm.func spir_funccc @_Z17sub_group_shufflecj(i8, i32) -> i8 attributes {
   // CHECK-SAME-DAG:  no_unwind
   // CHECK-SAME-DAG:  convergent
   // CHECK-SAME-DAG:  will_return
@@ -331,24 +331,25 @@ gpu.module @shuffles attributes {
   // CHECK-SAME:      }
 
   // CHECK-LABEL: gpu_shuffles
-  // CHECK-SAME:              (%[[VAL_0:.*]]: i32, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: i64, %[[VAL_3:.*]]: i32, %[[VAL_4:.*]]: f32, %[[VAL_5:.*]]: i32, %[[VAL_6:.*]]: f64, %[[VAL_7:.*]]: i32)
-  func.func @gpu_shuffles(%val0: i32, %id: i32,
-                          %val1: i64, %mask: i32,
-                          %val2: f32, %delta_up: i32,
-                          %val3: f64, %delta_down: i32) {
+  // CHECK-SAME:              (%[[I8_VAL:.*]]: i8, %[[I16_VAL:.*]]: i16, %[[F16_VAL:.*]]: f16, %[[F64_VAL:.*]]: f64,  %[[OFFSET:.*]]: i32)
+  func.func @gpu_shuffles(%i8_val: i8,
+                          %i16_val: i16,
+                          %f16_val: f16,
+                          %f64_val: f64,
+                          %offset: i32) {
     %width = arith.constant 16 : i32
-    // CHECK:         llvm.call spir_funccc @_Z17sub_group_shuffleij(%[[VAL_0]], %[[VAL_1]])
+    // CHECK:         llvm.call spir_funccc @_Z17sub_group_shufflecj(%[[I8_VAL]], %[[OFFSET]])
     // CHECK:         llvm.mlir.constant(true) : i1
-    // CHECK:         llvm.call spir_funccc @_Z21sub_group_shuffle_xorlj(%[[VAL_2]], %[[VAL_3]])
+    // CHECK:         llvm.call spir_funccc @_Z21sub_group_shuffle_xorsj(%[[I16_VAL]], %[[OFFSET]])
     // CHECK:         llvm.mlir.constant(true) : i1
-    // CHECK:         llvm.call spir_funccc @_Z20sub_group_shuffle_upfj(%[[VAL_4]], %[[VAL_5]])
+    // CHECK:         llvm.call spir_funccc @_Z20sub_group_shuffle_upDhj(%[[F16_VAL]], %[[OFFSET]])
     // CHECK:         llvm.mlir.constant(true) : i1
-    // CHECK:         llvm.call spir_funccc @_Z22sub_group_shuffle_downdj(%[[VAL_6]], %[[VAL_7]])
+    // CHECK:         llvm.call spir_funccc @_Z22sub_group_shuffle_downdj(%[[F64_VAL]], %[[OFFSET]])
     // CHECK:         llvm.mlir.constant(true) : i1
-    %shuffleResult0, %valid0 = gpu.shuffle idx %val0, %id, %width : i32
-    %shuffleResult1, %valid1 = gpu.shuffle xor %val1, %mask, %width : i64
-    %shuffleResult2, %valid2 = gpu.shuffle up %val2, %delta_up, %width : f32
-    %shuffleResult3, %valid3 = gpu.shuffle down %val3, %delta_down, %width : f64
+    %shuffleResult0, %valid0 = gpu.shuffle idx %i8_val, %offset, %width : i8
+    %shuffleResult1, %valid1 = gpu.shuffle xor %i16_val, %offset, %width : i16
+    %shuffleResult2, %valid2 = gpu.shuffle up %f16_val, %offset, %width : f16
+    %shuffleResult3, %valid3 = gpu.shuffle down %f64_val, %offset, %width : f64
     return
   }
 }
diff --git a/mlir/test/Dialect/GPU/invalid.mlir b/mlir/test/Dialect/GPU/invalid.mlir
index e9d8f329be8ede..0831a111d07df6 100644
--- a/mlir/test/Dialect/GPU/invalid.mlir
+++ b/mlir/test/Dialect/GPU/invalid.mlir
@@ -430,7 +430,7 @@ func.func @shuffle_mismatching_type(%arg0 : f32, %arg1 : i32, %arg2 : i32) {
 // -----
 
 func.func @shuffle_unsupported_type(%arg0 : index, %arg1 : i32, %arg2 : i32) {
-  // expected-error@+1 {{operand #0 must be i32, i64, f32 or f64}}
+  // expected-error@+1 {{operand #0 must be i8, i16, i32, i64, f16, f32 or f64}}
   %shfl, %pred = gpu.shuffle xor %arg0, %arg1, %arg2 : index
   return
 }

>From 7041ef76e86c9e1a545523c16463634d027aca99 Mon Sep 17 00:00:00 2001
From: Finlay Marno <finlay.marno at codeplay.com>
Date: Mon, 19 Aug 2024 10:57:10 +0100
Subject: [PATCH 2/3] fixup! [mlir] Add extra value types for gpu.shuffle op

---
 .../GPUToLLVMSPV/gpu-to-llvm-spv.mlir         | 39 +++++++++++++++++--
 1 file changed, 36 insertions(+), 3 deletions(-)

diff --git a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
index eb0da3a95ba4b7..9a4e22834b64f3 100644
--- a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
+++ b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
@@ -311,12 +311,30 @@ gpu.module @shuffles attributes {
   // CHECK-SAME-DAG:  will_return
   // CHECK-NOT:       memory_effects = #llvm.memory_effects
   // CHECK-SAME:      }
+  // CHECK:           llvm.func spir_funccc @_Z20sub_group_shuffle_upfj(f32, i32) -> f32 attributes {
+  // CHECK-SAME-DAG:  no_unwind
+  // CHECK-SAME-DAG:  convergent
+  // CHECK-SAME-DAG:  will_return
+  // CHECK-NOT:       memory_effects = #llvm.memory_effects
+  // CHECK-SAME:      }
   // CHECK:           llvm.func spir_funccc @_Z20sub_group_shuffle_upDhj(f16, i32) -> f16 attributes {
   // CHECK-SAME-DAG:  no_unwind
   // CHECK-SAME-DAG:  convergent
   // CHECK-SAME-DAG:  will_return
   // CHECK-NOT:       memory_effects = #llvm.memory_effects
   // CHECK-SAME:      }
+  // CHECK:           llvm.func spir_funccc @_Z21sub_group_shuffle_xorlj(i64, i32) -> i64 attributes {
+  // CHECK-SAME-DAG:  no_unwind
+  // CHECK-SAME-DAG:  convergent
+  // CHECK-SAME-DAG:  will_return
+  // CHECK-NOT:       memory_effects = #llvm.memory_effects
+  // CHECK-SAME:      }
+  // CHECK:           llvm.func spir_funccc @_Z17sub_group_shuffleij(i32, i32) -> i32 attributes {
+  // CHECK-SAME-DAG:  no_unwind
+  // CHECK-SAME-DAG:  convergent
+  // CHECK-SAME-DAG:  will_return
+  // CHECK-NOT:       memory_effects = #llvm.memory_effects
+  // CHECK-SAME:      }
   // CHECK:           llvm.func spir_funccc @_Z21sub_group_shuffle_xorsj(i16, i32) -> i16 attributes {
   // CHECK-SAME-DAG:  no_unwind
   // CHECK-SAME-DAG:  convergent
@@ -331,10 +349,16 @@ gpu.module @shuffles attributes {
   // CHECK-SAME:      }
 
   // CHECK-LABEL: gpu_shuffles
-  // CHECK-SAME:              (%[[I8_VAL:.*]]: i8, %[[I16_VAL:.*]]: i16, %[[F16_VAL:.*]]: f16, %[[F64_VAL:.*]]: f64,  %[[OFFSET:.*]]: i32)
+  // CHECK-SAME:              (%[[I8_VAL:.*]]: i8, %[[I16_VAL:.*]]: i16,
+  // CHECK-SAME:               %[[I32_VAL:.*]]: i32, %[[I64_VAL:.*]]: i64,
+  // CHECK-SAME:               %[[F16_VAL:.*]]: f16, %[[F32_VAL:.*]]: f32,
+  // CHECK-SAME:               %[[F64_VAL:.*]]: f64,  %[[OFFSET:.*]]: i32) {
   func.func @gpu_shuffles(%i8_val: i8,
                           %i16_val: i16,
+                          %i32_val: i32,
+                          %i64_val: i64,
                           %f16_val: f16,
+                          %f32_val: f32,
                           %f64_val: f64,
                           %offset: i32) {
     %width = arith.constant 16 : i32
@@ -342,14 +366,23 @@ gpu.module @shuffles attributes {
     // CHECK:         llvm.mlir.constant(true) : i1
     // CHECK:         llvm.call spir_funccc @_Z21sub_group_shuffle_xorsj(%[[I16_VAL]], %[[OFFSET]])
     // CHECK:         llvm.mlir.constant(true) : i1
+    // CHECK:         llvm.call spir_funccc @_Z17sub_group_shuffleij(%[[I32_VAL]], %[[OFFSET]])
+    // CHECK:         llvm.mlir.constant(true) : i1
+    // CHECK:         llvm.call spir_funccc @_Z21sub_group_shuffle_xorlj(%[[I64_VAL]], %[[OFFSET]])
+    // CHECK:         llvm.mlir.constant(true) : i1
     // CHECK:         llvm.call spir_funccc @_Z20sub_group_shuffle_upDhj(%[[F16_VAL]], %[[OFFSET]])
     // CHECK:         llvm.mlir.constant(true) : i1
+    // CHECK:         llvm.call spir_funccc @_Z20sub_group_shuffle_upfj(%[[F32_VAL]], %[[OFFSET]])
+    // CHECK:         llvm.mlir.constant(true) : i1
     // CHECK:         llvm.call spir_funccc @_Z22sub_group_shuffle_downdj(%[[F64_VAL]], %[[OFFSET]])
     // CHECK:         llvm.mlir.constant(true) : i1
     %shuffleResult0, %valid0 = gpu.shuffle idx %i8_val, %offset, %width : i8
     %shuffleResult1, %valid1 = gpu.shuffle xor %i16_val, %offset, %width : i16
-    %shuffleResult2, %valid2 = gpu.shuffle up %f16_val, %offset, %width : f16
-    %shuffleResult3, %valid3 = gpu.shuffle down %f64_val, %offset, %width : f64
+    %shuffleResult2, %valid2 = gpu.shuffle idx %i32_val, %offset, %width : i32
+    %shuffleResult3, %valid3 = gpu.shuffle xor %i64_val, %offset, %width : i64
+    %shuffleResult4, %valid4 = gpu.shuffle up %f16_val, %offset, %width : f16
+    %shuffleResult5, %valid5 = gpu.shuffle up %f32_val, %offset, %width : f32
+    %shuffleResult6, %valid6 = gpu.shuffle down %f64_val, %offset, %width : f64
     return
   }
 }

>From 7623fea5cb9b41e278c7a889fa363cb0c5323783 Mon Sep 17 00:00:00 2001
From: Finlay Marno <finlay.marno at codeplay.com>
Date: Tue, 20 Aug 2024 16:05:42 +0100
Subject: [PATCH 3/3] fixup! [mlir] Add extra value types for gpu.shuffle op

---
 mlir/include/mlir/Dialect/GPU/IR/GPUOps.td    | 10 ++------
 .../Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp  | 25 +++++++++++--------
 .../GPUToLLVMSPV/gpu-to-llvm-spv.mlir         | 14 +++++++++++
 mlir/test/Dialect/GPU/invalid.mlir            |  2 +-
 4 files changed, 32 insertions(+), 19 deletions(-)

diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 47c0a8039d942a..ece910af24625b 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -1250,17 +1250,11 @@ def GPU_ShuffleMode : I32EnumAttr<"ShuffleMode",
 def GPU_ShuffleModeAttr : EnumAttr<GPU_Dialect, GPU_ShuffleMode,
                                    "shuffle_mode">;
 
-def GPUShuffleTypes : TypeConstraint<Or<[
-  SignlessIntOfWidths<[8, 16, 32, 64]>.predicate,
-  FloatOfWidths<[16, 32, 64]>.predicate
-  ]>,
-  "i8, i16, i32, i64, f16, f32 or f64">;
-
 def GPU_ShuffleOp : GPU_Op<
     "shuffle", [Pure, AllTypesMatch<["value", "shuffleResult"]>]>,
-    Arguments<(ins GPUShuffleTypes:$value, I32:$offset, I32:$width,
+    Arguments<(ins AnyIntegerOrFloatOr1DVector:$value, I32:$offset, I32:$width,
                GPU_ShuffleModeAttr:$mode)>,
-    Results<(outs GPUShuffleTypes:$shuffleResult, I1:$valid)> {
+    Results<(outs AnyIntegerOrFloatOr1DVector:$shuffleResult, I1:$valid)> {
   let summary = "Shuffles values within a subgroup.";
   let description = [{
     The "shuffle" op moves values to a across lanes (a.k.a., invocations,
diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
index 62776cae834eb8..41a3ac76df4b78 100644
--- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
+++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
@@ -241,12 +241,12 @@ struct GPUShuffleConversion final : ConvertOpToLLVMPattern<gpu::ShuffleOp> {
     llvm_unreachable("Unhandled shuffle mode");
   }
 
-  static StringRef getTypeMangling(Type type) {
-    return TypeSwitch<Type, StringRef>(type)
+  static std::optional<StringRef> getTypeMangling(Type type) {
+    return TypeSwitch<Type, std::optional<StringRef>>(type)
         .Case<Float16Type>([](auto) { return "Dhj"; })
         .Case<Float32Type>([](auto) { return "fj"; })
         .Case<Float64Type>([](auto) { return "dj"; })
-        .Case<IntegerType>([](auto intTy) {
+        .Case<IntegerType>([](auto intTy) -> std::optional<StringRef> {
           switch (intTy.getWidth()) {
           case 8:
             return "cj";
@@ -257,15 +257,18 @@ struct GPUShuffleConversion final : ConvertOpToLLVMPattern<gpu::ShuffleOp> {
           case 64:
             return "lj";
           }
-          llvm_unreachable("Invalid integer width");
-        });
+          return std::nullopt;
+        })
+        .Default([](auto) { return std::nullopt; });
   }
 
-  static std::string getFuncName(gpu::ShuffleOp op) {
+  static std::optional<std::string> getFuncName(gpu::ShuffleOp op) {
     StringRef baseName = getBaseName(op.getMode());
-    StringRef typeMangling = getTypeMangling(op.getType(0));
+    std::optional<StringRef> typeMangling = getTypeMangling(op.getType(0));
+    if (!typeMangling)
+      return std::nullopt;
     return llvm::formatv("_Z{0}{1}{2}", baseName.size(), baseName,
-                         typeMangling);
+                         typeMangling.value());
   }
 
   /// Get the subgroup size from the target or return a default.
@@ -289,7 +292,9 @@ struct GPUShuffleConversion final : ConvertOpToLLVMPattern<gpu::ShuffleOp> {
       return rewriter.notifyMatchFailure(
           op, "shuffle width and subgroup size mismatch");
 
-    std::string funcName = getFuncName(op);
+    std::optional<std::string> funcName = getFuncName(op);
+    if (!funcName)
+      return rewriter.notifyMatchFailure(op, "unsupported value type");
 
     Operation *moduleOp = op->getParentWithTrait<OpTrait::SymbolTable>();
     assert(moduleOp && "Expecting module");
@@ -297,7 +302,7 @@ struct GPUShuffleConversion final : ConvertOpToLLVMPattern<gpu::ShuffleOp> {
     Type offsetType = adaptor.getOffset().getType();
     Type resultType = valueType;
     LLVM::LLVMFuncOp func = lookupOrCreateSPIRVFn(
-        moduleOp, funcName, {valueType, offsetType}, resultType,
+        moduleOp, funcName.value(), {valueType, offsetType}, resultType,
         /*isMemNone=*/false, /*isConvergent=*/true);
 
     Location loc = op->getLoc();
diff --git a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
index 9a4e22834b64f3..860bb60726352d 100644
--- a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
+++ b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
@@ -412,6 +412,20 @@ gpu.module @shuffles_mismatch {
   }
 }
 
+// -----
+
+// Cannot convert due to value type not being supported by the conversion
+
+gpu.module @not_supported_lowering {
+  func.func @gpu_shuffles(%val: i1, %id: i32) {
+    %width = arith.constant 32 : i32
+    // expected-error@below {{failed to legalize operation 'gpu.shuffle' that was explicitly marked illegal}}
+    %shuffleResult, %valid = gpu.shuffle xor %val, %id, %width : i1
+    return
+  }
+}
+
+
 // -----
 
 gpu.module @kernels {
diff --git a/mlir/test/Dialect/GPU/invalid.mlir b/mlir/test/Dialect/GPU/invalid.mlir
index 0831a111d07df6..ff59c652ac79fd 100644
--- a/mlir/test/Dialect/GPU/invalid.mlir
+++ b/mlir/test/Dialect/GPU/invalid.mlir
@@ -430,7 +430,7 @@ func.func @shuffle_mismatching_type(%arg0 : f32, %arg1 : i32, %arg2 : i32) {
 // -----
 
 func.func @shuffle_unsupported_type(%arg0 : index, %arg1 : i32, %arg2 : i32) {
-  // expected-error@+1 {{operand #0 must be i8, i16, i32, i64, f16, f32 or f64}}
+  // expected-error@+1 {{op operand #0 must be Integer or Float or vector of Integer or Float values of ranks 1, but got 'index'}}
   %shfl, %pred = gpu.shuffle xor %arg0, %arg1, %arg2 : index
   return
 }