[Mlir-commits] [mlir] [mlir][vector] Add support for vector extract/insert_strided_slice in vector distribution. (PR #145421)
Charitha Saumya
llvmlistbot at llvm.org
Wed Jun 25 12:08:48 PDT 2025
================
@@ -1296,6 +1296,86 @@ func.func @vector_insert_2d_broadcast(%laneid: index) -> (vector<4x96xf32>) {
return %r : vector<4x96xf32>
}
+// -----
+// CHECK-PROP-LABEL: func.func @vector_extract_strided_slice_2d_distr_outer(
+// CHECK-PROP-SAME: %[[LANEID:.*]]: index
+// CHECK-PROP: %[[W:.*]] = gpu.warp_execute_on_lane_0{{.*}} -> (vector<64x1xf32>) {
+// CHECK-PROP: %[[VEC:.*]] = "some_def"() : () -> vector<64x32xf32>
+// CHECK-PROP: gpu.yield %[[VEC]] : vector<64x32xf32>
+// CHECK-PROP: %[[EXTRACT:.*]] = vector.extract_strided_slice %[[W]]
+// CHECK-PROP-SAME: {offsets = [8], sizes = [24], strides = [1]} : vector<64x1xf32> to vector<24x1xf32>
+// CHECK-PROP: return %[[EXTRACT]] : vector<24x1xf32>
+func.func @vector_extract_strided_slice_2d_distr_outer(%laneid: index) -> (vector<24x1xf32>) {
+ %r = gpu.warp_execute_on_lane_0(%laneid)[32] -> (vector<24x1xf32>) {
+ %0 = "some_def"() : () -> (vector<64x32xf32>)
+ %1 = vector.extract_strided_slice %0 { offsets = [8], sizes = [24], strides = [1]}
+ : vector<64x32xf32> to vector<24x32xf32>
+ gpu.yield %1 : vector<24x32xf32>
+ }
+ return %r : vector<24x1xf32>
+}
+
+// -----
+// CHECK-PROP-LABEL: func.func @vector_extract_strided_slice_2d_distr_inner(
+// CHECK-PROP-SAME: %[[LANEID:.*]]: index
+// CHECK-PROP: %[[W:.*]] = gpu.warp_execute_on_lane_0{{.*}} -> (vector<1x64xf32>) {
+// CHECK-PROP: %[[VEC:.*]] = "some_def"() : () -> vector<32x64xf32>
+// CHECK-PROP: gpu.yield %[[VEC]] : vector<32x64xf32>
+// CHECK-PROP: %[[EXTRACT:.*]] = vector.extract_strided_slice %[[W]]
+// CHECK-PROP-SAME: {offsets = [0, 12], sizes = [1, 8], strides = [1, 1]} : vector<1x64xf32> to vector<1x8xf32>
+// CHECK-PROP: return %[[EXTRACT]] : vector<1x8xf32>
+func.func @vector_extract_strided_slice_2d_distr_inner(%laneid: index) -> (vector<1x8xf32>) {
+ %r = gpu.warp_execute_on_lane_0(%laneid)[32] -> (vector<1x8xf32>) {
+ %0 = "some_def"() : () -> (vector<32x64xf32>)
+ %1 = vector.extract_strided_slice %0 { offsets = [0, 12], sizes = [32, 8], strides = [1, 1]}
+ : vector<32x64xf32> to vector<32x8xf32>
+ gpu.yield %1 : vector<32x8xf32>
+ }
+ return %r : vector<1x8xf32>
+}
+
+// -----
+// CHECK-PROP-LABEL: func.func @vector_insert_strided_slice_1d_to_2d(
+// CHECK-PROP-SAME: %[[LANEID:.*]]: index)
+// CHECK-PROP: %[[W:.*]]:2 = gpu.warp_execute_on_lane_0({{.*}} -> (vector<1xf32>, vector<64x1xf32>) {
+// CHECK-PROP: %[[SRC:.*]] = "some_def"() : () -> vector<32xf32>
+// CHECK-PROP: %[[DEST:.*]] = "some_def"() : () -> vector<64x32xf32>
+// CHECK-PROP: gpu.yield %[[SRC]], %[[DEST]] : vector<32xf32>, vector<64x32xf32>
+// CHECK-PROP: %[[INSERT:.*]] = vector.insert_strided_slice %[[W]]#0, %[[W]]#1
+// CHECK-PROP-SAME: {offsets = [18, 0], strides = [1]} : vector<1xf32> into vector<64x1xf32>
+// CHECK-PROP: return %[[INSERT]] : vector<64x1xf32>
+func.func @vector_insert_strided_slice_1d_to_2d(%laneid: index) -> (vector<64x1xf32>) {
+ %r = gpu.warp_execute_on_lane_0(%laneid)[32] -> (vector<64x1xf32>) {
+ %0 = "some_def"() : () -> (vector<32xf32>)
+ %1 = "some_def"() : () -> (vector<64x32xf32>)
+ %2 = vector.insert_strided_slice %0, %1 { offsets = [18, 0], strides = [1]}
+ : vector<32xf32> into vector<64x32xf32>
+ gpu.yield %2 : vector<64x32xf32>
+ }
+ return %r : vector<64x1xf32>
+}
+
+// -----
+// CHECK-PROP-LABEL: func.func @vector_insert_strided_slice_2d_to_2d(
+// CHECK-PROP-SAME: %[[LANEID:.*]]: index)
+// CHECK-PROP: %[[W:.*]]:2 = gpu.warp_execute_on_lane_0{{.*}} -> (vector<16x1xf32>, vector<64x1xf32>) {
+// CHECK-PROP: %[[SRC:.*]] = "some_def"() : () -> vector<16x32xf32>
+// CHECK-PROP: %[[DEST:.*]] = "some_def"() : () -> vector<64x32xf32>
+// CHECK-PROP: gpu.yield %[[SRC]], %[[DEST]] : vector<16x32xf32>, vector<64x32xf32>
+// CHECK-PROP: %[[INSERT:.*]] = vector.insert_strided_slice %[[W]]#0, %[[W]]#1 {offsets = [36, 0], strides = [1, 1]} :
+// CHECK-PROP-SAME: vector<16x1xf32> into vector<64x1xf32>
+// CHECK-PROP: return %[[INSERT]] : vector<64x1xf32>
+func.func @vector_insert_strided_slice_2d_to_2d(%laneid: index) -> (vector<64x1xf32>) {
+ %r = gpu.warp_execute_on_lane_0(%laneid)[32] -> (vector<64x1xf32>) {
+ %0 = "some_def"() : () -> (vector<16x32xf32>)
+ %1 = "some_def"() : () -> (vector<64x32xf32>)
+ %2 = vector.insert_strided_slice %0, %1 { offsets = [36, 0], strides = [1, 1]}
----------------
charithaintc wrote:
In this version, the distributed dimension must be fully inserted (its offset is always 0). I will add support for other cases in separate PRs.
Example of a case that is not yet supported:
```
func.func @vector_insert_strided_slice_2d_to_2d(%laneid: index) -> (vector<64x2xf32>) {
%r = gpu.warp_execute_on_lane_0(%laneid)[32] -> (vector<64x2xf32>) {
%0 = "some_def"() : () -> (vector<16x32xf32>)
%1 = "some_def"() : () -> (vector<64x64xf32>)
%2 = vector.insert_strided_slice %0, %1 { offsets = [36, 1], strides = [1, 1]}
: vector<16x32xf32> into vector<64x64xf32>
gpu.yield %2 : vector<64x64xf32>
}
return %r : vector<64x2xf32>
}
```
The lowering filters out this case with the following check:
```
// Distributed dimension must be fully inserted.
if (srcType.getDimSize(sourceDistributedDim) !=
destType.getDimSize(destDistributedDim))
return rewriter.notifyMatchFailure(
insertOp, "distributed dimension must be fully inserted");
```
https://github.com/llvm/llvm-project/pull/145421
More information about the Mlir-commits
mailing list