[Mlir-commits] [mlir] add vector subbyte store support (PR #70293)

llvmlistbot at llvm.org
Wed Oct 25 22:48:35 PDT 2023


saienduri wrote:

Here are the IR dumps for the lowering, in case they help with the test cases.
The i8 vector store goes through unchanged, with no conversions (that width is already supported); the sub-byte i4 cases below are rewritten, as sketched right after this paragraph.
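
Quick sketch of the index arithmetic behind the dumps (a minimal Python model; the function and variable names are mine, not the pass's):

# Model of the emulation rule, assuming an 8-bit backing memref.
# i8 is the identity case (one element per byte), which is why the
# first dump shows no changes; for i4, two elements pack into each
# byte and the linearized element index is halved.
def emulated_byte_offset(linear_elem_index, elem_bits):
    elems_per_byte = 8 // elem_bits   # 1 for i8, 2 for i4
    return linear_elem_index // elems_per_byte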

// -----// IR Dump After {anonymous}::TestEmulateNarrowTypePass (test-emulate-narrow-int) //----- //
func.func @vector_store_i8(%arg0: vector<8xi8>, %arg1: index, %arg2: index) {
  %alloc = memref.alloc() : memref<4x8xi8>
  vector.store %arg0, %alloc[%arg1, %arg2] : memref<4x8xi8>, vector<8xi8>
  return
}

// -----// IR Dump After CSE (cse) //----- //
module {
  func.func @vector_store_i8(%arg0: vector<8xi8>, %arg1: index, %arg2: index) {
    %alloc = memref.alloc() : memref<4x8xi8>
    vector.store %arg0, %alloc[%arg1, %arg2] : memref<4x8xi8>, vector<8xi8>
    return
  }
}


// -----
// -----// IR Dump After {anonymous}::TestEmulateNarrowTypePass (test-emulate-narrow-int) //----- //
func.func @vector_store_i4(%arg0: vector<8xi4>, %arg1: index, %arg2: index) {
  %alloc = memref.alloc() : memref<16xi8>
  %c0 = arith.constant 0 : index
  %c4 = arith.constant 4 : index
  %c8 = arith.constant 8 : index
  %c8_0 = arith.constant 8 : index
  %c1 = arith.constant 1 : index
  %0 = affine.apply affine_map<()[s0, s1] -> (s0 * 4 + s1 floordiv 2)>()[%arg1, %arg2]
  %1 = vector.bitcast %arg0 : vector<8xi4> to vector<4xi8>
  vector.store %1, %alloc[%0] : memref<16xi8>, vector<4xi8>
  return
}

// -----// IR Dump After CSE (cse) //----- //
#map = affine_map<()[s0, s1] -> (s0 * 4 + s1 floordiv 2)>
module {
  func.func @vector_store_i4(%arg0: vector<8xi4>, %arg1: index, %arg2: index) {
    %alloc = memref.alloc() : memref<16xi8>
    %0 = affine.apply #map()[%arg1, %arg2]
    %1 = vector.bitcast %arg0 : vector<8xi4> to vector<4xi8>
    vector.store %1, %alloc[%0] : memref<16xi8>, vector<4xi8>
    return
  }
}
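
For the static case, the 4x8 i4 memref is flattened to 16 bytes (two i4 values per byte), the vector<8xi4> is bitcast to vector<4xi8>, and the store index is the linearized i4 index halved: (s0 * 8 + s1) floordiv 2, which simplifies to s0 * 4 + s1 floordiv 2 because the s0 term is already even. The unused constants in the pre-CSE dump are conversion leftovers that CSE cleans up. A quick check of the simplification (nonnegative indices assumed, as for any memref access):

# Verify the emitted #map against the naive byte-offset computation
# for a flattened 4x8 i4 buffer.
for s0 in range(4):
    for s1 in range(8):
        assert (s0 * 8 + s1) // 2 == s0 * 4 + s1 // 2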


// -----
// -----// IR Dump After {anonymous}::TestEmulateNarrowTypePass (test-emulate-narrow-int) //----- //
func.func @vector_store_i4_dynamic(%arg0: vector<8xi4>, %arg1: index, %arg2: index, %arg3: index, %arg4: index) {
  %0 = affine.apply affine_map<()[s0, s1] -> ((s0 * s1) floordiv 2)>()[%arg1, %arg2]
  %alloc = memref.alloc(%0) : memref<?xi8>
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %1 = affine.apply affine_map<()[s0, s1, s2] -> ((s2 + s0 * s1) floordiv 2)>()[%arg3, %arg2, %arg4]
  %2 = affine.apply affine_map<()[s0, s1] -> ((s0 * s1) floordiv 2)>()[%arg1, %arg2]
  %3 = vector.bitcast %arg0 : vector<8xi4> to vector<4xi8>
  vector.store %3, %alloc[%1] : memref<?xi8>, vector<4xi8>
  return
}

// -----// IR Dump After CSE (cse) //----- //
#map = affine_map<()[s0, s1] -> ((s0 * s1) floordiv 2)>
#map1 = affine_map<()[s0, s1, s2] -> ((s2 + s0 * s1) floordiv 2)>
module {
  func.func @vector_store_i4_dynamic(%arg0: vector<8xi4>, %arg1: index, %arg2: index, %arg3: index, %arg4: index) {
    %0 = affine.apply #map()[%arg1, %arg2]
    %alloc = memref.alloc(%0) : memref<?xi8>
    %1 = affine.apply #map1()[%arg3, %arg2, %arg4]
    %2 = vector.bitcast %arg0 : vector<8xi4> to vector<4xi8>
    vector.store %2, %alloc[%1] : memref<?xi8>, vector<4xi8>
    return
  }
}
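
In the dynamic case the same halving appears twice: #map turns the i4 element count s0 * s1 into a byte count for the allocation, and #map1 linearizes the store position before halving it. (The pre-CSE dump also recomputes the allocation size into an unused %2, which CSE removes.) A small Python model of the two maps, with my own names for the dynamic sizes and indices:

# rows, cols: dynamic sizes of the logical ?x?xi4 memref;
# (row, col): position of the vector store.
def alloc_bytes(rows, cols):
    return (rows * cols) // 2       # #map()[rows, cols]

def store_byte_offset(row, col, cols):
    return (col + row * cols) // 2  # #map1()[row, cols, col]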


https://github.com/llvm/llvm-project/pull/70293

