[llvm] [Matrix] Lower vector reductions using shape info (PR #142055)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 23 11:54:54 PDT 2025
================
@@ -0,0 +1,300 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes='lower-matrix-intrinsics' -S < %s | FileCheck %s
+
+define i32 @reduce_add_4x2(ptr %in, ptr %out) {
+; CHECK-LABEL: @reduce_add_4x2(
+; CHECK-NEXT: [[COL_LOAD:%.*]] = load volatile <4 x i32>, ptr [[IN:%.*]], align 4
+; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr i32, ptr [[IN]], i64 4
+; CHECK-NEXT: [[COL_LOAD1:%.*]] = load volatile <4 x i32>, ptr [[VEC_GEP]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[COL_LOAD]], [[COL_LOAD1]]
+; CHECK-NEXT: [[REDUCE:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]])
+; CHECK-NEXT: ret i32 [[REDUCE]]
+;
+ %inv = call <8 x i32> @llvm.matrix.column.major.load(ptr %in, i64 4, i1 1, i32 4, i32 2)
+ %reduce = call i32 @llvm.vector.reduce.add(<8 x i32> %inv)
+ ret i32 %reduce
+}
+
+define i32 @reduce_add_8x1(ptr %in, ptr %out) {
+; CHECK-LABEL: @reduce_add_8x1(
+; CHECK-NEXT: [[COL_LOAD:%.*]] = load volatile <8 x i32>, ptr [[IN:%.*]], align 4
+; CHECK-NEXT: [[REDUCE:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[COL_LOAD]])
+; CHECK-NEXT: ret i32 [[REDUCE]]
+;
+ %inv = call <8 x i32> @llvm.matrix.column.major.load(ptr %in, i64 8, i1 1, i32 8, i32 1)
+ %reduce = call i32 @llvm.vector.reduce.add(<8 x i32> %inv)
+ ret i32 %reduce
+}
+
+define i32 @reduce_add_1x8(ptr %in, ptr %out) {
+; CHECK-LABEL: @reduce_add_1x8(
+; CHECK-NEXT: [[COL_LOAD:%.*]] = load volatile <1 x i32>, ptr [[IN:%.*]], align 4
+; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr i32, ptr [[IN]], i64 1
+; CHECK-NEXT: [[COL_LOAD1:%.*]] = load volatile <1 x i32>, ptr [[VEC_GEP]], align 4
+; CHECK-NEXT: [[VEC_GEP2:%.*]] = getelementptr i32, ptr [[IN]], i64 2
+; CHECK-NEXT: [[COL_LOAD3:%.*]] = load volatile <1 x i32>, ptr [[VEC_GEP2]], align 4
+; CHECK-NEXT: [[VEC_GEP4:%.*]] = getelementptr i32, ptr [[IN]], i64 3
+; CHECK-NEXT: [[COL_LOAD5:%.*]] = load volatile <1 x i32>, ptr [[VEC_GEP4]], align 4
+; CHECK-NEXT: [[VEC_GEP6:%.*]] = getelementptr i32, ptr [[IN]], i64 4
+; CHECK-NEXT: [[COL_LOAD7:%.*]] = load volatile <1 x i32>, ptr [[VEC_GEP6]], align 4
+; CHECK-NEXT: [[VEC_GEP8:%.*]] = getelementptr i32, ptr [[IN]], i64 5
+; CHECK-NEXT: [[COL_LOAD9:%.*]] = load volatile <1 x i32>, ptr [[VEC_GEP8]], align 4
+; CHECK-NEXT: [[VEC_GEP10:%.*]] = getelementptr i32, ptr [[IN]], i64 6
+; CHECK-NEXT: [[COL_LOAD11:%.*]] = load volatile <1 x i32>, ptr [[VEC_GEP10]], align 4
+; CHECK-NEXT: [[VEC_GEP12:%.*]] = getelementptr i32, ptr [[IN]], i64 7
+; CHECK-NEXT: [[COL_LOAD13:%.*]] = load volatile <1 x i32>, ptr [[VEC_GEP12]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = add <1 x i32> [[COL_LOAD]], [[COL_LOAD1]]
+; CHECK-NEXT: [[TMP2:%.*]] = add <1 x i32> [[TMP1]], [[COL_LOAD3]]
+; CHECK-NEXT: [[TMP3:%.*]] = add <1 x i32> [[TMP2]], [[COL_LOAD5]]
+; CHECK-NEXT: [[TMP4:%.*]] = add <1 x i32> [[TMP3]], [[COL_LOAD7]]
+; CHECK-NEXT: [[TMP5:%.*]] = add <1 x i32> [[TMP4]], [[COL_LOAD9]]
+; CHECK-NEXT: [[TMP6:%.*]] = add <1 x i32> [[TMP5]], [[COL_LOAD11]]
+; CHECK-NEXT: [[TMP7:%.*]] = add <1 x i32> [[TMP6]], [[COL_LOAD13]]
+; CHECK-NEXT: [[REDUCE:%.*]] = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> [[TMP7]])
----------------
fhahn wrote:
(It's not completely new; we already get similarly bad behavior for other ops, but with the reductions it's probably even worse, so it might be worth tackling before going further down that road.)
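
For concreteness, here is a minimal sketch (hypothetical output, not what the pass currently emits) of the flatter lowering the 1x8 case could get. It assumes the pass could recognize that the reduction consumes the whole flattened matrix, and it assumes a non-volatile load (the volatile column loads in the test above could not legally be merged); since the stride is 1, the eight 1-element columns are contiguous, so a single wide load covers the same memory:

define i32 @reduce_add_1x8_flat(ptr %in) {
  ; With stride 1, columns 0..7 are adjacent, so one <8 x i32> load
  ; stands in for the eight <1 x i32> column loads.
  %wide = load <8 x i32>, ptr %in, align 4
  ; One wide reduction instead of a chain of <1 x i32> adds feeding a
  ; degenerate reduce over a single lane.
  %reduce = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %wide)
  ret i32 %reduce
}

This matches the shape of the 8x1 output above, which is the form the pass already produces when the columns happen to line up with the flat vector.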
https://github.com/llvm/llvm-project/pull/142055