[llvm] 8953ecf - [InstCombine] reassociate diff of sums into sum of diffs
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 22 18:05:13 PDT 2020
Author: Sanjay Patel
Date: 2020-06-22T20:47:09-04:00
New Revision: 8953ecf22bcf665ffb06399a97265ab71aebebf8
URL: https://github.com/llvm/llvm-project/commit/8953ecf22bcf665ffb06399a97265ab71aebebf8
DIFF: https://github.com/llvm/llvm-project/commit/8953ecf22bcf665ffb06399a97265ab71aebebf8.diff
LOG: [InstCombine] reassociate diff of sums into sum of diffs
This is the integer sibling to D81491.
(a[0] + a[1] + a[2] + a[3]) - (b[0] + b[1] + b[2] +b[3]) -->
(a[0] - b[0]) + (a[1] - b[1]) + (a[2] - b[2]) + (a[3] - b[3])
Removing the "experimental" from these intrinsics is likely
not too far away.
Added:
Modified:
llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
llvm/test/Transforms/InstCombine/vector-reductions.ll
llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 655ba941f7c5..a7f5e0a7774d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1787,6 +1787,21 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
return BinaryOperator::CreateSub(XZ, YW);
}
+ auto m_AddRdx = [](Value *&Vec) {
+ return m_OneUse(
+ m_Intrinsic<Intrinsic::experimental_vector_reduce_add>(m_Value(Vec)));
+ };
+ Value *V0, *V1;
+ if (match(Op0, m_AddRdx(V0)) && match(Op1, m_AddRdx(V1)) &&
+ V0->getType() == V1->getType()) {
+ // Difference of sums is sum of differences:
+ // add_rdx(V0) - add_rdx(V1) --> add_rdx(V0 - V1)
+ Value *Sub = Builder.CreateSub(V0, V1);
+ Value *Rdx = Builder.CreateIntrinsic(
+ Intrinsic::experimental_vector_reduce_add, {Sub->getType()}, {Sub});
+ return replaceInstUsesWith(I, Rdx);
+ }
+
if (Constant *C = dyn_cast<Constant>(Op0)) {
Value *X;
if (match(Op1, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1))
diff --git a/llvm/test/Transforms/InstCombine/vector-reductions.ll b/llvm/test/Transforms/InstCombine/vector-reductions.ll
index bd051eb830a3..f11307468457 100644
--- a/llvm/test/Transforms/InstCombine/vector-reductions.ll
+++ b/llvm/test/Transforms/InstCombine/vector-reductions.ll
@@ -88,10 +88,9 @@ define float @diff_of_sums_type_mismatch(float %a0, <4 x float> %v0, float %a1,
define i32 @diff_of_sums_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @diff_of_sums_v4i32(
-; CHECK-NEXT: [[R0:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[V0:%.*]])
-; CHECK-NEXT: [[R1:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[V1:%.*]])
-; CHECK-NEXT: [[R:%.*]] = sub i32 [[R0]], [[R1]]
-; CHECK-NEXT: ret i32 [[R]]
+; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> [[V0:%.*]], [[V1:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP1]])
+; CHECK-NEXT: ret i32 [[TMP2]]
;
%r0 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %v0)
%r1 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %v1)
@@ -99,6 +98,8 @@ define i32 @diff_of_sums_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
ret i32 %r
}
+; negative test - extra uses could create extra instructions
+
define i32 @diff_of_sums_v4i32_extra_use1(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @diff_of_sums_v4i32_extra_use1(
; CHECK-NEXT: [[R0:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[V0:%.*]])
@@ -114,6 +115,8 @@ define i32 @diff_of_sums_v4i32_extra_use1(<4 x i32> %v0, <4 x i32> %v1) {
ret i32 %r
}
+; negative test - extra uses could create extra instructions
+
define i32 @diff_of_sums_v4i32_extra_use2(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @diff_of_sums_v4i32_extra_use2(
; CHECK-NEXT: [[R0:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[V0:%.*]])
@@ -129,6 +132,8 @@ define i32 @diff_of_sums_v4i32_extra_use2(<4 x i32> %v0, <4 x i32> %v1) {
ret i32 %r
}
+; negative test - can't reassociate different vector types
+
define i32 @diff_of_sums_type_mismatch2(<8 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @diff_of_sums_type_mismatch2(
; CHECK-NEXT: [[R0:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[V0:%.*]])
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
index 1cceb48da206..67bcec9021b7 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
@@ -132,10 +132,9 @@ define i32 @TestVectorsEqual_alt(i32* noalias %Vec0, i32* noalias %Vec1, i32 %To
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[VEC1:%.*]] to <4 x i32>*
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
-; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP1]])
-; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP3]])
-; CHECK-NEXT: [[ADD_3:%.*]] = sub i32 [[TMP4]], [[TMP5]]
-; CHECK-NEXT: [[CMP3:%.*]] = icmp ule i32 [[ADD_3]], [[TOLERANCE:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
+; CHECK-NEXT: [[CMP3:%.*]] = icmp ule i32 [[TMP5]], [[TOLERANCE:%.*]]
; CHECK-NEXT: [[COND:%.*]] = zext i1 [[CMP3]] to i32
; CHECK-NEXT: ret i32 [[COND]]
;
More information about the llvm-commits
mailing list