[PATCH] D81491: [InstCombine] reassociate FP diff of sums into sum of diffs
Sanjay Patel via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Sun Jun 14 06:24:58 PDT 2020
This revision was automatically updated to reflect the committed changes.
Closed by commit rGb5fb26951a8e: [InstCombine] reassociate FP diff of sums into sum of diffs (authored by spatel).
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D81491/new/
https://reviews.llvm.org/D81491
Files:
llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
llvm/test/Transforms/InstCombine/vector-reductions.ll
llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
Index: llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
===================================================================
--- llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
+++ llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
@@ -240,10 +240,9 @@
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[VEC1:%.*]] to <4 x float>*
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
-; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> [[TMP1]])
-; CHECK-NEXT: [[TMP5:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> [[TMP3]])
-; CHECK-NEXT: [[ADD_3:%.*]] = fsub fast float [[TMP4]], [[TMP5]]
-; CHECK-NEXT: [[CMP3:%.*]] = fcmp fast ole float [[ADD_3]], [[TOLERANCE:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = fsub fast <4 x float> [[TMP1]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> [[TMP4]])
+; CHECK-NEXT: [[CMP3:%.*]] = fcmp fast ole float [[TMP5]], [[TOLERANCE:%.*]]
; CHECK-NEXT: [[COND:%.*]] = zext i1 [[CMP3]] to i32
; CHECK-NEXT: ret i32 [[COND]]
;
Index: llvm/test/Transforms/InstCombine/vector-reductions.ll
===================================================================
--- llvm/test/Transforms/InstCombine/vector-reductions.ll
+++ llvm/test/Transforms/InstCombine/vector-reductions.ll
@@ -7,9 +7,9 @@
define float @diff_of_sums_v4f32(float %a0, <4 x float> %v0, float %a1, <4 x float> %v1) {
; CHECK-LABEL: @diff_of_sums_v4f32(
-; CHECK-NEXT: [[R0:%.*]] = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]])
-; CHECK-NEXT: [[R1:%.*]] = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float [[A1:%.*]], <4 x float> [[V1:%.*]])
-; CHECK-NEXT: [[R:%.*]] = fsub reassoc nsz float [[R0]], [[R1]]
+; CHECK-NEXT: [[TMP1:%.*]] = fsub reassoc nsz <4 x float> [[V0:%.*]], [[V1:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = call reassoc nsz float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float [[A0:%.*]], <4 x float> [[TMP1]])
+; CHECK-NEXT: [[R:%.*]] = fsub reassoc nsz float [[TMP2]], [[A1:%.*]]
; CHECK-NEXT: ret float [[R]]
;
%r0 = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %a0, <4 x float> %v0)
@@ -18,6 +18,8 @@
ret float %r
}
+; negative test - fsub must allow reassociation
+
define float @diff_of_sums_v4f32_fmf(float %a0, <4 x float> %v0, float %a1, <4 x float> %v1) {
; CHECK-LABEL: @diff_of_sums_v4f32_fmf(
; CHECK-NEXT: [[R0:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]])
@@ -31,6 +33,8 @@
ret float %r
}
+; negative test - extra uses could create extra instructions
+
define float @diff_of_sums_extra_use1(float %a0, <4 x float> %v0, float %a1, <4 x float> %v1) {
; CHECK-LABEL: @diff_of_sums_extra_use1(
; CHECK-NEXT: [[R0:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]])
@@ -46,6 +50,8 @@
ret float %r
}
+; negative test - extra uses could create extra instructions
+
define float @diff_of_sums_extra_use2(float %a0, <4 x float> %v0, float %a1, <4 x float> %v1) {
; CHECK-LABEL: @diff_of_sums_extra_use2(
; CHECK-NEXT: [[R0:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]])
@@ -61,6 +67,8 @@
ret float %r
}
+; negative test - can't reassociate different vector types
+
define float @diff_of_sums_type_mismatch(float %a0, <4 x float> %v0, float %a1, <8 x float> %v1) {
; CHECK-LABEL: @diff_of_sums_type_mismatch(
; CHECK-NEXT: [[R0:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]])
Index: llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -2228,6 +2228,23 @@
return BinaryOperator::CreateFSubFMF(XZ, YW, &I);
}
+ auto m_FaddRdx = [](Value *&Sum, Value *&Vec) {
+ return m_OneUse(
+ m_Intrinsic<Intrinsic::experimental_vector_reduce_v2_fadd>(
+ m_Value(Sum), m_Value(Vec)));
+ };
+ Value *A0, *A1, *V0, *V1;
+ if (match(Op0, m_FaddRdx(A0, V0)) && match(Op1, m_FaddRdx(A1, V1)) &&
+ V0->getType() == V1->getType()) {
+ // Difference of sums is sum of differences:
+ // add_rdx(A0, V0) - add_rdx(A1, V1) --> add_rdx(A0, V0 - V1) - A1
+ Value *Sub = Builder.CreateFSubFMF(V0, V1, &I);
+ Value *Rdx = Builder.CreateIntrinsic(
+ Intrinsic::experimental_vector_reduce_v2_fadd,
+ {A0->getType(), Sub->getType()}, {A0, Sub}, &I);
+ return BinaryOperator::CreateFSubFMF(Rdx, A1, &I);
+ }
+
if (Instruction *F = factorizeFAddFSub(I, Builder))
return F;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D81491.270616.patch
Type: text/x-patch
Size: 5212 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200614/d4246378/attachment.bin>
More information about the llvm-commits mailing list