[PATCH] D116740: [SLP]Improve reordering for the nodes beeing used in alternate vectorization.
Valeriy Dmitriev via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 1 15:18:26 PDT 2022
vdmitrie added a comment.
Herald added a project: All.
I believe https://reviews.llvm.org/D120492 was supposed to fix the issue I reported earlier, but that did not happen. The test I sent earlier was a simplified one. I have modified it slightly to show the reordering misbehaving.
This is okay:
; Reproducer (variant reported as "okay"): reads eight i32 values from %arg and
; four from %arg1, combines them into four scalar sub/sub/add chains, pushes
; each chain through three more add/sub-with-constant operations, and stores
; the four results to %arg2[0..3].
; NOTE(review): the mixed add/sub opcodes across the four stored lanes are
; presumably what makes this an alternate-opcode vectorization candidate for
; SLP -- confirm against the vectorizer's debug output.
define void @test(i32* %arg, i32* %arg1, i32* %arg2) {
bb:
; Eight loads from %arg, in element order 0, 4, 1, 5, 2, 6, 3, 7.
; Each loaded value is added to itself (%sN = %iN + %iN).
%i3 = load i32, i32* %arg, align 4
%s3 = add i32 %i3, %i3
%i4 = getelementptr inbounds i32, i32* %arg, i64 4
%i5 = load i32, i32* %i4, align 4
%s5 = add i32 %i5, %i5
%i8 = getelementptr inbounds i32, i32* %arg, i64 1
%i9 = load i32, i32* %i8, align 4
%s9 = add i32 %i9, %i9
%i10 = getelementptr inbounds i32, i32* %arg, i64 5
%i11 = load i32, i32* %i10, align 4
%s11 = add i32 %i11, %i11
%i14 = getelementptr inbounds i32, i32* %arg, i64 2
%i15 = load i32, i32* %i14, align 4
%s15 = add i32 %i15, %i15
%i16 = getelementptr inbounds i32, i32* %arg, i64 6
%i17 = load i32, i32* %i16, align 4
%s17 = add i32 %i17, %i17
%i20 = getelementptr inbounds i32, i32* %arg, i64 3
%i21 = load i32, i32* %i20, align 4
%s21 = add i32 %i21, %i21
%i22 = getelementptr inbounds i32, i32* %arg, i64 7
%i23 = load i32, i32* %i22, align 4
%s23 = add i32 %i23, %i23
; Four loads from %arg1, elements 0..3.
%i1 = load i32, i32* %arg1, align 4
%i6 = getelementptr inbounds i32, i32* %arg1, i64 1
%i7 = load i32, i32* %i6, align 4
%i12 = getelementptr inbounds i32, i32* %arg1, i64 2
%i13 = load i32, i32* %i12, align 4
%i18 = getelementptr inbounds i32, i32* %arg1, i64 3
%i19 = load i32, i32* %i18, align 4
; Four chains of the form (C - %s[k]) - %s[k+4] + %arg1[k], emitted for
; k = 3, 2, 1, 0. The initial minuend C is 0, except for the k = 2 chain
; (%i27), where it is undef.
%i24 = sub i32 0, %s21
%i25 = sub i32 %i24, %s23
%i26 = add i32 %i25, %i19
%i27 = sub i32 undef, %s15
%i28 = sub i32 %i27, %s17
%i29 = add i32 %i28, %i13
%i30 = sub i32 0, %s9
%i31 = sub i32 %i30, %s11
%i32 = add i32 %i31, %i7
%i33 = sub i32 0, %s3
%i34 = sub i32 %i33, %s5
%i35 = add i32 %i34, %i1
; Each chain result goes through three more add/sub operations with constant
; operands and is then stored. The chains do not map to %arg2 in source-index
; order: k = 2 -> %arg2[0], k = 1 -> %arg2[2], k = 0 -> %arg2[1],
; k = 3 -> %arg2[3].
; %arg2[0]: add, add, add.
%i36 = add i32 %i29, 1
%i37 = add i32 %i36, 0
%i38 = add i32 %i37, 0
store i32 %i38, i32* %arg2, align 4
; %arg2[2]: add, sub, sub.
%i39 = getelementptr inbounds i32, i32* %arg2, i64 2
%i40 = add i32 0, %i32
%i41 = sub i32 %i40, 0
%i42 = sub i32 %i41, 0
store i32 %i42, i32* %i39, align 4
; %arg2[1]: add, sub, sub.
%i43 = getelementptr inbounds i32, i32* %arg2, i64 1
%i44 = add i32 %i35, 0
%i45 = sub i32 %i44, 0
%i46 = sub i32 %i45, 0
store i32 %i46, i32* %i43, align 4
; %arg2[3]: sub, sub, add.
%i47 = getelementptr inbounds i32, i32* %arg2, i64 3
%i48 = sub i32 %i26, 0
%i49 = sub i32 %i48, 0
%i50 = add i32 %i49, 0
store i32 %i50, i32* %i47, align 4
ret void
}
merely because of this if statement:
if (UserTE->UserTreeIndices.size() != 1)
break;
effectively returning to the behavior prior to the patch.
But this test still produces all these extra shuffles:
; Reproducer (variant reported as still producing extra shuffles): reads eight
; i32 values from %arg and four from %arg1, combines them into four scalar
; sub/sub/add chains, pushes each chain through three more
; add/sub-with-constant operations, and stores the four results to
; %arg2[0..3].
; Compared with the listing earlier in this message, the visible difference is
; that each %sN is computed as %iN + 6 (a constant second operand) rather than
; %iN + %iN.
define void @test(i32* %arg, i32* %arg1, i32* %arg2) {
bb:
; Eight loads from %arg, in element order 0, 4, 1, 5, 2, 6, 3, 7.
; The constant 6 is added to each loaded value (%sN = %iN + 6).
%i3 = load i32, i32* %arg, align 4
%s3 = add i32 %i3, 6
%i4 = getelementptr inbounds i32, i32* %arg, i64 4
%i5 = load i32, i32* %i4, align 4
%s5 = add i32 %i5, 6
%i8 = getelementptr inbounds i32, i32* %arg, i64 1
%i9 = load i32, i32* %i8, align 4
%s9 = add i32 %i9, 6
%i10 = getelementptr inbounds i32, i32* %arg, i64 5
%i11 = load i32, i32* %i10, align 4
%s11 = add i32 %i11, 6
%i14 = getelementptr inbounds i32, i32* %arg, i64 2
%i15 = load i32, i32* %i14, align 4
%s15 = add i32 %i15, 6
%i16 = getelementptr inbounds i32, i32* %arg, i64 6
%i17 = load i32, i32* %i16, align 4
%s17 = add i32 %i17, 6
%i20 = getelementptr inbounds i32, i32* %arg, i64 3
%i21 = load i32, i32* %i20, align 4
%s21 = add i32 %i21, 6
%i22 = getelementptr inbounds i32, i32* %arg, i64 7
%i23 = load i32, i32* %i22, align 4
%s23 = add i32 %i23, 6
; Four loads from %arg1, elements 0..3.
%i1 = load i32, i32* %arg1, align 4
%i6 = getelementptr inbounds i32, i32* %arg1, i64 1
%i7 = load i32, i32* %i6, align 4
%i12 = getelementptr inbounds i32, i32* %arg1, i64 2
%i13 = load i32, i32* %i12, align 4
%i18 = getelementptr inbounds i32, i32* %arg1, i64 3
%i19 = load i32, i32* %i18, align 4
; Four chains of the form (C - %s[k]) - %s[k+4] + %arg1[k], emitted for
; k = 3, 2, 1, 0. The initial minuend C is 0, except for the k = 2 chain
; (%i27), where it is undef.
%i24 = sub i32 0, %s21
%i25 = sub i32 %i24, %s23
%i26 = add i32 %i25, %i19
%i27 = sub i32 undef, %s15
%i28 = sub i32 %i27, %s17
%i29 = add i32 %i28, %i13
%i30 = sub i32 0, %s9
%i31 = sub i32 %i30, %s11
%i32 = add i32 %i31, %i7
%i33 = sub i32 0, %s3
%i34 = sub i32 %i33, %s5
%i35 = add i32 %i34, %i1
; Each chain result goes through three more add/sub operations with constant
; operands and is then stored. The chains do not map to %arg2 in source-index
; order: k = 2 -> %arg2[0], k = 1 -> %arg2[2], k = 0 -> %arg2[1],
; k = 3 -> %arg2[3].
; %arg2[0]: add, add, add.
%i36 = add i32 %i29, 1
%i37 = add i32 %i36, 0
%i38 = add i32 %i37, 0
store i32 %i38, i32* %arg2, align 4
; %arg2[2]: add, sub, sub.
%i39 = getelementptr inbounds i32, i32* %arg2, i64 2
%i40 = add i32 0, %i32
%i41 = sub i32 %i40, 0
%i42 = sub i32 %i41, 0
store i32 %i42, i32* %i39, align 4
; %arg2[1]: add, sub, sub.
%i43 = getelementptr inbounds i32, i32* %arg2, i64 1
%i44 = add i32 %i35, 0
%i45 = sub i32 %i44, 0
%i46 = sub i32 %i45, 0
store i32 %i46, i32* %i43, align 4
; %arg2[3]: sub, sub, add.
%i47 = getelementptr inbounds i32, i32* %arg2, i64 3
%i48 = sub i32 %i26, 0
%i49 = sub i32 %i48, 0
%i50 = add i32 %i49, 0
store i32 %i50, i32* %i47, align 4
ret void
}
================
Comment at: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:3073
+ return;
+ if (UserTE->UserTreeIndices.empty())
+ UserTE = nullptr;
----------------
This condition is always false: if UserTE->UserTreeIndices.size() != 1, we have already exited the loop at line 3067, so UserTreeIndices holds exactly one element here and cannot be empty.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D116740/new/
https://reviews.llvm.org/D116740
More information about the llvm-commits
mailing list