[llvm] cc943a6 - [SLP]Fix PR106626: try several attempts for lookup values, if not found.
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 29 15:11:40 PDT 2024
Author: Alexey Bataev
Date: 2024-08-29T15:07:20-07:00
New Revision: cc943a67d114e28c28f561c3b1a48ff2003264ce
URL: https://github.com/llvm/llvm-project/commit/cc943a67d114e28c28f561c3b1a48ff2003264ce
DIFF: https://github.com/llvm/llvm-project/commit/cc943a67d114e28c28f561c3b1a48ff2003264ce.diff
LOG: [SLP]Fix PR106626: try several attempts for lookup values, if not found.
If the value is used in Scalars several times, the first attempt to find
its position in the node (if ReuseShuffleIndices and ReorderIndices are not
empty) may fail. In this case, we need to find another copy of the same
value and try again.
Fixes https://github.com/llvm/llvm-project/issues/106626
Added:
llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 775fa9ba75cfb7..edb2567fa057b3 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3237,15 +3237,25 @@ class BoUpSLP {
/// When ReuseReorderShuffleIndices is empty it just returns position of \p
/// V within vector of Scalars. Otherwise, try to remap on its reuse index.
int findLaneForValue(Value *V) const {
- unsigned FoundLane = std::distance(Scalars.begin(), find(Scalars, V));
- assert(FoundLane < Scalars.size() && "Couldn't find extract lane");
- if (!ReorderIndices.empty())
- FoundLane = ReorderIndices[FoundLane];
- assert(FoundLane < Scalars.size() && "Couldn't find extract lane");
- if (!ReuseShuffleIndices.empty()) {
- FoundLane = std::distance(ReuseShuffleIndices.begin(),
- find(ReuseShuffleIndices, FoundLane));
+ unsigned FoundLane = getVectorFactor();
+ for (auto *It = find(Scalars, V), *End = Scalars.end(); It != End;
+ std::advance(It, 1)) {
+ if (*It != V)
+ continue;
+ FoundLane = std::distance(Scalars.begin(), It);
+ assert(FoundLane < Scalars.size() && "Couldn't find extract lane");
+ if (!ReorderIndices.empty())
+ FoundLane = ReorderIndices[FoundLane];
+ assert(FoundLane < Scalars.size() && "Couldn't find extract lane");
+ if (ReuseShuffleIndices.empty())
+ break;
+ if (auto *RIt = find(ReuseShuffleIndices, FoundLane);
+ RIt != ReuseShuffleIndices.end()) {
+ FoundLane = std::distance(ReuseShuffleIndices.begin(), RIt);
+ break;
+ }
}
+ assert(FoundLane < getVectorFactor() && "Unable to find given value.");
return FoundLane;
}
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll
new file mode 100644
index 00000000000000..dbc4f3d59d4f9b
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll
@@ -0,0 +1,231 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
+
+define void @test() {
+; CHECK-LABEL: define void @test() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br label %[[BB61:.*]]
+; CHECK: [[BB61]]:
+; CHECK-NEXT: br label %[[BB64:.*]]
+; CHECK: [[BB62:.*]]:
+; CHECK-NEXT: br i1 poison, label %[[BB63:.*]], label %[[BB64]]
+; CHECK: [[BB63]]:
+; CHECK-NEXT: br label %[[BB64]]
+; CHECK: [[BB64]]:
+; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x float> [ poison, %[[BB61]] ], [ poison, %[[BB63]] ], [ poison, %[[BB62]] ]
+; CHECK-NEXT: [[I66:%.*]] = load float, ptr poison, align 16
+; CHECK-NEXT: [[I67:%.*]] = load float, ptr poison, align 4
+; CHECK-NEXT: [[I68:%.*]] = load float, ptr poison, align 8
+; CHECK-NEXT: [[I69:%.*]] = load float, ptr poison, align 4
+; CHECK-NEXT: [[I70:%.*]] = load float, ptr poison, align 4
+; CHECK-NEXT: [[I71:%.*]] = load float, ptr poison, align 16
+; CHECK-NEXT: [[I72:%.*]] = load float, ptr poison, align 4
+; CHECK-NEXT: [[I73:%.*]] = load float, ptr poison, align 8
+; CHECK-NEXT: [[I74:%.*]] = load float, ptr poison, align 4
+; CHECK-NEXT: [[I75:%.*]] = load float, ptr poison, align 16
+; CHECK-NEXT: [[I76:%.*]] = load float, ptr poison, align 4
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x float> poison, float [[I76]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x float> [[TMP1]], float [[I75]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x float> [[TMP2]], float [[I74]], i32 2
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x float> [[TMP3]], float [[I73]], i32 3
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x float> [[TMP4]], float [[I71]], i32 4
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x float> [[TMP5]], float [[I70]], i32 5
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x float> [[TMP6]], float [[I68]], i32 6
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x float> [[TMP7]], float [[I66]], i32 7
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x float> [[TMP8]], float [[I72]], i32 13
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x float> [[TMP9]], float [[I69]], i32 14
+; CHECK-NEXT: [[TMP11:%.*]] = insertelement <16 x float> [[TMP10]], float [[I67]], i32 15
+; CHECK-NEXT: br i1 poison, label %[[BB167:.*]], label %[[BB77:.*]]
+; CHECK: [[BB77]]:
+; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x float> [[TMP11]], <16 x float> poison, <8 x i32> <i32 poison, i32 5, i32 6, i32 7, i32 15, i32 15, i32 14, i32 15>
+; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> poison, <16 x i32> <i32 poison, i32 poison, i32 1, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 poison, i32 poison>
+; CHECK-NEXT: br label %[[BB78:.*]]
+; CHECK: [[BB78]]:
+; CHECK-NEXT: [[TMP15:%.*]] = phi <8 x float> [ [[TMP12]], %[[BB77]] ], [ [[TMP30:%.*]], %[[BB78]] ]
+; CHECK-NEXT: [[TMP16:%.*]] = phi <2 x float> [ poison, %[[BB77]] ], [ [[TMP31:%.*]], %[[BB78]] ]
+; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 0, i32 3, i32 1, i32 2, i32 3, i32 0, i32 2, i32 3, i32 2, i32 7, i32 2, i32 3, i32 0, i32 6, i32 7, i32 7>
+; CHECK-NEXT: [[TMP18:%.*]] = fmul fast <16 x float> [[TMP17]], [[TMP13]]
+; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 1, i32 poison, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 7, i32 6, i32 6>
+; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x float> [[TMP16]], <2 x float> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <16 x float> [[TMP19]], <16 x float> [[TMP20]], <16 x i32> <i32 0, i32 17, i32 2, i32 16, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <16 x float> [[TMP21]], <16 x float> [[TMP22]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 17, i32 6, i32 7, i32 8, i32 22, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <16 x float> [[TMP23]], <16 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 1, i32 5, i32 3, i32 1, i32 3, i32 9, i32 3, i32 1, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP25:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v2f32(<16 x float> [[TMP14]], <2 x float> [[TMP0]], i64 2)
+; CHECK-NEXT: [[TMP26:%.*]] = fmul fast <16 x float> [[TMP24]], [[TMP25]]
+; CHECK-NEXT: [[TMP27:%.*]] = fadd fast <16 x float> [[TMP26]], [[TMP18]]
+; CHECK-NEXT: [[TMP28:%.*]] = fadd fast <16 x float> [[TMP27]], poison
+; CHECK-NEXT: [[TMP29:%.*]] = fadd fast <16 x float> [[TMP28]], poison
+; CHECK-NEXT: [[TMP30]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <8 x i32> <i32 12, i32 5, i32 6, i32 7, i32 15, i32 15, i32 14, i32 15>
+; CHECK-NEXT: [[TMP31]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <2 x i32> <i32 10, i32 11>
+; CHECK-NEXT: br i1 poison, label %[[BB78]], label %[[BB167]]
+; CHECK: [[BB167]]:
+; CHECK-NEXT: [[TMP32:%.*]] = phi <16 x float> [ [[TMP11]], %[[BB64]] ], [ [[TMP29]], %[[BB78]] ]
+; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x float> [[TMP32]], i32 15
+; CHECK-NEXT: store float [[TMP33]], ptr poison, align 1
+; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x float> [[TMP32]], i32 13
+; CHECK-NEXT: store float [[TMP34]], ptr poison, align 1
+; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x float> [[TMP32]], i32 14
+; CHECK-NEXT: br i1 poison, label %[[BB186:.*]], label %[[BB184:.*]]
+; CHECK: [[BB184]]:
+; CHECK-NEXT: br label %[[BB185:.*]]
+; CHECK: [[BB185]]:
+; CHECK-NEXT: br i1 poison, label %[[BB185]], label %[[BB186]]
+; CHECK: [[BB186]]:
+; CHECK-NEXT: [[I187:%.*]] = phi nsz float [ [[TMP35]], %[[BB167]] ], [ poison, %[[BB185]] ]
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %bb61
+
+bb61:
+ br label %bb64
+
+bb62:
+ br i1 poison, label %bb63, label %bb64
+
+bb63:
+ br label %bb64
+
+bb64:
+ %i = phi nsz float [ poison, %bb61 ], [ poison, %bb63 ], [ poison, %bb62 ]
+ %i65 = phi nsz float [ poison, %bb61 ], [ poison, %bb63 ], [ poison, %bb62 ]
+ %i66 = load float, ptr poison, align 16
+ %i67 = load float, ptr poison, align 4
+ %i68 = load float, ptr poison, align 8
+ %i69 = load float, ptr poison, align 4
+ %i70 = load float, ptr poison, align 4
+ %i71 = load float, ptr poison, align 16
+ %i72 = load float, ptr poison, align 4
+ %i73 = load float, ptr poison, align 8
+ %i74 = load float, ptr poison, align 4
+ %i75 = load float, ptr poison, align 16
+ %i76 = load float, ptr poison, align 4
+ br i1 poison, label %bb167, label %bb77
+
+bb77:
+ br label %bb78
+
+bb78:
+ %i79 = phi nsz float [ %i66, %bb77 ], [ %i103, %bb78 ]
+ %i80 = phi nsz float [ %i67, %bb77 ], [ %i104, %bb78 ]
+ %i81 = phi nsz float [ %i68, %bb77 ], [ %i105, %bb78 ]
+ %i82 = phi nsz float [ poison, %bb77 ], [ %i106, %bb78 ]
+ %i83 = phi nsz float [ poison, %bb77 ], [ %i123, %bb78 ]
+ %i84 = phi nsz float [ %i69, %bb77 ], [ %i124, %bb78 ]
+ %i85 = phi nsz float [ poison, %bb77 ], [ %i125, %bb78 ]
+ %i86 = phi nsz float [ %i70, %bb77 ], [ %i126, %bb78 ]
+ %i87 = fmul fast float %i79, poison
+ %i88 = fmul fast float %i80, poison
+ %i89 = fmul fast float %i81, poison
+ %i90 = fmul fast float %i82, poison
+ %i91 = fmul fast float %i83, poison
+ %i92 = fadd fast float %i91, %i87
+ %i93 = fmul fast float %i84, poison
+ %i94 = fadd fast float %i93, %i88
+ %i95 = fmul fast float %i85, poison
+ %i96 = fadd fast float %i95, %i89
+ %i97 = fmul fast float %i86, poison
+ %i98 = fadd fast float %i97, %i90
+ %i99 = fadd fast float %i92, poison
+ %i100 = fadd fast float %i94, poison
+ %i101 = fadd fast float %i96, poison
+ %i102 = fadd fast float %i98, poison
+ %i103 = fadd fast float %i99, poison
+ %i104 = fadd fast float %i100, poison
+ %i105 = fadd fast float %i101, poison
+ %i106 = fadd fast float %i102, poison
+ %i107 = fmul fast float %i79, poison
+ %i108 = fmul fast float %i80, poison
+ %i109 = fmul fast float %i81, poison
+ %i110 = fmul fast float %i82, poison
+ %i111 = fmul fast float %i83, poison
+ %i112 = fadd fast float %i111, %i107
+ %i113 = fmul fast float %i84, poison
+ %i114 = fadd fast float %i113, %i108
+ %i115 = fmul fast float %i85, poison
+ %i116 = fadd fast float %i115, %i109
+ %i117 = fmul fast float %i86, poison
+ %i118 = fadd fast float %i117, %i110
+ %i119 = fadd fast float %i112, poison
+ %i120 = fadd fast float %i114, poison
+ %i121 = fadd fast float %i116, poison
+ %i122 = fadd fast float %i118, poison
+ %i123 = fadd fast float %i119, poison
+ %i124 = fadd fast float %i120, poison
+ %i125 = fadd fast float %i121, poison
+ %i126 = fadd fast float %i122, poison
+ %i127 = fmul fast float %i79, %i
+ %i128 = fmul fast float %i80, %i
+ %i129 = fmul fast float %i81, %i
+ %i130 = fmul fast float %i82, %i
+ %i131 = fmul fast float %i83, %i65
+ %i132 = fadd fast float %i131, %i127
+ %i133 = fmul fast float %i84, %i65
+ %i134 = fadd fast float %i133, %i128
+ %i135 = fmul fast float %i85, %i65
+ %i136 = fadd fast float %i135, %i129
+ %i137 = fmul fast float %i86, %i65
+ %i138 = fadd fast float %i137, %i130
+ %i139 = fadd fast float %i132, poison
+ %i140 = fadd fast float %i134, poison
+ %i141 = fadd fast float %i136, poison
+ %i142 = fadd fast float %i138, poison
+ %i143 = fadd fast float %i139, poison
+ %i144 = fadd fast float %i140, poison
+ %i145 = fadd fast float %i141, poison
+ %i146 = fadd fast float %i142, poison
+ %i147 = fmul fast float %i79, poison
+ %i148 = fmul fast float %i80, poison
+ %i149 = fmul fast float %i81, poison
+ %i150 = fmul fast float %i82, poison
+ %i151 = fmul fast float %i83, poison
+ %i152 = fadd fast float %i151, %i147
+ %i153 = fmul fast float %i84, poison
+ %i154 = fadd fast float %i153, %i148
+ %i155 = fmul fast float %i85, poison
+ %i156 = fadd fast float %i155, %i149
+ %i157 = fmul fast float %i86, poison
+ %i158 = fadd fast float %i157, %i150
+ %i159 = fadd fast float %i152, poison
+ %i160 = fadd fast float %i154, poison
+ %i161 = fadd fast float %i156, poison
+ %i162 = fadd fast float %i158, poison
+ %i163 = fadd fast float %i159, poison
+ %i164 = fadd fast float %i160, poison
+ %i165 = fadd fast float %i161, poison
+ %i166 = fadd fast float %i162, poison
+ br i1 poison, label %bb78, label %bb167
+
+bb167:
+ %i168 = phi nsz float [ %i76, %bb64 ], [ %i166, %bb78 ]
+ %i169 = phi nsz float [ poison, %bb64 ], [ %i165, %bb78 ]
+ %i170 = phi nsz float [ poison, %bb64 ], [ %i164, %bb78 ]
+ %i171 = phi nsz float [ %i75, %bb64 ], [ %i163, %bb78 ]
+ %i172 = phi nsz float [ %i74, %bb64 ], [ %i146, %bb78 ]
+ %i173 = phi nsz float [ %i73, %bb64 ], [ %i145, %bb78 ]
+ %i174 = phi nsz float [ %i72, %bb64 ], [ %i144, %bb78 ]
+ %i175 = phi nsz float [ %i71, %bb64 ], [ %i143, %bb78 ]
+ %i176 = phi nsz float [ %i70, %bb64 ], [ %i126, %bb78 ]
+ %i177 = phi nsz float [ poison, %bb64 ], [ %i125, %bb78 ]
+ %i178 = phi nsz float [ %i69, %bb64 ], [ %i124, %bb78 ]
+ %i179 = phi nsz float [ poison, %bb64 ], [ %i123, %bb78 ]
+ %i180 = phi nsz float [ poison, %bb64 ], [ %i106, %bb78 ]
+ %i181 = phi nsz float [ %i68, %bb64 ], [ %i105, %bb78 ]
+ %i182 = phi nsz float [ %i67, %bb64 ], [ %i104, %bb78 ]
+ %i183 = phi nsz float [ %i66, %bb64 ], [ %i103, %bb78 ]
+ store float %i182, ptr poison, align 1
+ store float %i174, ptr poison, align 1
+ br i1 poison, label %bb186, label %bb184
+
+bb184:
+ br label %bb185
+
+bb185:
+ br i1 poison, label %bb185, label %bb186
+
+bb186:
+ %i187 = phi nsz float [ %i178, %bb167 ], [ poison, %bb185 ]
+ ret void
+}
More information about the llvm-commits
mailing list