[PATCH] D100302: [VectorCombine] Run load/extract scalarization after scalarizing store.
Florian Hahn via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri May 14 04:02:47 PDT 2021
fhahn updated this revision to Diff 345392.
fhahn added a comment.
Rebased and updated to actually pass the load's first user to scalarizeLoadExtract.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D100302/new/
https://reviews.llvm.org/D100302
Files:
llvm/lib/Transforms/Vectorize/VectorCombine.cpp
llvm/test/Transforms/VectorCombine/AArch64/load-extract-insert-store-scalarization.ll
Index: llvm/test/Transforms/VectorCombine/AArch64/load-extract-insert-store-scalarization.ll
===================================================================
--- llvm/test/Transforms/VectorCombine/AArch64/load-extract-insert-store-scalarization.ll
+++ llvm/test/Transforms/VectorCombine/AArch64/load-extract-insert-store-scalarization.ll
@@ -6,18 +6,19 @@
define void @load_extract_insert_store_const_idx(<225 x double>* %A) {
; CHECK-LABEL: @load_extract_insert_store_const_idx(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[LV:%.*]] = load <225 x double>, <225 x double>* [[A:%.*]], align 8
-; CHECK-NEXT: [[EXT_0:%.*]] = extractelement <225 x double> [[LV]], i64 0
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A:%.*]], i32 0, i64 1
+; CHECK-NEXT: [[EXT_0:%.*]] = load double, double* [[TMP0]], align 1
; CHECK-NEXT: [[MUL:%.*]] = fmul double 2.000000e+01, [[EXT_0]]
-; CHECK-NEXT: [[EXT_1:%.*]] = extractelement <225 x double> [[LV]], i64 1
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A]], i32 0, i64 1
+; CHECK-NEXT: [[EXT_1:%.*]] = load double, double* [[TMP1]], align 1
; CHECK-NEXT: [[SUB:%.*]] = fsub double [[EXT_1]], [[MUL]]
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A]], i64 0, i64 1
-; CHECK-NEXT: store double [[SUB]], double* [[TMP0]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A]], i64 0, i64 1
+; CHECK-NEXT: store double [[SUB]], double* [[TMP2]], align 8
; CHECK-NEXT: ret void
;
entry:
%lv = load <225 x double>, <225 x double>* %A, align 8
- %ext.0 = extractelement <225 x double> %lv, i64 0
+ %ext.0 = extractelement <225 x double> %lv, i64 1
%mul = fmul double 20.0, %ext.0
%ext.1 = extractelement <225 x double> %lv, i64 1
%sub = fsub double %ext.1, %mul
@@ -33,13 +34,14 @@
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_1]])
; CHECK-NEXT: [[CMP_2:%.*]] = icmp ult i64 [[IDX_2:%.*]], 225
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_2]])
-; CHECK-NEXT: [[LV:%.*]] = load <225 x double>, <225 x double>* [[A:%.*]], align 8
-; CHECK-NEXT: [[EXT_0:%.*]] = extractelement <225 x double> [[LV]], i64 [[IDX_1]]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A:%.*]], i32 0, i64 [[IDX_1]]
+; CHECK-NEXT: [[EXT_0:%.*]] = load double, double* [[TMP0]], align 1
; CHECK-NEXT: [[MUL:%.*]] = fmul double 2.000000e+01, [[EXT_0]]
-; CHECK-NEXT: [[EXT_1:%.*]] = extractelement <225 x double> [[LV]], i64 [[IDX_2]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A]], i32 0, i64 [[IDX_2]]
+; CHECK-NEXT: [[EXT_1:%.*]] = load double, double* [[TMP1]], align 1
; CHECK-NEXT: [[SUB:%.*]] = fsub double [[EXT_1]], [[MUL]]
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A]], i64 0, i64 [[IDX_1]]
-; CHECK-NEXT: store double [[SUB]], double* [[TMP0]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A]], i64 0, i64 [[IDX_1]]
+; CHECK-NEXT: store double [[SUB]], double* [[TMP2]], align 8
; CHECK-NEXT: ret void
;
entry:
Index: llvm/lib/Transforms/Vectorize/VectorCombine.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -835,8 +835,17 @@
if (SI->getAlign() < NSI->getAlign())
NSI->setAlignment(SI->getAlign());
replaceValue(I, *NSI);
+ auto *IEI = cast<Instruction>(SI->getOperand(0));
// Need erasing the store manually.
I.eraseFromParent();
+ // Removing the insertelement instruction may unlock further load/extract
+ // scalarization opportunities.
+ if (IEI->use_empty())
+ IEI->eraseFromParent();
+ if (!Load->user_empty()) {
+ Instruction *UI = cast<Instruction>(*Load->user_begin());
+ scalarizeLoadExtract(*UI);
+ }
return true;
}
@@ -949,8 +958,8 @@
MadeChange |= foldBitcastShuf(I);
MadeChange |= scalarizeBinopOrCmp(I);
MadeChange |= foldExtractedCmps(I);
- MadeChange |= foldSingleElementStore(I);
MadeChange |= scalarizeLoadExtract(I);
+ MadeChange |= foldSingleElementStore(I);
}
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D100302.345392.patch
Type: text/x-patch
Size: 4416 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210514/e765cfb0/attachment.bin>
More information about the llvm-commits
mailing list