[PATCH] D123638: [SLP][AArch64] Implement lookahead operand reordering score of splat loads for AArch64
Vasileios Porpodas via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 20 09:14:53 PDT 2022
vporpo updated this revision to Diff 423930.
vporpo added a comment.
Addressed comments.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D123638/new/
https://reviews.llvm.org/D123638
Files:
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
llvm/test/Transforms/SLPVectorizer/AArch64/splat-loads.ll
Index: llvm/test/Transforms/SLPVectorizer/AArch64/splat-loads.ll
===================================================================
--- llvm/test/Transforms/SLPVectorizer/AArch64/splat-loads.ll
+++ llvm/test/Transforms/SLPVectorizer/AArch64/splat-loads.ll
@@ -10,21 +10,21 @@
; CHECK-NEXT: entry:
; CHECK-NEXT: [[GEP_1_0:%.*]] = getelementptr inbounds double, double* [[ARRAY1:%.*]], i64 0
; CHECK-NEXT: [[GEP_2_0:%.*]] = getelementptr inbounds double, double* [[ARRAY2:%.*]], i64 0
+; CHECK-NEXT: [[GEP_2_1:%.*]] = getelementptr inbounds double, double* [[ARRAY2]], i64 1
+; CHECK-NEXT: [[LD_2_0:%.*]] = load double, double* [[GEP_2_0]], align 8
+; CHECK-NEXT: [[LD_2_1:%.*]] = load double, double* [[GEP_2_1]], align 8
; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[GEP_1_0]] to <2 x double>*
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[GEP_2_0]] to <2 x double>*
-; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]]
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[SHUFFLE]], i32 1
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[SHUFFLE]], i32 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP6]], double [[TMP7]], i32 1
-; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x double> [[TMP1]], [[TMP8]]
-; CHECK-NEXT: [[TMP10:%.*]] = fadd <2 x double> [[TMP4]], [[TMP9]]
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x double> [[TMP10]], i32 0
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[TMP10]], i32 1
-; CHECK-NEXT: [[ADD3:%.*]] = fadd double [[TMP11]], [[TMP12]]
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[LD_2_0]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[LD_2_0]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> poison, double [[LD_2_1]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[LD_2_1]], i32 1
+; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x double> [[TMP1]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[TMP4]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP8]], i32 0
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP8]], i32 1
+; CHECK-NEXT: [[ADD3:%.*]] = fadd double [[TMP9]], [[TMP10]]
; CHECK-NEXT: ret double [[ADD3]]
;
entry:
Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -280,6 +280,23 @@
return isLegalMaskedGatherScatter(DataType);
}
+ bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const {
+ // Return true if a splat load of NumElements x ElementTy can use an `ld1r` instruction (requires NEON; scalable vectors are rejected).
+ if (!ST->hasNEON() || NumElements.isScalable())
+ return false;
+ switch (unsigned ElementBits = ElementTy->getScalarSizeInBits()) {
+ case 8:
+ case 16:
+ case 32:
+ case 64: {
+ // Accept element widths of {8,16,32,64} bits, provided the whole vector is at least 64 bits wide.
+ unsigned VectorBits = NumElements.getFixedValue() * ElementBits;
+ return VectorBits >= 64;
+ }
+ }
+ return false;
+ }
+
bool isLegalNTStore(Type *DataType, Align Alignment) {
// NOTE: The logic below is mostly geared towards LV, which calls it with
// vectors with 2 elements. We might want to improve that, if other
Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2606,6 +2606,20 @@
if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose ||
Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc ||
Kind == TTI::SK_Reverse) {
+
+ // Check for broadcast loads.
+ if (Kind == TTI::SK_Broadcast) {
+ bool IsLoad = !Args.empty() && llvm::all_of(Args, [](const Value *V) {
+ return isa<LoadInst>(V);
+ });
+ if (IsLoad) {
+ assert(isLegalBroadcastLoad(Tp->getElementType(),
+ LT.second.getVectorElementCount()) &&
+ "Expected this to be accepted by isLegalBroadcastLoad()");
+ return 0; // broadcast is handled by ld1r
+ }
+ }
+
static const CostTblEntry ShuffleTbl[] = {
// Broadcast shuffle kinds can be performed with 'dup'.
{ TTI::SK_Broadcast, MVT::v8i8, 1 },
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D123638.423930.patch
Type: text/x-patch
Size: 5021 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220420/d132d110/attachment.bin>
More information about the llvm-commits
mailing list