[PATCH] D27677: [AArch64] Add feature for disabling unaligned quadword store penalty
Matthew Simpson via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 12 09:09:19 PST 2016
mssimpso created this revision.
mssimpso added reviewers: MatzeB, rengolin, mcrosier, gberry, t.p.northover.
mssimpso added a subscriber: llvm-commits.
Herald added a subscriber: aemerson.
This patch adds a subtarget flag, UseUnalignedQuadwordStorePenalty (enabled by default), that controls whether the getMemoryOpCost TTI hook applies the unaligned quadword store penalty. The patch disables the penalty for Kryo and Falkor.
https://reviews.llvm.org/D27677
Files:
lib/Target/AArch64/AArch64Subtarget.cpp
lib/Target/AArch64/AArch64Subtarget.h
lib/Target/AArch64/AArch64TargetTransformInfo.cpp
test/Analysis/CostModel/AArch64/falkor.ll
test/Analysis/CostModel/AArch64/kryo.ll
Index: test/Analysis/CostModel/AArch64/kryo.ll
===================================================================
--- test/Analysis/CostModel/AArch64/kryo.ll
+++ test/Analysis/CostModel/AArch64/kryo.ll
@@ -24,3 +24,14 @@
ret void
}
+
+; CHECK-LABEL: memoryOpCost
+define void @memoryOpCost(<2 x i64> %a, <2 x i64>* %ptr) {
+
+ ; Disable the unaligned quadword store penalty for kryo.
+ ;
+ ; CHECK: cost of 1 {{.*}} store <2 x i64> %a, <2 x i64>* %ptr, align 8
+ store <2 x i64> %a, <2 x i64>* %ptr, align 8
+
+ ret void
+}
Index: test/Analysis/CostModel/AArch64/falkor.ll
===================================================================
--- /dev/null
+++ test/Analysis/CostModel/AArch64/falkor.ll
@@ -0,0 +1,15 @@
+; RUN: opt < %s -cost-model -analyze -mcpu=falkor | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+; CHECK-LABEL: memoryOpCost
+define void @memoryOpCost(<2 x i64> %a, <2 x i64>* %ptr) {
+
+ ; Disable the unaligned quadword store penalty for falkor.
+ ;
+ ; CHECK: cost of 1 {{.*}} store <2 x i64> %a, <2 x i64>* %ptr, align 8
+ store <2 x i64> %a, <2 x i64>* %ptr, align 8
+
+ ret void
+}
Index: lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -467,7 +467,8 @@
unsigned Alignment, unsigned AddressSpace) {
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
- if (Opcode == Instruction::Store && Src->isVectorTy() && Alignment != 16 &&
+ if (ST->useUnalignedQuadwordStorePenalty() && Opcode == Instruction::Store &&
+ Src->isVectorTy() && Alignment != 16 &&
Src->getVectorElementType()->isIntegerTy(64)) {
// Unaligned stores are extremely inefficient. We don't split
// unaligned v2i64 stores because the negative impact that has shown in
Index: lib/Target/AArch64/AArch64Subtarget.h
===================================================================
--- lib/Target/AArch64/AArch64Subtarget.h
+++ lib/Target/AArch64/AArch64Subtarget.h
@@ -85,6 +85,7 @@
bool HasArithmeticCbzFusion = false;
bool DisableLatencySchedHeuristic = false;
bool UseRSqrt = false;
+ bool UseUnalignedQuadwordStorePenalty = true;
uint8_t MaxInterleaveFactor = 2;
uint8_t VectorInsertExtractBaseCost = 3;
uint16_t CacheLineSize = 0;
@@ -196,6 +197,9 @@
bool hasArithmeticBccFusion() const { return HasArithmeticBccFusion; }
bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; }
bool useRSqrt() const { return UseRSqrt; }
+ bool useUnalignedQuadwordStorePenalty() const {
+ return UseUnalignedQuadwordStorePenalty;
+ }
unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
unsigned getVectorInsertExtractBaseCost() const {
return VectorInsertExtractBaseCost;
Index: lib/Target/AArch64/AArch64Subtarget.cpp
===================================================================
--- lib/Target/AArch64/AArch64Subtarget.cpp
+++ lib/Target/AArch64/AArch64Subtarget.cpp
@@ -71,14 +71,16 @@
break;
case Falkor:
MaxInterleaveFactor = 4;
+ UseUnalignedQuadwordStorePenalty = false;
break;
case Kryo:
MaxInterleaveFactor = 4;
VectorInsertExtractBaseCost = 2;
CacheLineSize = 128;
PrefetchDistance = 740;
MinPrefetchStride = 1024;
MaxPrefetchIterationsAhead = 11;
+ UseUnalignedQuadwordStorePenalty = false;
break;
case Vulcan:
MaxInterleaveFactor = 4;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D27677.81093.patch
Type: text/x-patch
Size: 3627 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20161212/c77fd965/attachment.bin>
More information about the llvm-commits mailing list