[llvm] r252316 - [AArch64] Enable the narrow ld promotion only on profitable microarchitectures
Jun Bum Lim via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 6 08:27:47 PST 2015
Author: junbuml
Date: Fri Nov 6 10:27:47 2015
New Revision: 252316
URL: http://llvm.org/viewvc/llvm-project?rev=252316&view=rev
Log:
[AArch64] Enable the narrow ld promotion only on profitable microarchitectures
The benefit of converting narrow loads into a wider load (r251438) can be
microarchitecture dependent, as it assumes that a single load with two bitfield
extracts is cheaper than two narrow loads. Currently, the conversion is enabled
only on Cortex-A57, where the performance benefit has been verified.
Added:
llvm/trunk/test/CodeGen/AArch64/arm64-ldr-merge.ll
Modified:
llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
llvm/trunk/test/CodeGen/AArch64/arm64-ldp.ll
Modified: llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp?rev=252316&r1=252315&r2=252316&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp Fri Nov 6 10:27:47 2015
@@ -78,13 +78,12 @@ typedef struct LdStPairFlags {
struct AArch64LoadStoreOpt : public MachineFunctionPass {
static char ID;
- AArch64LoadStoreOpt() : MachineFunctionPass(ID), IsStrictAlign(false) {
+ AArch64LoadStoreOpt() : MachineFunctionPass(ID) {
initializeAArch64LoadStoreOptPass(*PassRegistry::getPassRegistry());
}
const AArch64InstrInfo *TII;
const TargetRegisterInfo *TRI;
- bool IsStrictAlign;
// Scan the instructions looking for a load/store that can be combined
// with the current instruction into a load/store pair.
@@ -127,7 +126,11 @@ struct AArch64LoadStoreOpt : public Mach
// Find and merge foldable ldr/str instructions.
bool tryToMergeLdStInst(MachineBasicBlock::iterator &MBBI);
- bool optimizeBlock(MachineBasicBlock &MBB);
+ // Check if converting two narrow loads into a single wider load with
+ // bitfield extracts should be enabled.
+ bool enableNarrowLdMerge(MachineFunction &Fn);
+
+ bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowLdOpt);
bool runOnMachineFunction(MachineFunction &Fn) override;
@@ -1161,7 +1164,8 @@ bool AArch64LoadStoreOpt::tryToMergeLdSt
return false;
}
-bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
+bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
+ bool EnableNarrowLdOpt) {
bool Modified = false;
// Three transformations to do here:
// 1) Find halfword loads that can be merged into a single 32-bit word load
@@ -1189,7 +1193,7 @@ bool AArch64LoadStoreOpt::optimizeBlock(
// ldr x0, [x2], #4
for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
- !IsStrictAlign && MBBI != E;) {
+ EnableNarrowLdOpt && MBBI != E;) {
MachineInstr *MI = MBBI;
switch (MI->getOpcode()) {
default:
@@ -1372,15 +1376,25 @@ bool AArch64LoadStoreOpt::optimizeBlock(
return Modified;
}
+bool AArch64LoadStoreOpt::enableNarrowLdMerge(MachineFunction &Fn) {
+ const AArch64Subtarget *SubTarget =
+ &static_cast<const AArch64Subtarget &>(Fn.getSubtarget());
+ bool ProfitableArch = SubTarget->isCortexA57();
+ // FIXME: The benefit from converting narrow loads into a wider load is
+ // microarchitecture dependent, as it assumes that a single load with two
+ // bitfield extracts is cheaper than two narrow loads. Currently, the
+ // conversion is enabled only on Cortex-A57, where the benefit was verified.
+ return ProfitableArch && !SubTarget->requiresStrictAlign();
+}
+
bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
TII = static_cast<const AArch64InstrInfo *>(Fn.getSubtarget().getInstrInfo());
TRI = Fn.getSubtarget().getRegisterInfo();
- IsStrictAlign = (static_cast<const AArch64Subtarget &>(Fn.getSubtarget()))
- .requiresStrictAlign();
bool Modified = false;
+ bool EnableNarrowLdOpt = enableNarrowLdMerge(Fn);
for (auto &MBB : Fn)
- Modified |= optimizeBlock(MBB);
+ Modified |= optimizeBlock(MBB, EnableNarrowLdOpt);
return Modified;
}
Modified: llvm/trunk/test/CodeGen/AArch64/arm64-ldp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-ldp.ll?rev=252316&r1=252315&r2=252316&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-ldp.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-ldp.ll Fri Nov 6 10:27:47 2015
@@ -356,51 +356,3 @@ define i64 @ldp_sext_int_post(i32* %p) n
ret i64 %add
}
-; CHECK-LABEL: Ldrh_merge
-; CHECK-NOT: ldrh
-; CHECK: ldr [[NEW_DEST:w[0-9]+]]
-; CHECK: and w{{[0-9]+}}, [[NEW_DEST]], #0xffff
-; CHECK: lsr w{{[0-9]+}}, [[NEW_DEST]]
-
-define i16 @Ldrh_merge(i16* nocapture readonly %p) {
- %1 = load i16, i16* %p, align 2
- ;%conv = zext i16 %0 to i32
- %arrayidx2 = getelementptr inbounds i16, i16* %p, i64 1
- %2 = load i16, i16* %arrayidx2, align 2
- %add = add nuw nsw i16 %1, %2
- ret i16 %add
-}
-
-; CHECK-LABEL: Ldurh_merge
-; CHECK-NOT: ldurh
-; CHECK: ldur [[NEW_DEST:w[0-9]+]]
-; CHECK: and w{{[0-9]+}}, [[NEW_DEST]], #0xffff
-; CHECK: lsr w{{[0-9]+}}, [[NEW_DEST]]
-define i16 @Ldurh_merge(i16* nocapture readonly %p) {
-entry:
- %arrayidx = getelementptr inbounds i16, i16* %p, i64 -2
- %0 = load i16, i16* %arrayidx
- %arrayidx3 = getelementptr inbounds i16, i16* %p, i64 -1
- %1 = load i16, i16* %arrayidx3
- %add = add nuw nsw i16 %0, %1
- ret i16 %add
-}
-
-; CHECK-LABEL: Ldrh_4_merge
-; CHECK-NOT: ldrh
-; CHECK: ldp [[NEW_DEST:w[0-9]+]]
-define i16 @Ldrh_4_merge(i16* nocapture readonly %P) {
- %arrayidx = getelementptr inbounds i16, i16* %P, i64 0
- %l0 = load i16, i16* %arrayidx
- %arrayidx2 = getelementptr inbounds i16, i16* %P, i64 1
- %l1 = load i16, i16* %arrayidx2
- %arrayidx7 = getelementptr inbounds i16, i16* %P, i64 2
- %l2 = load i16, i16* %arrayidx7
- %arrayidx12 = getelementptr inbounds i16, i16* %P, i64 3
- %l3 = load i16, i16* %arrayidx12
- %add4 = add nuw nsw i16 %l1, %l0
- %add9 = add nuw nsw i16 %add4, %l2
- %add14 = add nuw nsw i16 %add9, %l3
-
- ret i16 %add14
-}
Added: llvm/trunk/test/CodeGen/AArch64/arm64-ldr-merge.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-ldr-merge.ll?rev=252316&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-ldr-merge.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-ldr-merge.ll Fri Nov 6 10:27:47 2015
@@ -0,0 +1,47 @@
+; RUN: llc < %s -march=arm64 -mcpu=cortex-a57 -verify-machineinstrs | FileCheck %s
+
+; CHECK-LABEL: Ldrh_merge
+; CHECK-NOT: ldrh
+; CHECK: ldr [[NEW_DEST:w[0-9]+]]
+; CHECK: and w{{[0-9]+}}, [[NEW_DEST]], #0xffff
+; CHECK: lsr w{{[0-9]+}}, [[NEW_DEST]]
+define i16 @Ldrh_merge(i16* nocapture readonly %p) {
+ %1 = load i16, i16* %p, align 2
+ %arrayidx2 = getelementptr inbounds i16, i16* %p, i64 1
+ %2 = load i16, i16* %arrayidx2, align 2
+ %add = add nuw nsw i16 %1, %2
+ ret i16 %add
+}
+
+; CHECK-LABEL: Ldurh_merge
+; CHECK-NOT: ldurh
+; CHECK: ldur [[NEW_DEST:w[0-9]+]]
+; CHECK: and w{{[0-9]+}}, [[NEW_DEST]], #0xffff
+; CHECK: lsr w{{[0-9]+}}, [[NEW_DEST]]
+define i16 @Ldurh_merge(i16* nocapture readonly %p) {
+entry:
+ %arrayidx = getelementptr inbounds i16, i16* %p, i64 -2
+ %0 = load i16, i16* %arrayidx
+ %arrayidx3 = getelementptr inbounds i16, i16* %p, i64 -1
+ %1 = load i16, i16* %arrayidx3
+ %add = add nuw nsw i16 %0, %1
+ ret i16 %add
+}
+
+; CHECK-LABEL: Ldrh_4_merge
+; CHECK-NOT: ldrh
+; CHECK: ldp [[NEW_DEST:w[0-9]+]]
+define i16 @Ldrh_4_merge(i16* nocapture readonly %P) {
+ %arrayidx = getelementptr inbounds i16, i16* %P, i64 0
+ %l0 = load i16, i16* %arrayidx
+ %arrayidx2 = getelementptr inbounds i16, i16* %P, i64 1
+ %l1 = load i16, i16* %arrayidx2
+ %arrayidx7 = getelementptr inbounds i16, i16* %P, i64 2
+ %l2 = load i16, i16* %arrayidx7
+ %arrayidx12 = getelementptr inbounds i16, i16* %P, i64 3
+ %l3 = load i16, i16* %arrayidx12
+ %add4 = add nuw nsw i16 %l1, %l0
+ %add9 = add nuw nsw i16 %add4, %l2
+ %add14 = add nuw nsw i16 %add9, %l3
+ ret i16 %add14
+}
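As a possible follow-up (not part of this commit), a negative test could pin
down the gating: on a subtarget where the promotion stays off, both halfword
loads should survive. A hypothetical sketch, assuming -mcpu=generic does not
enable the merge, reusing the Ldrh_merge body under a new name:

  ; RUN: llc < %s -march=arm64 -mcpu=generic -verify-machineinstrs | FileCheck %s
  ; The promotion is off on a generic subtarget, so both halfword loads remain.
  ; CHECK-LABEL: Ldrh_no_merge
  ; CHECK: ldrh
  ; CHECK: ldrh
  define i16 @Ldrh_no_merge(i16* nocapture readonly %p) {
    %1 = load i16, i16* %p, align 2
    %arrayidx2 = getelementptr inbounds i16, i16* %p, i64 1
    %2 = load i16, i16* %arrayidx2, align 2
    %add = add nuw nsw i16 %1, %2
    ret i16 %add
  }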