[llvm] r286110 - [AArch64] Removed the narrow load merging code in the ld/st optimizer.
Chad Rosier via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 7 07:27:22 PST 2016
Author: mcrosier
Date: Mon Nov 7 09:27:22 2016
New Revision: 286110
URL: http://llvm.org/viewvc/llvm-project?rev=286110&view=rev
Log:
[AArch64] Removed the narrow load merging code in the ld/st optimizer.
This feature has been disabled for some time now, so remove cruft.
Differential Revision: https://reviews.llvm.org/D26248
Modified:
llvm/trunk/lib/Target/AArch64/AArch64.td
llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h
llvm/trunk/test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll
Modified: llvm/trunk/lib/Target/AArch64/AArch64.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64.td?rev=286110&r1=286109&r2=286110&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64.td (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64.td Mon Nov 7 09:27:22 2016
@@ -61,9 +61,10 @@ def FeatureReserveX18 : SubtargetFeature
"Reserve X18, making it unavailable "
"as a GPR">;
-def FeatureMergeNarrowLd : SubtargetFeature<"merge-narrow-ld",
- "MergeNarrowLoads", "true",
- "Merge narrow load instructions">;
+def FeatureMergeNarrowZeroSt : SubtargetFeature<"merge-narrow-zero-st",
+ "MergeNarrowZeroStores", "true",
+ "Merge narrow zero store "
+ "instructions">;
def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true",
"Use alias analysis during codegen">;
@@ -181,7 +182,7 @@ def ProcA57 : SubtargetFeature<"a57"
FeatureCrypto,
FeatureCustomCheapAsMoveHandling,
FeatureFPARMv8,
- FeatureMergeNarrowLd,
+ FeatureMergeNarrowZeroSt,
FeatureNEON,
FeaturePerfMon,
FeaturePostRAScheduler,
@@ -252,7 +253,7 @@ def ProcKryo : SubtargetFeature<"kryo
FeatureCrypto,
FeatureCustomCheapAsMoveHandling,
FeatureFPARMv8,
- FeatureMergeNarrowLd,
+ FeatureMergeNarrowZeroSt,
FeatureNEON,
FeaturePerfMon,
FeaturePostRAScheduler,
Modified: llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp?rev=286110&r1=286109&r2=286110&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp Mon Nov 7 09:27:22 2016
@@ -38,7 +38,6 @@ STATISTIC(NumPostFolded, "Number of post
STATISTIC(NumPreFolded, "Number of pre-index updates folded");
STATISTIC(NumUnscaledPairCreated,
"Number of load/store from unscaled generated");
-STATISTIC(NumNarrowLoadsPromoted, "Number of narrow loads promoted");
STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
@@ -51,10 +50,6 @@ static cl::opt<unsigned> LdStLimit("aarc
static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100),
cl::Hidden);
-static cl::opt<bool> EnableNarrowLdMerge("enable-narrow-ld-merge", cl::Hidden,
- cl::init(false),
- cl::desc("Enable narrow load merge"));
-
#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"
namespace {
@@ -107,11 +102,11 @@ struct AArch64LoadStoreOpt : public Mach
bool findMatchingStore(MachineBasicBlock::iterator I, unsigned Limit,
MachineBasicBlock::iterator &StoreI);
- // Merge the two instructions indicated into a wider instruction.
+ // Merge the two instructions indicated into a wider narrow store instruction.
MachineBasicBlock::iterator
- mergeNarrowInsns(MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator MergeMI,
- const LdStPairFlags &Flags);
+ mergeNarrowZeroStores(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator MergeMI,
+ const LdStPairFlags &Flags);
// Merge the two instructions indicated into a single pair-wise instruction.
MachineBasicBlock::iterator
@@ -147,8 +142,8 @@ struct AArch64LoadStoreOpt : public Mach
mergeUpdateInsn(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Update, bool IsPreIdx);
- // Find and merge foldable ldr/str instructions.
- bool tryToMergeLdStInst(MachineBasicBlock::iterator &MBBI);
+ // Find and merge zero store instructions.
+ bool tryToMergeZeroStInst(MachineBasicBlock::iterator &MBBI);
// Find and pair ldr/str instructions.
bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI);
@@ -156,7 +151,7 @@ struct AArch64LoadStoreOpt : public Mach
// Find and promote load instructions which read directly from store.
bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI);
- bool optimizeBlock(MachineBasicBlock &MBB, bool enableNarrowLdOpt);
+ bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt);
bool runOnMachineFunction(MachineFunction &Fn) override;
@@ -173,23 +168,6 @@ char AArch64LoadStoreOpt::ID = 0;
INITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt",
AARCH64_LOAD_STORE_OPT_NAME, false, false)
-static unsigned getBitExtrOpcode(MachineInstr &MI) {
- switch (MI.getOpcode()) {
- default:
- llvm_unreachable("Unexpected opcode.");
- case AArch64::LDRBBui:
- case AArch64::LDURBBi:
- case AArch64::LDRHHui:
- case AArch64::LDURHHi:
- return AArch64::UBFMWri;
- case AArch64::LDRSBWui:
- case AArch64::LDURSBWi:
- case AArch64::LDRSHWui:
- case AArch64::LDURSHWi:
- return AArch64::SBFMWri;
- }
-}
-
static bool isNarrowStore(unsigned Opc) {
switch (Opc) {
default:
@@ -202,30 +180,6 @@ static bool isNarrowStore(unsigned Opc)
}
}
-static bool isNarrowLoad(unsigned Opc) {
- switch (Opc) {
- default:
- return false;
- case AArch64::LDRHHui:
- case AArch64::LDURHHi:
- case AArch64::LDRBBui:
- case AArch64::LDURBBi:
- case AArch64::LDRSHWui:
- case AArch64::LDURSHWi:
- case AArch64::LDRSBWui:
- case AArch64::LDURSBWi:
- return true;
- }
-}
-
-static bool isNarrowLoad(MachineInstr &MI) {
- return isNarrowLoad(MI.getOpcode());
-}
-
-static bool isNarrowLoadOrStore(unsigned Opc) {
- return isNarrowLoad(Opc) || isNarrowStore(Opc);
-}
-
// Scaling factor for unscaled load or store.
static int getMemScale(MachineInstr &MI) {
switch (MI.getOpcode()) {
@@ -317,23 +271,11 @@ static unsigned getMatchingNonSExtOpcode
case AArch64::STURSi:
case AArch64::LDRSui:
case AArch64::LDURSi:
- case AArch64::LDRHHui:
- case AArch64::LDURHHi:
- case AArch64::LDRBBui:
- case AArch64::LDURBBi:
return Opc;
case AArch64::LDRSWui:
return AArch64::LDRWui;
case AArch64::LDURSWi:
return AArch64::LDURWi;
- case AArch64::LDRSBWui:
- return AArch64::LDRBBui;
- case AArch64::LDRSHWui:
- return AArch64::LDRHHui;
- case AArch64::LDURSBWi:
- return AArch64::LDURBBi;
- case AArch64::LDURSHWi:
- return AArch64::LDURHHi;
}
}
@@ -353,18 +295,6 @@ static unsigned getMatchingWideOpcode(un
return AArch64::STURXi;
case AArch64::STRWui:
return AArch64::STRXui;
- case AArch64::LDRHHui:
- case AArch64::LDRSHWui:
- return AArch64::LDRWui;
- case AArch64::LDURHHi:
- case AArch64::LDURSHWi:
- return AArch64::LDURWi;
- case AArch64::LDRBBui:
- case AArch64::LDRSBWui:
- return AArch64::LDRHHui;
- case AArch64::LDURBBi:
- case AArch64::LDURSBWi:
- return AArch64::LDURHHi;
}
}
@@ -608,23 +538,20 @@ static bool isLdOffsetInRangeOfSt(Machin
(UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
}
-static bool isPromotableZeroStoreOpcode(unsigned Opc) {
- return isNarrowStore(Opc) || Opc == AArch64::STRWui || Opc == AArch64::STURWi;
-}
-
-static bool isPromotableZeroStoreOpcode(MachineInstr &MI) {
- return isPromotableZeroStoreOpcode(MI.getOpcode());
-}
-
static bool isPromotableZeroStoreInst(MachineInstr &MI) {
- return (isPromotableZeroStoreOpcode(MI)) &&
+ unsigned Opc = MI.getOpcode();
+ return (Opc == AArch64::STRWui || Opc == AArch64::STURWi ||
+ isNarrowStore(Opc)) &&
getLdStRegOp(MI).getReg() == AArch64::WZR;
}
MachineBasicBlock::iterator
-AArch64LoadStoreOpt::mergeNarrowInsns(MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator MergeMI,
- const LdStPairFlags &Flags) {
+AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator MergeMI,
+ const LdStPairFlags &Flags) {
+ assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) &&
+ "Expected promotable zero stores.");
+
MachineBasicBlock::iterator NextI = I;
++NextI;
// If NextI is the second of the two instructions to be merged, we need
@@ -665,105 +592,9 @@ AArch64LoadStoreOpt::mergeNarrowInsns(Ma
OffsetImm /= 2;
}
+ // Construct the new instruction.
DebugLoc DL = I->getDebugLoc();
MachineBasicBlock *MBB = I->getParent();
- if (isNarrowLoad(Opc)) {
- MachineInstr *RtNewDest = &*(MergeForward ? I : MergeMI);
- // When merging small (< 32 bit) loads for big-endian targets, the order of
- // the component parts gets swapped.
- if (!Subtarget->isLittleEndian())
- std::swap(RtMI, Rt2MI);
- // Construct the new load instruction.
- MachineInstr *NewMemMI, *BitExtMI1, *BitExtMI2;
- NewMemMI =
- BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingWideOpcode(Opc)))
- .addOperand(getLdStRegOp(*RtNewDest))
- .addOperand(BaseRegOp)
- .addImm(OffsetImm)
- .setMemRefs(I->mergeMemRefsWith(*MergeMI));
- (void)NewMemMI;
-
- DEBUG(
- dbgs()
- << "Creating the new load and extract. Replacing instructions:\n ");
- DEBUG(I->print(dbgs()));
- DEBUG(dbgs() << " ");
- DEBUG(MergeMI->print(dbgs()));
- DEBUG(dbgs() << " with instructions:\n ");
- DEBUG((NewMemMI)->print(dbgs()));
-
- int Width = getMemScale(*I) == 1 ? 8 : 16;
- int LSBLow = 0;
- int LSBHigh = Width;
- int ImmsLow = LSBLow + Width - 1;
- int ImmsHigh = LSBHigh + Width - 1;
- MachineInstr *ExtDestMI = &*(MergeForward ? MergeMI : I);
- if ((ExtDestMI == Rt2MI) == Subtarget->isLittleEndian()) {
- // Create the bitfield extract for high bits.
- BitExtMI1 =
- BuildMI(*MBB, InsertionPoint, DL, TII->get(getBitExtrOpcode(*Rt2MI)))
- .addOperand(getLdStRegOp(*Rt2MI))
- .addReg(getLdStRegOp(*RtNewDest).getReg())
- .addImm(LSBHigh)
- .addImm(ImmsHigh);
- // Create the bitfield extract for low bits.
- if (RtMI->getOpcode() == getMatchingNonSExtOpcode(RtMI->getOpcode())) {
- // For unsigned, prefer to use AND for low bits.
- BitExtMI2 = BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::ANDWri))
- .addOperand(getLdStRegOp(*RtMI))
- .addReg(getLdStRegOp(*RtNewDest).getReg())
- .addImm(ImmsLow);
- } else {
- BitExtMI2 =
- BuildMI(*MBB, InsertionPoint, DL, TII->get(getBitExtrOpcode(*RtMI)))
- .addOperand(getLdStRegOp(*RtMI))
- .addReg(getLdStRegOp(*RtNewDest).getReg())
- .addImm(LSBLow)
- .addImm(ImmsLow);
- }
- } else {
- // Create the bitfield extract for low bits.
- if (RtMI->getOpcode() == getMatchingNonSExtOpcode(RtMI->getOpcode())) {
- // For unsigned, prefer to use AND for low bits.
- BitExtMI1 = BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::ANDWri))
- .addOperand(getLdStRegOp(*RtMI))
- .addReg(getLdStRegOp(*RtNewDest).getReg())
- .addImm(ImmsLow);
- } else {
- BitExtMI1 =
- BuildMI(*MBB, InsertionPoint, DL, TII->get(getBitExtrOpcode(*RtMI)))
- .addOperand(getLdStRegOp(*RtMI))
- .addReg(getLdStRegOp(*RtNewDest).getReg())
- .addImm(LSBLow)
- .addImm(ImmsLow);
- }
-
- // Create the bitfield extract for high bits.
- BitExtMI2 =
- BuildMI(*MBB, InsertionPoint, DL, TII->get(getBitExtrOpcode(*Rt2MI)))
- .addOperand(getLdStRegOp(*Rt2MI))
- .addReg(getLdStRegOp(*RtNewDest).getReg())
- .addImm(LSBHigh)
- .addImm(ImmsHigh);
- }
- (void)BitExtMI1;
- (void)BitExtMI2;
-
- DEBUG(dbgs() << " ");
- DEBUG((BitExtMI1)->print(dbgs()));
- DEBUG(dbgs() << " ");
- DEBUG((BitExtMI2)->print(dbgs()));
- DEBUG(dbgs() << "\n");
-
- // Erase the old instructions.
- I->eraseFromParent();
- MergeMI->eraseFromParent();
- return NextI;
- }
- assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) &&
- "Expected promotable zero store");
-
- // Construct the new instruction.
MachineInstrBuilder MIB;
MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingWideOpcode(Opc)))
.addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
@@ -772,7 +603,7 @@ AArch64LoadStoreOpt::mergeNarrowInsns(Ma
.setMemRefs(I->mergeMemRefsWith(*MergeMI));
(void)MIB;
- DEBUG(dbgs() << "Creating wider load/store. Replacing instructions:\n ");
+ DEBUG(dbgs() << "Creating wider store. Replacing instructions:\n ");
DEBUG(I->print(dbgs()));
DEBUG(dbgs() << " ");
DEBUG(MergeMI->print(dbgs()));
@@ -1179,13 +1010,14 @@ static bool areCandidatesToMergeOrPair(M
return true;
}
- // If the second instruction isn't even a load/store, bail out.
+ // If the second instruction isn't even a mergable/pairable load/store, bail
+ // out.
if (!PairIsValidLdStrOpc)
return false;
- // FIXME: We don't support merging narrow loads/stores with mixed
- // scaled/unscaled offsets.
- if (isNarrowLoadOrStore(OpcA) || isNarrowLoadOrStore(OpcB))
+ // FIXME: We don't support merging narrow stores with mixed scaled/unscaled
+ // offsets.
+ if (isNarrowStore(OpcA) || isNarrowStore(OpcB))
return false;
// Try to match an unscaled load/store with a scaled load/store.
@@ -1596,37 +1428,26 @@ bool AArch64LoadStoreOpt::tryToPromoteLo
return false;
}
-// Find narrow loads that can be converted into a single wider load with
-// bitfield extract instructions. Also merge adjacent zero stores into a wider
-// store.
-bool AArch64LoadStoreOpt::tryToMergeLdStInst(
+// Merge adjacent zero stores into a wider store.
+bool AArch64LoadStoreOpt::tryToMergeZeroStInst(
MachineBasicBlock::iterator &MBBI) {
- assert((isNarrowLoad(*MBBI) || isPromotableZeroStoreOpcode(*MBBI)) &&
- "Expected narrow op.");
+ assert(isPromotableZeroStoreInst(*MBBI) && "Expected narrow store.");
MachineInstr &MI = *MBBI;
MachineBasicBlock::iterator E = MI.getParent()->end();
if (!TII->isCandidateToMergeOrPair(MI))
return false;
- // For promotable zero stores, the stored value should be WZR.
- if (isPromotableZeroStoreOpcode(MI) &&
- getLdStRegOp(MI).getReg() != AArch64::WZR)
- return false;
-
// Look ahead up to LdStLimit instructions for a mergable instruction.
LdStPairFlags Flags;
MachineBasicBlock::iterator MergeMI =
findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ true);
if (MergeMI != E) {
- if (isNarrowLoad(MI)) {
- ++NumNarrowLoadsPromoted;
- } else if (isPromotableZeroStoreInst(MI)) {
- ++NumZeroStoresPromoted;
- }
+ ++NumZeroStoresPromoted;
+
// Keeping the iterator straight is a pain, so we let the merge routine tell
// us what the next instruction is after it's done mucking about.
- MBBI = mergeNarrowInsns(MBBI, MergeMI, Flags);
+ MBBI = mergeNarrowZeroStores(MBBI, MergeMI, Flags);
return true;
}
return false;
@@ -1667,7 +1488,7 @@ bool AArch64LoadStoreOpt::tryToPairLdStI
}
bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
- bool enableNarrowLdOpt) {
+ bool EnableNarrowZeroStOpt) {
bool Modified = false;
// Four transformations to do here:
// 1) Find loads that directly read from stores and promote them by
@@ -1706,29 +1527,21 @@ bool AArch64LoadStoreOpt::optimizeBlock(
}
}
}
- // 2) Find narrow loads that can be converted into a single wider load
- // with bitfield extract instructions.
- // e.g.,
- // ldrh w0, [x2]
- // ldrh w1, [x2, #2]
- // ; becomes
- // ldr w0, [x2]
- // ubfx w1, w0, #16, #16
- // and w0, w0, #ffff
- //
- // Also merge adjacent zero stores into a wider store.
+ // 2) Merge adjacent zero stores into a wider store.
// e.g.,
// strh wzr, [x0]
// strh wzr, [x0, #2]
// ; becomes
// str wzr, [x0]
+ // e.g.,
+ // str wzr, [x0]
+ // str wzr, [x0, #4]
+ // ; becomes
+ // str xzr, [x0]
for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
- enableNarrowLdOpt && MBBI != E;) {
- MachineInstr &MI = *MBBI;
- unsigned Opc = MI.getOpcode();
- if (isPromotableZeroStoreOpcode(Opc) ||
- (EnableNarrowLdMerge && isNarrowLoad(Opc))) {
- if (tryToMergeLdStInst(MBBI)) {
+ EnableNarrowZeroStOpt && MBBI != E;) {
+ if (isPromotableZeroStoreInst(*MBBI)) {
+ if (tryToMergeZeroStInst(MBBI)) {
Modified = true;
} else
++MBBI;
@@ -1889,10 +1702,10 @@ bool AArch64LoadStoreOpt::runOnMachineFu
UsedRegs.resize(TRI->getNumRegs());
bool Modified = false;
- bool enableNarrowLdOpt =
- Subtarget->mergeNarrowLoads() && !Subtarget->requiresStrictAlign();
+ bool enableNarrowZeroStOpt =
+ Subtarget->mergeNarrowStores() && !Subtarget->requiresStrictAlign();
for (auto &MBB : Fn)
- Modified |= optimizeBlock(MBB, enableNarrowLdOpt);
+ Modified |= optimizeBlock(MBB, enableNarrowZeroStOpt);
return Modified;
}
Modified: llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h?rev=286110&r1=286109&r2=286110&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h Mon Nov 7 09:27:22 2016
@@ -71,7 +71,7 @@ protected:
// StrictAlign - Disallow unaligned memory accesses.
bool StrictAlign = false;
- bool MergeNarrowLoads = false;
+ bool MergeNarrowZeroStores = false;
bool UseAA = false;
bool PredictableSelectIsExpensive = false;
bool BalanceFPOps = false;
@@ -179,7 +179,7 @@ public:
bool hasCrypto() const { return HasCrypto; }
bool hasCRC() const { return HasCRC; }
bool hasRAS() const { return HasRAS; }
- bool mergeNarrowLoads() const { return MergeNarrowLoads; }
+ bool mergeNarrowStores() const { return MergeNarrowZeroStores; }
bool balanceFPOps() const { return BalanceFPOps; }
bool predictableSelectIsExpensive() const {
return PredictableSelectIsExpensive;
Modified: llvm/trunk/test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll?rev=286110&r1=286109&r2=286110&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll Mon Nov 7 09:27:22 2016
@@ -1,329 +1,14 @@
-; RUN: llc < %s -mtriple aarch64--none-eabi -mcpu=cortex-a57 -verify-machineinstrs -enable-narrow-ld-merge=true | FileCheck %s --check-prefix=CHECK --check-prefix=LE
-; RUN: llc < %s -mtriple aarch64_be--none-eabi -mcpu=cortex-a57 -verify-machineinstrs -enable-narrow-ld-merge=true | FileCheck %s --check-prefix=CHECK --check-prefix=BE
-; RUN: llc < %s -mtriple aarch64--none-eabi -mcpu=kryo -verify-machineinstrs -enable-narrow-ld-merge=true | FileCheck %s --check-prefix=CHECK --check-prefix=LE
-
-; CHECK-LABEL: Ldrh_merge
-; CHECK-NOT: ldrh
-; CHECK: ldr [[NEW_DEST:w[0-9]+]]
-; CHECK-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xffff
-; CHECK-DAG: lsr [[HI_PART:w[0-9]+]], [[NEW_DEST]], #16
-; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
-; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
-define i16 @Ldrh_merge(i16* nocapture readonly %p) {
- %1 = load i16, i16* %p, align 2
- %arrayidx2 = getelementptr inbounds i16, i16* %p, i64 1
- %2 = load i16, i16* %arrayidx2, align 2
- %add = sub nuw nsw i16 %1, %2
- ret i16 %add
-}
-
-; CHECK-LABEL: Ldurh_merge
-; CHECK-NOT: ldurh
-; CHECK: ldur [[NEW_DEST:w[0-9]+]]
-; CHECK-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xffff
-; CHECK-DAG: lsr [[HI_PART:w[0-9]+]], [[NEW_DEST]]
-; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
-; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
-define i16 @Ldurh_merge(i16* nocapture readonly %p) {
-entry:
- %arrayidx = getelementptr inbounds i16, i16* %p, i64 -2
- %0 = load i16, i16* %arrayidx
- %arrayidx3 = getelementptr inbounds i16, i16* %p, i64 -1
- %1 = load i16, i16* %arrayidx3
- %add = sub nuw nsw i16 %0, %1
- ret i16 %add
-}
-
-; CHECK-LABEL: Ldrh_4_merge
-; CHECK-NOT: ldrh
-; CHECK: ldp [[WORD1:w[0-9]+]], [[WORD2:w[0-9]+]], [x0]
-; CHECK-DAG: and [[WORD1LO:w[0-9]+]], [[WORD1]], #0xffff
-; CHECK-DAG: lsr [[WORD1HI:w[0-9]+]], [[WORD1]], #16
-; CHECK-DAG: and [[WORD2LO:w[0-9]+]], [[WORD2]], #0xffff
-; CHECK-DAG: lsr [[WORD2HI:w[0-9]+]], [[WORD2]], #16
-; LE-DAG: sub [[TEMP1:w[0-9]+]], [[WORD1HI]], [[WORD1LO]]
-; BE-DAG: sub [[TEMP1:w[0-9]+]], [[WORD1LO]], [[WORD1HI]]
-; LE: udiv [[TEMP2:w[0-9]+]], [[TEMP1]], [[WORD2LO]]
-; BE: udiv [[TEMP2:w[0-9]+]], [[TEMP1]], [[WORD2HI]]
-; LE: sub w0, [[TEMP2]], [[WORD2HI]]
-; BE: sub w0, [[TEMP2]], [[WORD2LO]]
-define i16 @Ldrh_4_merge(i16* nocapture readonly %P) {
- %arrayidx = getelementptr inbounds i16, i16* %P, i64 0
- %l0 = load i16, i16* %arrayidx
- %arrayidx2 = getelementptr inbounds i16, i16* %P, i64 1
- %l1 = load i16, i16* %arrayidx2
- %arrayidx7 = getelementptr inbounds i16, i16* %P, i64 2
- %l2 = load i16, i16* %arrayidx7
- %arrayidx12 = getelementptr inbounds i16, i16* %P, i64 3
- %l3 = load i16, i16* %arrayidx12
- %add4 = sub nuw nsw i16 %l1, %l0
- %add9 = udiv i16 %add4, %l2
- %add14 = sub nuw nsw i16 %add9, %l3
- ret i16 %add14
-}
-
-; CHECK-LABEL: Ldrsh_merge
-; CHECK: ldr [[NEW_DEST:w[0-9]+]]
-; CHECK-DAG: asr [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16
-; CHECK-DAG: sxth [[HI_PART:w[0-9]+]], [[NEW_DEST]]
-; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
-; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
-
-define i32 @Ldrsh_merge(i16* %p) nounwind {
- %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 4
- %tmp = load i16, i16* %add.ptr0
- %add.ptr = getelementptr inbounds i16, i16* %p, i64 5
- %tmp1 = load i16, i16* %add.ptr
- %sexttmp = sext i16 %tmp to i32
- %sexttmp1 = sext i16 %tmp1 to i32
- %add = sub nsw i32 %sexttmp1, %sexttmp
- ret i32 %add
-}
-
-; CHECK-LABEL: Ldrsh_zsext_merge
-; CHECK: ldr [[NEW_DEST:w[0-9]+]]
-; LE-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xffff
-; LE-DAG: asr [[HI_PART:w[0-9]+]], [[NEW_DEST]], #16
-; BE-DAG: sxth [[LO_PART:w[0-9]+]], [[NEW_DEST]]
-; BE-DAG: lsr [[HI_PART:w[0-9]+]], [[NEW_DEST]], #16
-; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
-; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
-define i32 @Ldrsh_zsext_merge(i16* %p) nounwind {
- %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 4
- %tmp = load i16, i16* %add.ptr0
- %add.ptr = getelementptr inbounds i16, i16* %p, i64 5
- %tmp1 = load i16, i16* %add.ptr
- %sexttmp = zext i16 %tmp to i32
- %sexttmp1 = sext i16 %tmp1 to i32
- %add = sub nsw i32 %sexttmp, %sexttmp1
- ret i32 %add
-}
-
-; CHECK-LABEL: Ldrsh_szext_merge
-; CHECK: ldr [[NEW_DEST:w[0-9]+]]
-; LE-DAG: sxth [[LO_PART:w[0-9]+]], [[NEW_DEST]]
-; LE-DAG: lsr [[HI_PART:w[0-9]+]], [[NEW_DEST]], #16
-; BE-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xffff
-; BE-DAG: asr [[HI_PART:w[0-9]+]], [[NEW_DEST]], #16
-; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
-; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
-define i32 @Ldrsh_szext_merge(i16* %p) nounwind {
- %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 4
- %tmp = load i16, i16* %add.ptr0
- %add.ptr = getelementptr inbounds i16, i16* %p, i64 5
- %tmp1 = load i16, i16* %add.ptr
- %sexttmp = sext i16 %tmp to i32
- %sexttmp1 = zext i16 %tmp1 to i32
- %add = sub nsw i32 %sexttmp, %sexttmp1
- ret i32 %add
-}
-
-; CHECK-LABEL: Ldrb_merge
-; CHECK: ldrh [[NEW_DEST:w[0-9]+]]
-; CHECK-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xff
-; CHECK-DAG: ubfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
-; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
-; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
-define i32 @Ldrb_merge(i8* %p) nounwind {
- %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 2
- %tmp = load i8, i8* %add.ptr0
- %add.ptr = getelementptr inbounds i8, i8* %p, i64 3
- %tmp1 = load i8, i8* %add.ptr
- %sexttmp = zext i8 %tmp to i32
- %sexttmp1 = zext i8 %tmp1 to i32
- %add = sub nsw i32 %sexttmp, %sexttmp1
- ret i32 %add
-}
-
-; CHECK-LABEL: Ldrsb_merge
-; CHECK: ldrh [[NEW_DEST:w[0-9]+]]
-; CHECK-DAG: sxtb [[LO_PART:w[0-9]+]], [[NEW_DEST]]
-; CHECK-DAG: sbfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
-; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
-; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
-define i32 @Ldrsb_merge(i8* %p) nounwind {
- %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 2
- %tmp = load i8, i8* %add.ptr0
- %add.ptr = getelementptr inbounds i8, i8* %p, i64 3
- %tmp1 = load i8, i8* %add.ptr
- %sexttmp = sext i8 %tmp to i32
- %sexttmp1 = sext i8 %tmp1 to i32
- %add = sub nsw i32 %sexttmp, %sexttmp1
- ret i32 %add
-}
-
-; CHECK-LABEL: Ldrsb_zsext_merge
-; CHECK: ldrh [[NEW_DEST:w[0-9]+]]
-; LE-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xff
-; LE-DAG: sbfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
-; BE-DAG: sxtb [[LO_PART:w[0-9]+]], [[NEW_DEST]]
-; BE-DAG: ubfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
-; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
-; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
-define i32 @Ldrsb_zsext_merge(i8* %p) nounwind {
- %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 2
- %tmp = load i8, i8* %add.ptr0
- %add.ptr = getelementptr inbounds i8, i8* %p, i64 3
- %tmp1 = load i8, i8* %add.ptr
- %sexttmp = zext i8 %tmp to i32
- %sexttmp1 = sext i8 %tmp1 to i32
- %add = sub nsw i32 %sexttmp, %sexttmp1
- ret i32 %add
-}
-
-; CHECK-LABEL: Ldrsb_szext_merge
-; CHECK: ldrh [[NEW_DEST:w[0-9]+]]
-; LE-DAG: sxtb [[LO_PART:w[0-9]+]], [[NEW_DEST]]
-; LE-DAG: ubfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
-; BE-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xff
-; BE-DAG: sbfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
-; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
-; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
-define i32 @Ldrsb_szext_merge(i8* %p) nounwind {
- %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 2
- %tmp = load i8, i8* %add.ptr0
- %add.ptr = getelementptr inbounds i8, i8* %p, i64 3
- %tmp1 = load i8, i8* %add.ptr
- %sexttmp = sext i8 %tmp to i32
- %sexttmp1 = zext i8 %tmp1 to i32
- %add = sub nsw i32 %sexttmp, %sexttmp1
- ret i32 %add
-}
-
-; CHECK-LABEL: Ldursh_merge
-; CHECK: ldur [[NEW_DEST:w[0-9]+]]
-; CHECK-DAG: asr [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16
-; CHECK-DAG: sxth [[HI_PART:w[0-9]+]], [[NEW_DEST]]
-; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
-; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
-define i32 @Ldursh_merge(i16* %p) nounwind {
- %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 -1
- %tmp = load i16, i16* %add.ptr0
- %add.ptr = getelementptr inbounds i16, i16* %p, i64 -2
- %tmp1 = load i16, i16* %add.ptr
- %sexttmp = sext i16 %tmp to i32
- %sexttmp1 = sext i16 %tmp1 to i32
- %add = sub nsw i32 %sexttmp, %sexttmp1
- ret i32 %add
-}
-
-; CHECK-LABEL: Ldursh_zsext_merge
-; CHECK: ldur [[NEW_DEST:w[0-9]+]]
-; LE-DAG: lsr [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16
-; LE-DAG: sxth [[HI_PART:w[0-9]+]], [[NEW_DEST]]
-; BE-DAG: asr [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16
-; BE-DAG: and [[HI_PART:w[0-9]+]], [[NEW_DEST]], #0xffff
-; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
-; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
-define i32 @Ldursh_zsext_merge(i16* %p) nounwind {
- %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 -1
- %tmp = load i16, i16* %add.ptr0
- %add.ptr = getelementptr inbounds i16, i16* %p, i64 -2
- %tmp1 = load i16, i16* %add.ptr
- %sexttmp = zext i16 %tmp to i32
- %sexttmp1 = sext i16 %tmp1 to i32
- %add = sub nsw i32 %sexttmp, %sexttmp1
- ret i32 %add
-}
-
-; CHECK-LABEL: Ldursh_szext_merge
-; CHECK: ldur [[NEW_DEST:w[0-9]+]]
-; LE-DAG: asr [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16
-; LE-DAG: and [[HI_PART:w[0-9]+]], [[NEW_DEST]], #0xffff
-; BE-DAG: lsr [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16
-; BE-DAG: sxth [[HI_PART:w[0-9]+]], [[NEW_DEST]]
-; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
-; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
-define i32 @Ldursh_szext_merge(i16* %p) nounwind {
- %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 -1
- %tmp = load i16, i16* %add.ptr0
- %add.ptr = getelementptr inbounds i16, i16* %p, i64 -2
- %tmp1 = load i16, i16* %add.ptr
- %sexttmp = sext i16 %tmp to i32
- %sexttmp1 = zext i16 %tmp1 to i32
- %add = sub nsw i32 %sexttmp, %sexttmp1
- ret i32 %add
-}
-
-; CHECK-LABEL: Ldurb_merge
-; CHECK: ldurh [[NEW_DEST:w[0-9]+]]
-; CHECK-DAG: ubfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
-; CHECK-DAG: and [[HI_PART:w[0-9]+]], [[NEW_DEST]], #0xff
-; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
-; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
-define i32 @Ldurb_merge(i8* %p) nounwind {
- %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 -1
- %tmp = load i8, i8* %add.ptr0
- %add.ptr = getelementptr inbounds i8, i8* %p, i64 -2
- %tmp1 = load i8, i8* %add.ptr
- %sexttmp = zext i8 %tmp to i32
- %sexttmp1 = zext i8 %tmp1 to i32
- %add = sub nsw i32 %sexttmp, %sexttmp1
- ret i32 %add
-}
-
-; CHECK-LABEL: Ldursb_merge
-; CHECK: ldurh [[NEW_DEST:w[0-9]+]]
-; CHECK-DAG: sbfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
-; CHECK-DAG: sxtb [[HI_PART:w[0-9]+]], [[NEW_DEST]]
-; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
-; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
-define i32 @Ldursb_merge(i8* %p) nounwind {
- %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 -1
- %tmp = load i8, i8* %add.ptr0
- %add.ptr = getelementptr inbounds i8, i8* %p, i64 -2
- %tmp1 = load i8, i8* %add.ptr
- %sexttmp = sext i8 %tmp to i32
- %sexttmp1 = sext i8 %tmp1 to i32
- %add = sub nsw i32 %sexttmp, %sexttmp1
- ret i32 %add
-}
-
-; CHECK-LABEL: Ldursb_zsext_merge
-; CHECK: ldurh [[NEW_DEST:w[0-9]+]]
-; LE-DAG: ubfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
-; LE-DAG: sxtb [[HI_PART:w[0-9]+]], [[NEW_DEST]]
-; BE-DAG: sbfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
-; BE-DAG: and [[HI_PART:w[0-9]+]], [[NEW_DEST]], #0xff
-; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
-; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
-define i32 @Ldursb_zsext_merge(i8* %p) nounwind {
- %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 -1
- %tmp = load i8, i8* %add.ptr0
- %add.ptr = getelementptr inbounds i8, i8* %p, i64 -2
- %tmp1 = load i8, i8* %add.ptr
- %sexttmp = zext i8 %tmp to i32
- %sexttmp1 = sext i8 %tmp1 to i32
- %add = sub nsw i32 %sexttmp, %sexttmp1
- ret i32 %add
-}
-
-; CHECK-LABEL: Ldursb_szext_merge
-; CHECK: ldurh [[NEW_DEST:w[0-9]+]]
-; LE-DAG: sbfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
-; LE-DAG: and [[HI_PART:w[0-9]+]], [[NEW_DEST]], #0xff
-; BE-DAG: ubfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
-; BE-DAG: sxtb [[HI_PART:w[0-9]+]], [[NEW_DEST]]
-; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
-; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
-define i32 @Ldursb_szext_merge(i8* %p) nounwind {
- %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 -1
- %tmp = load i8, i8* %add.ptr0
- %add.ptr = getelementptr inbounds i8, i8* %p, i64 -2
- %tmp1 = load i8, i8* %add.ptr
- %sexttmp = sext i8 %tmp to i32
- %sexttmp1 = zext i8 %tmp1 to i32
- %add = sub nsw i32 %sexttmp, %sexttmp1
- ret i32 %add
-}
+; RUN: llc < %s -mtriple aarch64--none-eabi -mcpu=cortex-a57 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=LE
+; RUN: llc < %s -mtriple aarch64_be--none-eabi -mcpu=cortex-a57 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=BE
+; RUN: llc < %s -mtriple aarch64--none-eabi -mcpu=kryo -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=LE
; CHECK-LABEL: Strh_zero
; CHECK: str wzr
define void @Strh_zero(i16* nocapture %P, i32 %n) {
entry:
- %idxprom = sext i32 %n to i64
+ %idxprom = sext i32 %n to i64
%arrayidx = getelementptr inbounds i16, i16* %P, i64 %idxprom
- store i16 0, i16* %arrayidx
+ store i16 0, i16* %arrayidx
%add = add nsw i32 %n, 1
%idxprom1 = sext i32 %add to i64
%arrayidx2 = getelementptr inbounds i16, i16* %P, i64 %idxprom1
More information about the llvm-commits
mailing list