[llvm] 58deb20 - Revert "Merge memtag instructions with adjacent stack slots."
Evgenii Stepanov via llvm-commits
llvm-commits@lists.llvm.org
Wed Jan 8 14:37:22 PST 2020
Author: Evgenii Stepanov
Date: 2020-01-08T14:36:12-08:00
New Revision: 58deb20dd2dfbfbfff8097ce80137d12a57a3607
URL: https://github.com/llvm/llvm-project/commit/58deb20dd2dfbfbfff8097ce80137d12a57a3607
DIFF: https://github.com/llvm/llvm-project/commit/58deb20dd2dfbfbfff8097ce80137d12a57a3607.diff
LOG: Revert "Merge memtag instructions with adjacent stack slots."
*** Bad machine code: Tied use must be a register ***
- function: stg_alloca17
- basic block: %bb.0 entry (0x20076710580)
- instruction: early-clobber %0:gpr64common, early-clobber %1:gpr64sp = STGloop 272, %stack.0.a :: (store 272 into %ir.a, align 16)
- operand 3: %stack.0.a
http://lab.llvm.org:8011/builders/llvm-clang-x86_64-expensive-checks-win/builds/21481/steps/test-check-all/logs/stdio
This reverts commit b675a7628ce6a21b1e4a71c079a67badfb8b073d.
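The failure mode, for readers of the verifier report above: with the reverted patch,
STGloop's start-address input ($Rn) is tied to the early-clobber writeback output
($Rn_wback), but before frame-index elimination that operand can still be a frame index
(%stack.0.a) rather than a register, and a tied use must be a register. A minimal MIR
sketch of the two shapes (the second is hypothetical, assuming the frame index is first
materialized into a virtual register, e.g. with ADDXri):

  ; Rejected by the verifier: operand 3 (%stack.0.a) is tied to the
  ; early-clobber def %1, but it is a frame index, not a register.
  early-clobber %0:gpr64common, early-clobber %1:gpr64sp = STGloop 272, %stack.0.a

  ; Verifier-clean shape: resolve the frame index into a register first,
  ; so the tied use/def pair are both registers.
  %2:gpr64sp = ADDXri %stack.0.a, 0, 0
  early-clobber %0:gpr64common, early-clobber %1:gpr64sp = STGloop 272, %2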
Added:
Modified:
llvm/include/llvm/CodeGen/TargetFrameLowering.h
llvm/lib/CodeGen/PrologEpilogInserter.cpp
llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
llvm/lib/Target/AArch64/AArch64FrameLowering.h
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
llvm/test/CodeGen/AArch64/settag.ll
llvm/test/CodeGen/AArch64/stack-tagging-unchecked-ld-st.ll
Removed:
llvm/test/CodeGen/AArch64/settag-merge.ll
llvm/test/CodeGen/AArch64/settag-merge.mir
################################################################################
diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
index a0beee36c748..c7d4c4d7e5d4 100644
--- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
@@ -309,13 +309,6 @@ class TargetFrameLowering {
RegScavenger *RS = nullptr) const {
}
- /// processFunctionBeforeFrameIndicesReplaced - This method is called
- /// immediately before MO_FrameIndex operands are eliminated, but after the
- /// frame is finalized. This method is optional.
- virtual void
- processFunctionBeforeFrameIndicesReplaced(MachineFunction &MF,
- RegScavenger *RS = nullptr) const {}
-
virtual unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const {
report_fatal_error("WinEH not implemented for this target");
}
diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index d583643ac68f..3909b5717281 100644
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -259,10 +259,6 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {
for (auto &I : EntryDbgValues)
I.first->insert(I.first->begin(), I.second.begin(), I.second.end());
- // Allow the target machine to make final modifications to the function
- // before the frame layout is finalized.
- TFI->processFunctionBeforeFrameIndicesReplaced(MF, RS);
-
// Replace all MO_FrameIndex operands with physical register references
// and actual offsets.
//
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 97162ae22187..3b8f8a19fe49 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -349,38 +349,22 @@ bool AArch64ExpandPseudo::expandSetTagLoop(
MachineBasicBlock::iterator &NextMBBI) {
MachineInstr &MI = *MBBI;
DebugLoc DL = MI.getDebugLoc();
- Register SizeReg = MI.getOperand(0).getReg();
- Register AddressReg = MI.getOperand(1).getReg();
+ Register SizeReg = MI.getOperand(2).getReg();
+ Register AddressReg = MI.getOperand(3).getReg();
MachineFunction *MF = MBB.getParent();
bool ZeroData = MI.getOpcode() == AArch64::STZGloop;
- const unsigned OpCode1 =
- ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex;
- const unsigned OpCode2 =
+ const unsigned OpCode =
ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;
- unsigned Size = MI.getOperand(2).getImm();
- assert(Size > 0 && Size % 16 == 0);
- if (Size % (16 * 2) != 0) {
- BuildMI(MBB, MBBI, DL, TII->get(OpCode1), AddressReg)
- .addReg(AddressReg)
- .addReg(AddressReg)
- .addImm(1);
- Size -= 16;
- }
- MachineBasicBlock::iterator I =
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), SizeReg)
- .addImm(Size);
- expandMOVImm(MBB, I, 64);
-
auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
MF->insert(++MBB.getIterator(), LoopBB);
MF->insert(++LoopBB->getIterator(), DoneBB);
- BuildMI(LoopBB, DL, TII->get(OpCode2))
+ BuildMI(LoopBB, DL, TII->get(OpCode))
.addDef(AddressReg)
.addReg(AddressReg)
.addReg(AddressReg)
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 39d32863f15b..c732106014e6 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -170,11 +170,6 @@ static cl::opt<bool>
cl::desc("reverse the CSR restore sequence"),
cl::init(false), cl::Hidden);
-static cl::opt<bool> StackTaggingMergeSetTag(
- "stack-tagging-merge-settag",
- cl::desc("merge settag instruction in function epilog"), cl::init(true),
- cl::Hidden);
-
STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
/// This is the biggest offset to the stack pointer we can encode in aarch64
@@ -485,39 +480,6 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
return true;
}
-bool AArch64FrameLowering::shouldCombineCSRLocalStackBumpInEpilogue(
- MachineBasicBlock &MBB, unsigned StackBumpBytes) const {
- if (!shouldCombineCSRLocalStackBump(*MBB.getParent(), StackBumpBytes))
- return false;
-
- if (MBB.empty())
- return true;
-
- // Disable combined SP bump if the last instruction is an MTE tag store. It
- // is almost always better to merge SP adjustment into those instructions.
- MachineBasicBlock::iterator LastI = MBB.getFirstTerminator();
- MachineBasicBlock::iterator Begin = MBB.begin();
- while (LastI != Begin) {
- --LastI;
- if (LastI->isTransient())
- continue;
- if (!LastI->getFlag(MachineInstr::FrameDestroy))
- break;
- }
- switch (LastI->getOpcode()) {
- case AArch64::STGloop:
- case AArch64::STZGloop:
- case AArch64::STGOffset:
- case AArch64::STZGOffset:
- case AArch64::ST2GOffset:
- case AArch64::STZ2GOffset:
- return false;
- default:
- return true;
- }
- llvm_unreachable("unreachable");
-}
-
// Given a load or a store instruction, generate an appropriate unwinding SEH
// code on Windows.
static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI,
@@ -1497,7 +1459,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// function.
if (MF.hasEHFunclets())
AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
- bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes);
+ bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
// Assume we can't combine the last pop with the sp restore.
if (!CombineSPBump && PrologueSaveSize != 0) {
@@ -2675,399 +2637,9 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
.addImm(0);
}
-namespace {
-struct TagStoreInstr {
- MachineInstr *MI;
- int64_t Offset, Size;
- explicit TagStoreInstr(MachineInstr *MI, int64_t Offset, int64_t Size)
- : MI(MI), Offset(Offset), Size(Size) {}
-};
-
-class TagStoreEdit {
- MachineFunction *MF;
- MachineBasicBlock *MBB;
- MachineRegisterInfo *MRI;
- // Tag store instructions that are being replaced.
- SmallVector<TagStoreInstr, 8> TagStores;
- // Combined memref arguments of the above instructions.
- SmallVector<MachineMemOperand *, 8> CombinedMemRefs;
-
- // Replace allocation tags in [FrameReg + FrameRegOffset, FrameReg +
- // FrameRegOffset + Size) with the address tag of SP.
- Register FrameReg;
- StackOffset FrameRegOffset;
- int64_t Size;
- // If not None, move FrameReg to (FrameReg + FrameRegUpdate) at the end.
- Optional<int64_t> FrameRegUpdate;
- // MIFlags for any FrameReg updating instructions.
- unsigned FrameRegUpdateFlags;
-
- // Use zeroing instruction variants.
- bool ZeroData;
- DebugLoc DL;
-
- void emitUnrolled(MachineBasicBlock::iterator InsertI);
- void emitLoop(MachineBasicBlock::iterator InsertI);
-
-public:
- TagStoreEdit(MachineBasicBlock *MBB, bool ZeroData)
- : MBB(MBB), ZeroData(ZeroData) {
- MF = MBB->getParent();
- MRI = &MF->getRegInfo();
- }
- // Add an instruction to be replaced. Instructions must be added in the
- // ascending order of Offset, and have to be adjacent.
- void addInstruction(TagStoreInstr I) {
- assert((TagStores.empty() ||
- TagStores.back().Offset + TagStores.back().Size == I.Offset) &&
- "Non-adjacent tag store instructions.");
- TagStores.push_back(I);
- }
- void clear() { TagStores.clear(); }
- // Emit equivalent code at the given location, and erase the current set of
- // instructions. May skip if the replacement is not profitable. May invalidate
- // the input iterator and replace it with a valid one.
- void emitCode(MachineBasicBlock::iterator &InsertI,
- const AArch64FrameLowering *TFI, bool IsLast);
-};
-
-void TagStoreEdit::emitUnrolled(MachineBasicBlock::iterator InsertI) {
- const AArch64InstrInfo *TII =
- MF->getSubtarget<AArch64Subtarget>().getInstrInfo();
-
- const int64_t kMinOffset = -256 * 16;
- const int64_t kMaxOffset = 255 * 16;
-
- Register BaseReg = FrameReg;
- int64_t BaseRegOffsetBytes = FrameRegOffset.getBytes();
- if (BaseRegOffsetBytes < kMinOffset ||
- BaseRegOffsetBytes + (Size - Size % 32) > kMaxOffset) {
- Register ScratchReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
- emitFrameOffset(*MBB, InsertI, DL, ScratchReg, BaseReg,
- {BaseRegOffsetBytes, MVT::i8}, TII);
- BaseReg = ScratchReg;
- BaseRegOffsetBytes = 0;
- }
-
- MachineInstr *LastI = nullptr;
- while (Size) {
- int64_t InstrSize = (Size > 16) ? 32 : 16;
- unsigned Opcode =
- InstrSize == 16
- ? (ZeroData ? AArch64::STZGOffset : AArch64::STGOffset)
- : (ZeroData ? AArch64::STZ2GOffset : AArch64::ST2GOffset);
- MachineInstr *I = BuildMI(*MBB, InsertI, DL, TII->get(Opcode))
- .addReg(AArch64::SP)
- .addReg(BaseReg)
- .addImm(BaseRegOffsetBytes / 16)
- .setMemRefs(CombinedMemRefs);
- // A store to [BaseReg, #0] should go last for an opportunity to fold the
- // final SP adjustment in the epilogue.
- if (BaseRegOffsetBytes == 0)
- LastI = I;
- BaseRegOffsetBytes += InstrSize;
- Size -= InstrSize;
- }
-
- if (LastI)
- MBB->splice(InsertI, MBB, LastI);
-}
-
-void TagStoreEdit::emitLoop(MachineBasicBlock::iterator InsertI) {
- const AArch64InstrInfo *TII =
- MF->getSubtarget<AArch64Subtarget>().getInstrInfo();
-
- Register BaseReg = FrameRegUpdate
- ? FrameReg
- : MRI->createVirtualRegister(&AArch64::GPR64RegClass);
- Register SizeReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
-
- emitFrameOffset(*MBB, InsertI, DL, BaseReg, FrameReg, FrameRegOffset, TII);
-
- int64_t LoopSize = Size;
- // If the loop size is not a multiple of 32, split off one 16-byte store at
- // the end to fold BaseReg update into.
- if (FrameRegUpdate && *FrameRegUpdate)
- LoopSize -= LoopSize % 32;
- MachineInstr *LoopI =
- BuildMI(*MBB, InsertI, DL,
- TII->get(ZeroData ? AArch64::STZGloop : AArch64::STGloop))
- .addDef(SizeReg)
- .addDef(BaseReg)
- .addImm(LoopSize)
- .addReg(BaseReg)
- .setMemRefs(CombinedMemRefs);
- if (FrameRegUpdate)
- LoopI->setFlags(FrameRegUpdateFlags);
-
- int64_t ExtraBaseRegUpdate =
- FrameRegUpdate ? (*FrameRegUpdate - FrameRegOffset.getBytes() - Size) : 0;
- if (LoopSize < Size) {
- assert(FrameRegUpdate);
- assert(Size - LoopSize == 16);
- // Tag 16 more bytes at BaseReg and update BaseReg.
- BuildMI(*MBB, InsertI, DL,
- TII->get(ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex))
- .addDef(BaseReg)
- .addReg(BaseReg)
- .addReg(BaseReg)
- .addImm(1 + ExtraBaseRegUpdate / 16)
- .setMemRefs(CombinedMemRefs)
- .setMIFlags(FrameRegUpdateFlags);
- } else if (ExtraBaseRegUpdate) {
- // Update BaseReg.
- BuildMI(
- *MBB, InsertI, DL,
- TII->get(ExtraBaseRegUpdate > 0 ? AArch64::ADDXri : AArch64::SUBXri))
- .addDef(BaseReg)
- .addReg(BaseReg)
- .addImm(std::abs(ExtraBaseRegUpdate))
- .addImm(0)
- .setMIFlags(FrameRegUpdateFlags);
- }
-}
-
-// Check if *II is a register update that can be merged into STGloop that ends
-// at (Reg + Size). RemainingOffset is the required adjustment to Reg after the
-// end of the loop.
-bool canMergeRegUpdate(MachineBasicBlock::iterator II, unsigned Reg,
- int64_t Size, int64_t *TotalOffset) {
- MachineInstr &MI = *II;
- if ((MI.getOpcode() == AArch64::ADDXri ||
- MI.getOpcode() == AArch64::SUBXri) &&
- MI.getOperand(0).getReg() == Reg && MI.getOperand(1).getReg() == Reg) {
- unsigned Shift = AArch64_AM::getShiftValue(MI.getOperand(3).getImm());
- int64_t Offset = MI.getOperand(2).getImm() << Shift;
- if (MI.getOpcode() == AArch64::SUBXri)
- Offset = -Offset;
- int64_t AbsPostOffset = std::abs(Offset - Size);
- const int64_t kMaxOffset =
- 0xFFF; // Max encoding for unshifted ADDXri / SUBXri
- if (AbsPostOffset <= kMaxOffset && AbsPostOffset % 16 == 0) {
- *TotalOffset = Offset;
- return true;
- }
- }
- return false;
-}
-
-void mergeMemRefs(const SmallVectorImpl<TagStoreInstr> &TSE,
- SmallVectorImpl<MachineMemOperand *> &MemRefs) {
- MemRefs.clear();
- for (auto &TS : TSE) {
- MachineInstr *MI = TS.MI;
- // An instruction without memory operands may access anything. Be
- // conservative and return an empty list.
- if (MI->memoperands_empty()) {
- MemRefs.clear();
- return;
- }
- MemRefs.append(MI->memoperands_begin(), MI->memoperands_end());
- }
-}
-
-void TagStoreEdit::emitCode(MachineBasicBlock::iterator &InsertI,
- const AArch64FrameLowering *TFI, bool IsLast) {
- if (TagStores.empty())
- return;
- TagStoreInstr &FirstTagStore = TagStores[0];
- TagStoreInstr &LastTagStore = TagStores[TagStores.size() - 1];
- Size = LastTagStore.Offset - FirstTagStore.Offset + LastTagStore.Size;
- DL = TagStores[0].MI->getDebugLoc();
-
- unsigned Reg;
- FrameRegOffset = TFI->resolveFrameOffsetReference(
- *MF, FirstTagStore.Offset, false /*isFixed*/, false /*isSVE*/, Reg,
- /*PreferFP=*/false, /*ForSimm=*/true);
- FrameReg = Reg;
- FrameRegUpdate = None;
-
- mergeMemRefs(TagStores, CombinedMemRefs);
-
- LLVM_DEBUG(dbgs() << "Replacing adjacent STG instructions:\n";
- for (const auto &Instr
- : TagStores) { dbgs() << " " << *Instr.MI; });
-
- // Size threshold where a loop becomes shorter than a linear sequence of
- // tagging instructions.
- const int kSetTagLoopThreshold = 176;
- if (Size < kSetTagLoopThreshold) {
- if (TagStores.size() < 2)
- return;
- emitUnrolled(InsertI);
- } else {
- MachineInstr *UpdateInstr = nullptr;
- int64_t TotalOffset;
- if (IsLast) {
- // See if we can merge base register update into the STGloop.
- // This is done in AArch64LoadStoreOptimizer for "normal" stores,
- // but STGloop is way too unusual for that, and also it only
- // realistically happens in function epilogue. Also, STGloop is expanded
- // before that pass.
- if (InsertI != MBB->end() &&
- canMergeRegUpdate(InsertI, FrameReg, FrameRegOffset.getBytes() + Size,
- &TotalOffset)) {
- UpdateInstr = &*InsertI++;
- LLVM_DEBUG(dbgs() << "Folding SP update into loop:\n "
- << *UpdateInstr);
- }
- }
-
- if (!UpdateInstr && TagStores.size() < 2)
- return;
-
- if (UpdateInstr) {
- FrameRegUpdate = TotalOffset;
- FrameRegUpdateFlags = UpdateInstr->getFlags();
- }
- emitLoop(InsertI);
- if (UpdateInstr)
- UpdateInstr->eraseFromParent();
- }
-
- for (auto &TS : TagStores)
- TS.MI->eraseFromParent();
-}
-
-bool isMergeableStackTaggingInstruction(MachineInstr &MI, int64_t &Offset,
- int64_t &Size, bool &ZeroData) {
- MachineFunction &MF = *MI.getParent()->getParent();
- const MachineFrameInfo &MFI = MF.getFrameInfo();
-
- unsigned Opcode = MI.getOpcode();
- ZeroData = (Opcode == AArch64::STZGloop || Opcode == AArch64::STZGOffset ||
- Opcode == AArch64::STZ2GOffset);
-
- if (Opcode == AArch64::STGloop || Opcode == AArch64::STZGloop) {
- if (!MI.getOperand(0).isDead() || !MI.getOperand(1).isDead())
- return false;
- if (!MI.getOperand(2).isImm() || !MI.getOperand(3).isFI())
- return false;
- Offset = MFI.getObjectOffset(MI.getOperand(3).getIndex());
- Size = MI.getOperand(2).getImm();
- return true;
- }
-
- if (Opcode == AArch64::STGOffset || Opcode == AArch64::STZGOffset)
- Size = 16;
- else if (Opcode == AArch64::ST2GOffset || Opcode == AArch64::STZ2GOffset)
- Size = 32;
- else
- return false;
-
- if (MI.getOperand(0).getReg() != AArch64::SP || !MI.getOperand(1).isFI())
- return false;
-
- Offset = MFI.getObjectOffset(MI.getOperand(1).getIndex()) +
- 16 * MI.getOperand(2).getImm();
- return true;
-}
-
-// Detect a run of memory tagging instructions for adjacent stack frame slots,
-// and replace them with a shorter instruction sequence:
-// * replace STG + STG with ST2G
-// * replace STGloop + STGloop with STGloop
-// This code needs to run when stack slot offsets are already known, but before
-// FrameIndex operands in STG instructions are eliminated.
-MachineBasicBlock::iterator tryMergeAdjacentSTG(MachineBasicBlock::iterator II,
- const AArch64FrameLowering *TFI,
- RegScavenger *RS) {
- bool FirstZeroData;
- int64_t Size, Offset;
- MachineInstr &MI = *II;
- MachineBasicBlock *MBB = MI.getParent();
- MachineBasicBlock::iterator NextI = ++II;
- if (&MI == &MBB->instr_back())
- return II;
- if (!isMergeableStackTaggingInstruction(MI, Offset, Size, FirstZeroData))
- return II;
-
- SmallVector<TagStoreInstr, 4> Instrs;
- Instrs.emplace_back(&MI, Offset, Size);
-
- constexpr int kScanLimit = 10;
- int Count = 0;
- for (MachineBasicBlock::iterator E = MBB->end();
- NextI != E && Count < kScanLimit; ++NextI) {
- MachineInstr &MI = *NextI;
- bool ZeroData;
- int64_t Size, Offset;
- // Collect instructions that update memory tags with a FrameIndex operand
- // and (when applicable) constant size, and whose output registers are dead
- // (the latter is almost always the case in practice). Since these
- // instructions effectively have no inputs or outputs, we are free to skip
- // any non-aliasing instructions in between without tracking used registers.
- if (isMergeableStackTaggingInstruction(MI, Offset, Size, ZeroData)) {
- if (ZeroData != FirstZeroData)
- break;
- Instrs.emplace_back(&MI, Offset, Size);
- continue;
- }
-
- // Only count non-transient, non-tagging instructions toward the scan
- // limit.
- if (!MI.isTransient())
- ++Count;
-
- // Just in case, stop before the epilogue code starts.
- if (MI.getFlag(MachineInstr::FrameSetup) ||
- MI.getFlag(MachineInstr::FrameDestroy))
- break;
-
- // Reject anything that may alias the collected instructions.
- if (MI.mayLoadOrStore() || MI.hasUnmodeledSideEffects())
- break;
- }
-
- // New code will be inserted after the last tagging instruction we've found.
- MachineBasicBlock::iterator InsertI = Instrs.back().MI;
- InsertI++;
-
- llvm::stable_sort(Instrs,
- [](const TagStoreInstr &Left, const TagStoreInstr &Right) {
- return Left.Offset < Right.Offset;
- });
-
- // Make sure that we don't have any overlapping stores.
- int64_t CurOffset = Instrs[0].Offset;
- for (auto &Instr : Instrs) {
- if (CurOffset > Instr.Offset)
- return NextI;
- CurOffset = Instr.Offset + Instr.Size;
- }
-
- // Find contiguous runs of tagged memory and emit shorter instruction
- // sequences for them when possible.
- TagStoreEdit TSE(MBB, FirstZeroData);
- Optional<int64_t> EndOffset;
- for (auto &Instr : Instrs) {
- if (EndOffset && *EndOffset != Instr.Offset) {
- // Found a gap.
- TSE.emitCode(InsertI, TFI, /*IsLast = */ false);
- TSE.clear();
- }
-
- TSE.addInstruction(Instr);
- EndOffset = Instr.Offset + Instr.Size;
- }
-
- TSE.emitCode(InsertI, TFI, /*IsLast = */ true);
-
- return InsertI;
-}
-} // namespace
-
-void AArch64FrameLowering::processFunctionBeforeFrameIndicesReplaced(
- MachineFunction &MF, RegScavenger *RS = nullptr) const {
- if (StackTaggingMergeSetTag)
- for (auto &BB : MF)
- for (MachineBasicBlock::iterator II = BB.begin(); II != BB.end();)
- II = tryMergeAdjacentSTG(II, this, RS);
-}
-
-/// For Win64 AArch64 EH, the offset to the Unwind object is from the SP
-/// before the update. This is easily retrieved as it is exactly the offset
-/// that is set in processFunctionBeforeFrameFinalized.
+/// For Win64 AArch64 EH, the offset to the Unwind object is from the SP before
+/// the update. This is easily retrieved as it is exactly the offset that is set
+/// in processFunctionBeforeFrameFinalized.
int AArch64FrameLowering::getFrameIndexReferencePreferSP(
const MachineFunction &MF, int FI, unsigned &FrameReg,
bool IgnoreSPUpdates) const {
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index 57a7924fb8f8..b5719feb6b15 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -77,10 +77,6 @@ class AArch64FrameLowering : public TargetFrameLowering {
void processFunctionBeforeFrameFinalized(MachineFunction &MF,
RegScavenger *RS) const override;
- void
- processFunctionBeforeFrameIndicesReplaced(MachineFunction &MF,
- RegScavenger *RS) const override;
-
unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const override;
unsigned getWinEHFuncletFrameSize(const MachineFunction &MF) const;
@@ -111,8 +107,6 @@ class AArch64FrameLowering : public TargetFrameLowering {
int64_t assignSVEStackObjectOffsets(MachineFrameInfo &MF,
int &MinCSFrameIndex,
int &MaxCSFrameIndex) const;
- bool shouldCombineCSRLocalStackBumpInEpilogue(MachineBasicBlock &MBB,
- unsigned StackBumpBytes) const;
};
} // End llvm namespace
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 0ed2a678c4f0..54f3f7c10132 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -3458,8 +3458,6 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
case AArch64::ST1Fourv1d:
case AArch64::IRG:
case AArch64::IRGstack:
- case AArch64::STGloop:
- case AArch64::STZGloop:
return AArch64FrameOffsetCannotUpdate;
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 04a23f31ffd6..f4d340c9f06a 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1514,17 +1514,17 @@ def TAGPstack
// register / expression for the tagged base pointer of the current function.
def : Pat<(int_aarch64_irg_sp i64:$Rm), (IRGstack SP, i64:$Rm)>;
-// Large STG to be expanded into a loop. $sz is the size, $Rn is start address.
-// $Rn_wback is one past the end of the range. $Rm is the loop counter.
+// Large STG to be expanded into a loop. $Rm is the size, $Rn is start address.
+// $Rn_wback is one past the end of the range.
let isCodeGenOnly=1, mayStore=1 in {
def STGloop
- : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn),
- [], "$Rn = $Rn_wback, at earlyclobber $Rn_wback, at earlyclobber $Rm" >,
+ : Pseudo<(outs GPR64common:$Rm_wback, GPR64sp:$Rn_wback), (ins GPR64common:$Rm, GPR64sp:$Rn),
+ [], "$Rn = $Rn_wback, at earlyclobber $Rn_wback,$Rm = $Rm_wback, at earlyclobber $Rm_wback" >,
Sched<[WriteAdr, WriteST]>;
def STZGloop
- : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn),
- [], "$Rn = $Rn_wback, at earlyclobber $Rn_wback, at earlyclobber $Rm" >,
+ : Pseudo<(outs GPR64common:$Rm_wback, GPR64sp:$Rn_wback), (ins GPR64common:$Rm, GPR64sp:$Rn),
+ [], "$Rn = $Rn_wback, at earlyclobber $Rn_wback,$Rm = $Rm_wback, at earlyclobber $Rm_wback" >,
Sched<[WriteAdr, WriteST]>;
}
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 4a3778a2fd07..14f839cd4f81 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -390,10 +390,6 @@ bool AArch64RegisterInfo::needsFrameBaseReg(MachineInstr *MI,
if (isFrameOffsetLegal(MI, AArch64::SP, Offset))
return false;
- // If even offset 0 is illegal, we don't want a virtual base register.
- if (!isFrameOffsetLegal(MI, AArch64::SP, 0))
- return false;
-
// The offset likely isn't legal; we want to allocate a virtual base register.
return true;
}
@@ -449,17 +445,6 @@ void AArch64RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
(void)Done;
}
-static Register getScratchRegisterForInstruction(MachineInstr &MI) {
- // ST*Gloop can only have #fi in op3, and they have a constraint that
- // op1==op3. Since op1 is early-clobber, it may (and also must) be used as the
- // scratch register.
- if (MI.getOpcode() == AArch64::STGloop || MI.getOpcode() == AArch64::STZGloop)
- return MI.getOperand(1).getReg();
- else
- return MI.getMF()->getRegInfo().createVirtualRegister(
- &AArch64::GPR64RegClass);
-}
-
void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS) const {
@@ -516,7 +501,8 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// in a scratch register.
Offset = TFI->resolveFrameIndexReference(
MF, FrameIndex, FrameReg, /*PreferFP=*/false, /*ForSimm=*/true);
- Register ScratchReg = getScratchRegisterForInstruction(MI);
+ Register ScratchReg =
+ MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
emitFrameOffset(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset,
TII);
BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(AArch64::LDG), ScratchReg)
@@ -545,7 +531,8 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// If we get here, the immediate doesn't fit into the instruction. We folded
// as much as possible above. Handle the rest, providing a register that is
// SP+LargeImm.
- Register ScratchReg = getScratchRegisterForInstruction(MI);
+ Register ScratchReg =
+ MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
emitFrameOffset(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset, TII);
MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false, false, true);
}
diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index e050a0028eca..ba61ed726e84 100644
--- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -125,13 +125,19 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForSetTag(
return EmitUnrolledSetTag(DAG, dl, Chain, Addr, ObjSize, BaseMemOperand,
ZeroData);
- const EVT ResTys[] = {MVT::i64, MVT::i64, MVT::Other};
-
- if (Addr.getOpcode() == ISD::FrameIndex) {
- int FI = cast<FrameIndexSDNode>(Addr)->getIndex();
- Addr = DAG.getTargetFrameIndex(FI, MVT::i64);
+ if (ObjSize % 32 != 0) {
+ SDNode *St1 = DAG.getMachineNode(
+ ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex, dl,
+ {MVT::i64, MVT::Other},
+ {Addr, Addr, DAG.getTargetConstant(1, dl, MVT::i64), Chain});
+ DAG.setNodeMemRefs(cast<MachineSDNode>(St1), {BaseMemOperand});
+ ObjSize -= 16;
+ Addr = SDValue(St1, 0);
+ Chain = SDValue(St1, 1);
}
- SDValue Ops[] = {DAG.getTargetConstant(ObjSize, dl, MVT::i64), Addr, Chain};
+
+ const EVT ResTys[] = {MVT::i64, MVT::i64, MVT::Other};
+ SDValue Ops[] = {DAG.getConstant(ObjSize, dl, MVT::i64), Addr, Chain};
SDNode *St = DAG.getMachineNode(
ZeroData ? AArch64::STZGloop : AArch64::STGloop, dl, ResTys, Ops);
diff --git a/llvm/test/CodeGen/AArch64/settag-merge.ll b/llvm/test/CodeGen/AArch64/settag-merge.ll
deleted file mode 100644
index 1bc93a82070f..000000000000
--- a/llvm/test/CodeGen/AArch64/settag-merge.ll
+++ /dev/null
@@ -1,214 +0,0 @@
-; RUN: llc < %s -mtriple=aarch64 -mattr=+mte | FileCheck %s
-
-declare void @use(i8* %p)
-declare void @llvm.aarch64.settag(i8* %p, i64 %a)
-declare void @llvm.aarch64.settag.zero(i8* %p, i64 %a)
-
-define void @stg16_16() {
-entry:
-; CHECK-LABEL: stg16_16:
-; CHECK: st2g sp, [sp], #32
-; CHECK: ret
- %a = alloca i8, i32 16, align 16
- %b = alloca i8, i32 16, align 16
- call void @llvm.aarch64.settag(i8* %a, i64 16)
- call void @llvm.aarch64.settag(i8* %b, i64 16)
- ret void
-}
-
-define i32 @stg16_16_16_16_ret() {
-entry:
-; CHECK-LABEL: stg16_16_16_16_ret:
-; CHECK: st2g sp, [sp, #32]
-; CHECK: st2g sp, [sp], #64
-; CHECK: mov w0, wzr
-; CHECK: ret
- %a = alloca i8, i32 16, align 16
- %b = alloca i8, i32 16, align 16
- %c = alloca i8, i32 16, align 16
- %d = alloca i8, i32 16, align 16
- call void @llvm.aarch64.settag(i8* %a, i64 16)
- call void @llvm.aarch64.settag(i8* %b, i64 16)
- call void @llvm.aarch64.settag(i8* %c, i64 16)
- call void @llvm.aarch64.settag(i8* %d, i64 16)
- ret i32 0
-}
-
-define void @stg16_16_16_16() {
-entry:
-; CHECK-LABEL: stg16_16_16_16:
-; CHECK: st2g sp, [sp, #32]
-; CHECK: st2g sp, [sp], #64
-; CHECK: ret
- %a = alloca i8, i32 16, align 16
- %b = alloca i8, i32 16, align 16
- %c = alloca i8, i32 16, align 16
- %d = alloca i8, i32 16, align 16
- call void @llvm.aarch64.settag(i8* %a, i64 16)
- call void @llvm.aarch64.settag(i8* %b, i64 16)
- call void @llvm.aarch64.settag(i8* %c, i64 16)
- call void @llvm.aarch64.settag(i8* %d, i64 16)
- ret void
-}
-
-define void @stg128_128_128_128() {
-entry:
-; CHECK-LABEL: stg128_128_128_128:
-; CHECK: mov x8, #512
-; CHECK: st2g sp, [sp], #32
-; CHECK: sub x8, x8, #32
-; CHECK: cbnz x8,
-; CHECK: ret
- %a = alloca i8, i32 128, align 16
- %b = alloca i8, i32 128, align 16
- %c = alloca i8, i32 128, align 16
- %d = alloca i8, i32 128, align 16
- call void @llvm.aarch64.settag(i8* %a, i64 128)
- call void @llvm.aarch64.settag(i8* %b, i64 128)
- call void @llvm.aarch64.settag(i8* %c, i64 128)
- call void @llvm.aarch64.settag(i8* %d, i64 128)
- ret void
-}
-
-define void @stg16_512_16() {
-entry:
-; CHECK-LABEL: stg16_512_16:
-; CHECK: mov x8, #544
-; CHECK: st2g sp, [sp], #32
-; CHECK: sub x8, x8, #32
-; CHECK: cbnz x8,
-; CHECK: ret
- %a = alloca i8, i32 16, align 16
- %b = alloca i8, i32 512, align 16
- %c = alloca i8, i32 16, align 16
- call void @llvm.aarch64.settag(i8* %a, i64 16)
- call void @llvm.aarch64.settag(i8* %b, i64 512)
- call void @llvm.aarch64.settag(i8* %c, i64 16)
- ret void
-}
-
-define void @stg512_512_512() {
-entry:
-; CHECK-LABEL: stg512_512_512:
-; CHECK: mov x8, #1536
-; CHECK: st2g sp, [sp], #32
-; CHECK: sub x8, x8, #32
-; CHECK: cbnz x8,
-; CHECK: ret
- %a = alloca i8, i32 512, align 16
- %b = alloca i8, i32 512, align 16
- %c = alloca i8, i32 512, align 16
- call void @llvm.aarch64.settag(i8* %a, i64 512)
- call void @llvm.aarch64.settag(i8* %b, i64 512)
- call void @llvm.aarch64.settag(i8* %c, i64 512)
- ret void
-}
-
-define void @early(i1 %flag) {
-entry:
-; CHECK-LABEL: early:
-; CHECK: tbz w0, #0, [[LABEL:.LBB.*]]
-; CHECK: st2g sp, [sp, #
-; CHECK: st2g sp, [sp, #
-; CHECK: st2g sp, [sp, #
-; CHECK: [[LABEL]]:
-; CHECK: stg sp, [sp, #
-; CHECK: st2g sp, [sp], #
-; CHECK: ret
- %a = alloca i8, i32 48, align 16
- %b = alloca i8, i32 48, align 16
- %c = alloca i8, i32 48, align 16
- br i1 %flag, label %if.then, label %if.end
-
-if.then:
- call void @llvm.aarch64.settag(i8* %a, i64 48)
- call void @llvm.aarch64.settag(i8* %b, i64 48)
- br label %if.end
-
-if.end:
- call void @llvm.aarch64.settag(i8* %c, i64 48)
- ret void
-}
-
-define void @early_128_128(i1 %flag) {
-entry:
-; CHECK-LABEL: early_128_128:
-; CHECK: tbz w0, #0, [[LABEL:.LBB.*]]
-; CHECK: add x9, sp, #
-; CHECK: mov x8, #256
-; CHECK: st2g x9, [x9], #32
-; CHECK: sub x8, x8, #32
-; CHECK: cbnz x8,
-; CHECK: [[LABEL]]:
-; CHECK: stg sp, [sp, #
-; CHECK: st2g sp, [sp], #
-; CHECK: ret
- %a = alloca i8, i32 128, align 16
- %b = alloca i8, i32 128, align 16
- %c = alloca i8, i32 48, align 16
- br i1 %flag, label %if.then, label %if.end
-
-if.then:
- call void @llvm.aarch64.settag(i8* %a, i64 128)
- call void @llvm.aarch64.settag(i8* %b, i64 128)
- br label %if.end
-
-if.end:
- call void @llvm.aarch64.settag(i8* %c, i64 48)
- ret void
-}
-
-define void @early_512_512(i1 %flag) {
-entry:
-; CHECK-LABEL: early_512_512:
-; CHECK: tbz w0, #0, [[LABEL:.LBB.*]]
-; CHECK: add x9, sp, #
-; CHECK: mov x8, #1024
-; CHECK: st2g x9, [x9], #32
-; CHECK: sub x8, x8, #32
-; CHECK: cbnz x8,
-; CHECK: [[LABEL]]:
-; CHECK: stg sp, [sp, #
-; CHECK: st2g sp, [sp], #
-; CHECK: ret
- %a = alloca i8, i32 512, align 16
- %b = alloca i8, i32 512, align 16
- %c = alloca i8, i32 48, align 16
- br i1 %flag, label %if.then, label %if.end
-
-if.then:
- call void @llvm.aarch64.settag(i8* %a, i64 512)
- call void @llvm.aarch64.settag(i8* %b, i64 512)
- br label %if.end
-
-if.end:
- call void @llvm.aarch64.settag(i8* %c, i64 48)
- ret void
-}
-
-; Two loops of size 256; the second loop updates SP.
-define void @stg128_128_gap_128_128() {
-entry:
-; CHECK-LABEL: stg128_128_gap_128_128:
-; CHECK: mov x9, sp
-; CHECK: mov x8, #256
-; CHECK: st2g x9, [x9], #32
-; CHECK: sub x8, x8, #32
-; CHECK: cbnz x8,
-; CHECK: mov x8, #256
-; CHECK: st2g sp, [sp], #32
-; CHECK: sub x8, x8, #32
-; CHECK: cbnz x8,
-; CHECK: ret
- %a = alloca i8, i32 128, align 16
- %a2 = alloca i8, i32 128, align 16
- %b = alloca i8, i32 32, align 16
- %c = alloca i8, i32 128, align 16
- %c2 = alloca i8, i32 128, align 16
- call void @use(i8* %b)
- call void @llvm.aarch64.settag(i8* %a, i64 128)
- call void @llvm.aarch64.settag(i8* %a2, i64 128)
- call void @llvm.aarch64.settag(i8* %c, i64 128)
- call void @llvm.aarch64.settag(i8* %c2, i64 128)
- ret void
-}
diff --git a/llvm/test/CodeGen/AArch64/settag-merge.mir b/llvm/test/CodeGen/AArch64/settag-merge.mir
deleted file mode 100644
index dc2a00c7d3d3..000000000000
--- a/llvm/test/CodeGen/AArch64/settag-merge.mir
+++ /dev/null
@@ -1,83 +0,0 @@
-# RUN: llc -mtriple=aarch64 -mattr=+mte -run-pass=prologepilog %s -o - | FileCheck %s
-
---- |
- declare void @llvm.aarch64.settag(i8* nocapture writeonly, i64) argmemonly nounwind writeonly "target-features"="+mte"
- define i32 @stg16_16_16_16_ret() "target-features"="+mte" {
- entry:
- %a = alloca i8, i32 16, align 16
- %b = alloca i8, i32 16, align 16
- %c = alloca i8, i32 16, align 16
- %d = alloca i8, i32 16, align 16
- call void @llvm.aarch64.settag(i8* %a, i64 16)
- call void @llvm.aarch64.settag(i8* %b, i64 16)
- call void @llvm.aarch64.settag(i8* %c, i64 16)
- call void @llvm.aarch64.settag(i8* %d, i64 16)
- ret i32 0
- }
-
- define void @stg16_store_128() "target-features"="+mte" {
- entry:
- %a = alloca i8, i32 16, align 16
- %b = alloca i8, i32 128, align 16
- call void @llvm.aarch64.settag(i8* %a, i64 16)
- store i8 42, i8* %a
- call void @llvm.aarch64.settag(i8* %b, i64 128)
- ret void
- }
-
-...
----
-# A sequence of STG with a register copy in the middle.
-# Can be merged into ST2G + ST2G.
-# CHECK-LABEL: name:{{.*}}stg16_16_16_16_ret
-# CHECK-DAG: ST2GOffset $sp, $sp, 2
-# CHECK-DAG: ST2GOffset $sp, $sp, 0
-# CHECK-DAG: $w0 = COPY $wzr
-# CHECK-DAG: RET_ReallyLR implicit killed $w0
-
-name: stg16_16_16_16_ret
-tracksRegLiveness: true
-stack:
- - { id: 0, name: a, size: 16, alignment: 16 }
- - { id: 1, name: b, size: 16, alignment: 16 }
- - { id: 2, name: c, size: 16, alignment: 16 }
- - { id: 3, name: d, size: 16, alignment: 16 }
-body: |
- bb.0.entry:
- STGOffset $sp, %stack.0.a, 0 :: (store 16 into %ir.a)
- STGOffset $sp, %stack.1.b, 0 :: (store 16 into %ir.b)
- STGOffset $sp, %stack.2.c, 0 :: (store 16 into %ir.c)
- $w0 = COPY $wzr
- STGOffset $sp, %stack.3.d, 0 :: (store 16 into %ir.d)
- RET_ReallyLR implicit killed $w0
-
-...
-
----
-# A store in the middle prevents merging.
-# CHECK-LABEL: name:{{.*}}stg16_store_128
-# CHECK: ST2GOffset $sp, $sp, 2
-# CHECK: ST2GOffset $sp, $sp, 4
-# CHECK: ST2GOffset $sp, $sp, 6
-# CHECK: STGOffset $sp, $sp, 8
-# CHECK: STRBBui
-# CHECK: ST2GOffset $sp, $sp, 0
-# CHECK: RET_ReallyLR
-
-name: stg16_store_128
-tracksRegLiveness: true
-stack:
- - { id: 0, name: a, size: 16, alignment: 16 }
- - { id: 1, name: b, size: 128, alignment: 16 }
-body: |
- bb.0.entry:
- STGOffset $sp, %stack.0.a, 0 :: (store 16 into %ir.a)
- renamable $w8 = MOVi32imm 42
- ST2GOffset $sp, %stack.1.b, 6 :: (store 32 into %ir.b + 96, align 16)
- ST2GOffset $sp, %stack.1.b, 4 :: (store 32 into %ir.b + 64, align 16)
- ST2GOffset $sp, %stack.1.b, 2 :: (store 32 into %ir.b + 32, align 16)
- STRBBui killed renamable $w8, %stack.0.a, 0 :: (store 1 into %ir.a, align 16)
- ST2GOffset $sp, %stack.1.b, 0 :: (store 32 into %ir.b, align 16)
- RET_ReallyLR
-
-...
diff --git a/llvm/test/CodeGen/AArch64/settag.ll b/llvm/test/CodeGen/AArch64/settag.ll
index 3deeb0155fe8..9ca188fbce32 100644
--- a/llvm/test/CodeGen/AArch64/settag.ll
+++ b/llvm/test/CodeGen/AArch64/settag.ll
@@ -64,8 +64,8 @@ entry:
define void @stg17(i8* %p) {
entry:
; CHECK-LABEL: stg17:
-; CHECK: stg x0, [x0], #16
; CHECK: mov {{(w|x)}}[[R:[0-9]+]], #256
+; CHECK: stg x0, [x0], #16
; CHECK: st2g x0, [x0], #32
; CHECK: sub x[[R]], x[[R]], #32
; CHECK: cbnz x[[R]],
@@ -87,8 +87,8 @@ entry:
define void @stzg17(i8* %p) {
entry:
; CHECK-LABEL: stzg17:
-; CHECK: stzg x0, [x0], #16
; CHECK: mov {{w|x}}[[R:[0-9]+]], #256
+; CHECK: stzg x0, [x0], #16
; CHECK: stz2g x0, [x0], #32
; CHECK: sub x[[R]], x[[R]], #32
; CHECK: cbnz x[[R]],
@@ -110,10 +110,10 @@ entry:
define void @stg_alloca5() {
entry:
; CHECK-LABEL: stg_alloca5:
-; CHECK: st2g sp, [sp, #32]
-; CHECK-NEXT: stg sp, [sp, #64]
-; CHECK-NEXT: st2g sp, [sp], #80
-; CHECK-NEXT: ret
+; CHECK: stg sp, [sp, #64]
+; CHECK: st2g sp, [sp, #32]
+; CHECK: st2g sp, [sp]
+; CHECK: ret
%a = alloca i8, i32 80, align 16
call void @llvm.aarch64.settag(i8* %a, i64 80)
ret void
@@ -122,11 +122,12 @@ entry:
define void @stg_alloca17() {
entry:
; CHECK-LABEL: stg_alloca17:
+; CHECK: mov [[P:x[0-9]+]], sp
+; CHECK: stg [[P]], {{\[}}[[P]]{{\]}}, #16
; CHECK: mov {{w|x}}[[R:[0-9]+]], #256
-; CHECK: st2g sp, [sp], #32
+; CHECK: st2g [[P]], {{\[}}[[P]]{{\]}}, #32
; CHECK: sub x[[R]], x[[R]], #32
; CHECK: cbnz x[[R]],
-; CHECK: stg sp, [sp], #16
; CHECK: ret
%a = alloca i8, i32 272, align 16
call void @llvm.aarch64.settag(i8* %a, i64 272)
diff --git a/llvm/test/CodeGen/AArch64/stack-tagging-unchecked-ld-st.ll b/llvm/test/CodeGen/AArch64/stack-tagging-unchecked-ld-st.ll
index ed6ccc8b4941..200837dabfe0 100644
--- a/llvm/test/CodeGen/AArch64/stack-tagging-unchecked-ld-st.ll
+++ b/llvm/test/CodeGen/AArch64/stack-tagging-unchecked-ld-st.ll
@@ -210,10 +210,11 @@ entry:
; DEFAULT: ldrb [[A:w.*]], [x{{.*}}]
; DEFAULT: ldrb [[B:w.*]], [x{{.*}}]
-; ALWAYS-DAG: ldg [[PA:x.*]], [x{{.*}}]
-; ALWAYS-DAG: ldrb [[B:w.*]], [sp]
-; ALWAYS-DAG: ldrb [[A:w.*]], {{\[}}[[PA]]{{\]}}
+; ALWAYS: ldg [[PA:x.*]], [x{{.*}}]
+; ALWAYS: ldrb [[B:w.*]], [sp]
+; ALWAYS: ldrb [[A:w.*]], {{\[}}[[PA]]{{\]}}
+; COMMON: add w0, [[B]], [[A]]
; COMMON: ret
; One of these allocas is closer to FP than to SP, and within 256 bytes