[llvm] [AArch64] Teach areMemAccessesTriviallyDisjoint about scalable widths. (PR #73655)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 30 05:39:19 PST 2023
https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/73655
From 25f4c236b67af400cc3210a8b278b2386bb678ac Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Thu, 30 Nov 2023 12:17:25 +0000
Subject: [PATCH] [AArch64] Teach areMemAccessesTriviallyDisjoint about
scalable widths.
The base change here is to make getMemOperandWithOffsetWidth return a
TypeSize Width, which in turn allows areMemAccessesTriviallyDisjoint to
reason about scalable widths when proving two accesses trivially disjoint.
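
For readers skimming the diff, the core of the new disjointness logic can be
summarised with a small standalone sketch. This is a simplified illustration
only, not the LLVM implementation: the names below (SimpleTypeSize,
accessesTriviallyDisjoint) are hypothetical stand-ins for llvm::TypeSize and
the updated check in AArch64InstrInfo::areMemAccessesTriviallyDisjoint, and it
assumes both accesses share the same base register with offsets already
expressed in matching (scalable or fixed) units.

// Minimal sketch of the disjointness test (hypothetical names; assumes both
// accesses use the same base register and offsets in matching units).
#include <cstdint>
#include <iostream>

struct SimpleTypeSize {
  uint64_t KnownMin; // known minimum size in bytes
  bool Scalable;     // true if the real size is KnownMin * vscale
};

// The accesses are disjoint when the lower one ends at or before the higher
// one starts. We may only draw that conclusion when the width's scalability
// matches the offsets', which is exactly the extra guard the patch adds.
bool accessesTriviallyDisjoint(int64_t OffsetA, SimpleTypeSize WidthA,
                               int64_t OffsetB, SimpleTypeSize WidthB,
                               bool OffsetsAreScalable) {
  int64_t LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int64_t HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  SimpleTypeSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
  return LowWidth.Scalable == OffsetsAreScalable &&
         LowOffset + (int64_t)LowWidth.KnownMin <= HighOffset;
}

int main() {
  // Two full SVE vector accesses at [x0] and [x0, #1, mul vl]: scalable
  // offsets 0 and 16 with a scalable 16-byte width -> provably disjoint
  // (prints 1).
  SimpleTypeSize ZReg{16, true};
  std::cout << accessesTriviallyDisjoint(0, ZReg, 16, ZReg, true) << "\n";
  // A fixed 16-byte width paired with scalable offsets: the scalability
  // mismatch means no conclusion can be drawn (prints 0).
  SimpleTypeSize QReg{16, false};
  std::cout << accessesTriviallyDisjoint(0, QReg, 16, QReg, true) << "\n";
}

Before this change the width of an SVE access was reported as the maximal
(vscale = 16) byte count in a plain unsigned, so the first case above could
not be proven disjoint and neighbouring SVE accesses were treated as
potentially aliasing.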
---
.../Target/AArch64/AArch64FrameLowering.cpp | 3 +-
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 112 ++++++-----
llvm/lib/Target/AArch64/AArch64InstrInfo.h | 4 +-
.../AArch64LowerHomogeneousPrologEpilog.cpp | 6 +-
.../alloca-load-store-scalable-array.ll | 2 +-
llvm/test/CodeGen/AArch64/sve-aliasing.ll | 186 +++++++++---------
llvm/test/CodeGen/AArch64/sve-aliasing.mir | 65 ++++++
.../test/CodeGen/AArch64/sve-insert-vector.ll | 2 +-
8 files changed, 224 insertions(+), 156 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/sve-aliasing.mir
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index fd47970bd050596..95ac21214a5ceab 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1214,8 +1214,7 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
SEH->eraseFromParent();
}
- TypeSize Scale = TypeSize::getFixed(1);
- unsigned Width;
+ TypeSize Scale = TypeSize::getFixed(1), Width = TypeSize::getFixed(0);
int64_t MinOffset, MaxOffset;
bool Success = static_cast<const AArch64InstrInfo *>(TII)->getMemOpInfo(
NewOpc, Scale, Width, MinOffset, MaxOffset);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index e97f17e3f49c587..55c21f1b7ddbc72 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1096,7 +1096,7 @@ bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
const TargetRegisterInfo *TRI = &getRegisterInfo();
const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
int64_t OffsetA = 0, OffsetB = 0;
- unsigned WidthA = 0, WidthB = 0;
+ TypeSize WidthA(0, false), WidthB(0, false);
bool OffsetAIsScalable = false, OffsetBIsScalable = false;
assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
@@ -1121,8 +1121,9 @@ bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
OffsetAIsScalable == OffsetBIsScalable) {
int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
- int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
- if (LowOffset + LowWidth <= HighOffset)
+ TypeSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
+ if (LowWidth.isScalable() == OffsetAIsScalable &&
+ LowOffset + (int)LowWidth.getKnownMinValue() <= HighOffset)
return true;
}
}
@@ -2675,9 +2676,16 @@ bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
return false;
const MachineOperand *BaseOp;
+ TypeSize WidthN(0, false);
if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
- Width, TRI))
+ WidthN, TRI))
return false;
+ // The maximum vscale is 16 under AArch64; return the maximal extent for the
+ // vector.
+ Width = WidthN.isScalable()
+ ? WidthN.getKnownMinValue() * AArch64::SVEMaxBitsPerVector /
+ AArch64::SVEBitsPerBlock
+ : WidthN.getKnownMinValue();
BaseOps.push_back(BaseOp);
return true;
}
@@ -3456,7 +3464,7 @@ MachineInstr *AArch64InstrInfo::emitLdStWithAddr(MachineInstr &MemI,
bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
- bool &OffsetIsScalable, unsigned &Width,
+ bool &OffsetIsScalable, TypeSize &Width,
const TargetRegisterInfo *TRI) const {
assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
// Handle only loads/stores with base register followed by immediate offset.
@@ -3511,26 +3519,25 @@ AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
}
bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
- unsigned &Width, int64_t &MinOffset,
+ TypeSize &Width, int64_t &MinOffset,
int64_t &MaxOffset) {
- const unsigned SVEMaxBytesPerVector = AArch64::SVEMaxBitsPerVector / 8;
switch (Opcode) {
// Not a memory operation or something we want to handle.
default:
Scale = TypeSize::getFixed(0);
- Width = 0;
+ Width = TypeSize::getFixed(0);
MinOffset = MaxOffset = 0;
return false;
case AArch64::STRWpost:
case AArch64::LDRWpost:
- Width = 32;
+ Width = TypeSize::getFixed(32);
Scale = TypeSize::getFixed(4);
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::LDURQi:
case AArch64::STURQi:
- Width = 16;
+ Width = TypeSize::getFixed(16);
Scale = TypeSize::getFixed(1);
MinOffset = -256;
MaxOffset = 255;
@@ -3542,7 +3549,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::STURXi:
case AArch64::STURDi:
case AArch64::STLURXi:
- Width = 8;
+ Width = TypeSize::getFixed(8);
Scale = TypeSize::getFixed(1);
MinOffset = -256;
MaxOffset = 255;
@@ -3555,7 +3562,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::STURWi:
case AArch64::STURSi:
case AArch64::STLURWi:
- Width = 4;
+ Width = TypeSize::getFixed(4);
Scale = TypeSize::getFixed(1);
MinOffset = -256;
MaxOffset = 255;
@@ -3570,7 +3577,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::STURHi:
case AArch64::STURHHi:
case AArch64::STLURHi:
- Width = 2;
+ Width = TypeSize::getFixed(2);
Scale = TypeSize::getFixed(1);
MinOffset = -256;
MaxOffset = 255;
@@ -3585,7 +3592,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::STURBi:
case AArch64::STURBBi:
case AArch64::STLURBi:
- Width = 1;
+ Width = TypeSize::getFixed(1);
Scale = TypeSize::getFixed(1);
MinOffset = -256;
MaxOffset = 255;
@@ -3595,14 +3602,14 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::STPQi:
case AArch64::STNPQi:
Scale = TypeSize::getFixed(16);
- Width = 32;
+ Width = TypeSize::getFixed(32);
MinOffset = -64;
MaxOffset = 63;
break;
case AArch64::LDRQui:
case AArch64::STRQui:
Scale = TypeSize::getFixed(16);
- Width = 16;
+ Width = TypeSize::getFixed(16);
MinOffset = 0;
MaxOffset = 4095;
break;
@@ -3615,7 +3622,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::STNPXi:
case AArch64::STNPDi:
Scale = TypeSize::getFixed(8);
- Width = 16;
+ Width = TypeSize::getFixed(16);
MinOffset = -64;
MaxOffset = 63;
break;
@@ -3625,14 +3632,14 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::STRXui:
case AArch64::STRDui:
Scale = TypeSize::getFixed(8);
- Width = 8;
+ Width = TypeSize::getFixed(8);
MinOffset = 0;
MaxOffset = 4095;
break;
case AArch64::StoreSwiftAsyncContext:
// Store is an STRXui, but there might be an ADDXri in the expansion too.
Scale = TypeSize::getFixed(1);
- Width = 8;
+ Width = TypeSize::getFixed(8);
MinOffset = 0;
MaxOffset = 4095;
break;
@@ -3645,7 +3652,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::STNPWi:
case AArch64::STNPSi:
Scale = TypeSize::getFixed(4);
- Width = 8;
+ Width = TypeSize::getFixed(8);
MinOffset = -64;
MaxOffset = 63;
break;
@@ -3655,7 +3662,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::STRWui:
case AArch64::STRSui:
Scale = TypeSize::getFixed(4);
- Width = 4;
+ Width = TypeSize::getFixed(4);
MinOffset = 0;
MaxOffset = 4095;
break;
@@ -3666,7 +3673,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::STRHui:
case AArch64::STRHHui:
Scale = TypeSize::getFixed(2);
- Width = 2;
+ Width = TypeSize::getFixed(2);
MinOffset = 0;
MaxOffset = 4095;
break;
@@ -3677,7 +3684,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::STRBui:
case AArch64::STRBBui:
Scale = TypeSize::getFixed(1);
- Width = 1;
+ Width = TypeSize::getFixed(1);
MinOffset = 0;
MaxOffset = 4095;
break;
@@ -3686,14 +3693,14 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::STPDpre:
case AArch64::LDPDpost:
Scale = TypeSize::getFixed(8);
- Width = 8;
+ Width = TypeSize::getFixed(8);
MinOffset = -512;
MaxOffset = 504;
break;
case AArch64::STPQpre:
case AArch64::LDPQpost:
Scale = TypeSize::getFixed(16);
- Width = 16;
+ Width = TypeSize::getFixed(16);
MinOffset = -1024;
MaxOffset = 1008;
break;
@@ -3702,26 +3709,26 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::LDRXpost:
case AArch64::LDRDpost:
Scale = TypeSize::getFixed(1);
- Width = 8;
+ Width = TypeSize::getFixed(8);
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::STRQpre:
case AArch64::LDRQpost:
Scale = TypeSize::getFixed(1);
- Width = 16;
+ Width = TypeSize::getFixed(16);
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::ADDG:
Scale = TypeSize::getFixed(16);
- Width = 0;
+ Width = TypeSize::getFixed(0);
MinOffset = 0;
MaxOffset = 63;
break;
case AArch64::TAGPstack:
Scale = TypeSize::getFixed(16);
- Width = 0;
+ Width = TypeSize::getFixed(0);
// TAGP with a negative offset turns into SUBP, which has a maximum offset
// of 63 (not 64!).
MinOffset = -63;
@@ -3731,42 +3738,42 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::STGi:
case AArch64::STZGi:
Scale = TypeSize::getFixed(16);
- Width = 16;
+ Width = TypeSize::getFixed(16);
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::STR_ZZZZXI:
case AArch64::LDR_ZZZZXI:
Scale = TypeSize::getScalable(16);
- Width = SVEMaxBytesPerVector * 4;
+ Width = TypeSize::getScalable(16 * 4);
MinOffset = -256;
MaxOffset = 252;
break;
case AArch64::STR_ZZZXI:
case AArch64::LDR_ZZZXI:
Scale = TypeSize::getScalable(16);
- Width = SVEMaxBytesPerVector * 3;
+ Width = TypeSize::getScalable(16 * 3);
MinOffset = -256;
MaxOffset = 253;
break;
case AArch64::STR_ZZXI:
case AArch64::LDR_ZZXI:
Scale = TypeSize::getScalable(16);
- Width = SVEMaxBytesPerVector * 2;
+ Width = TypeSize::getScalable(16 * 2);
MinOffset = -256;
MaxOffset = 254;
break;
case AArch64::LDR_PXI:
case AArch64::STR_PXI:
Scale = TypeSize::getScalable(2);
- Width = SVEMaxBytesPerVector / 8;
+ Width = TypeSize::getScalable(2);
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::LDR_ZXI:
case AArch64::STR_ZXI:
Scale = TypeSize::getScalable(16);
- Width = SVEMaxBytesPerVector;
+ Width = TypeSize::getScalable(16);
MinOffset = -256;
MaxOffset = 255;
break;
@@ -3793,7 +3800,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
// A full vectors worth of data
// Width = mbytes * elements
Scale = TypeSize::getScalable(16);
- Width = SVEMaxBytesPerVector;
+ Width = TypeSize::getScalable(16);
MinOffset = -8;
MaxOffset = 7;
break;
@@ -3806,7 +3813,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::ST2W_IMM:
case AArch64::ST2D_IMM:
Scale = TypeSize::getScalable(32);
- Width = SVEMaxBytesPerVector * 2;
+ Width = TypeSize::getScalable(16 * 2);
MinOffset = -8;
MaxOffset = 7;
break;
@@ -3819,7 +3826,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::ST3W_IMM:
case AArch64::ST3D_IMM:
Scale = TypeSize::getScalable(48);
- Width = SVEMaxBytesPerVector * 3;
+ Width = TypeSize::getScalable(16 * 3);
MinOffset = -8;
MaxOffset = 7;
break;
@@ -3832,7 +3839,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::ST4W_IMM:
case AArch64::ST4D_IMM:
Scale = TypeSize::getScalable(64);
- Width = SVEMaxBytesPerVector * 4;
+ Width = TypeSize::getScalable(16 * 4);
MinOffset = -8;
MaxOffset = 7;
break;
@@ -3854,7 +3861,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
// A half vector worth of data
// Width = mbytes * elements
Scale = TypeSize::getScalable(8);
- Width = SVEMaxBytesPerVector / 2;
+ Width = TypeSize::getScalable(8);
MinOffset = -8;
MaxOffset = 7;
break;
@@ -3871,7 +3878,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
// A quarter vector worth of data
// Width = mbytes * elements
Scale = TypeSize::getScalable(4);
- Width = SVEMaxBytesPerVector / 4;
+ Width = TypeSize::getScalable(4);
MinOffset = -8;
MaxOffset = 7;
break;
@@ -3883,20 +3890,20 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
// A eighth vector worth of data
// Width = mbytes * elements
Scale = TypeSize::getScalable(2);
- Width = SVEMaxBytesPerVector / 8;
+ Width = TypeSize::getScalable(2);
MinOffset = -8;
MaxOffset = 7;
break;
case AArch64::ST2Gi:
case AArch64::STZ2Gi:
Scale = TypeSize::getFixed(16);
- Width = 32;
+ Width = TypeSize::getFixed(32);
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::STGPi:
Scale = TypeSize::getFixed(16);
- Width = 16;
+ Width = TypeSize::getFixed(16);
MinOffset = -64;
MaxOffset = 63;
break;
@@ -3908,7 +3915,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::LD1RSB_S_IMM:
case AArch64::LD1RSB_D_IMM:
Scale = TypeSize::getFixed(1);
- Width = 1;
+ Width = TypeSize::getFixed(1);
MinOffset = 0;
MaxOffset = 63;
break;
@@ -3918,7 +3925,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::LD1RSH_S_IMM:
case AArch64::LD1RSH_D_IMM:
Scale = TypeSize::getFixed(2);
- Width = 2;
+ Width = TypeSize::getFixed(2);
MinOffset = 0;
MaxOffset = 63;
break;
@@ -3926,13 +3933,13 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::LD1RW_D_IMM:
case AArch64::LD1RSW_IMM:
Scale = TypeSize::getFixed(4);
- Width = 4;
+ Width = TypeSize::getFixed(4);
MinOffset = 0;
MaxOffset = 63;
break;
case AArch64::LD1RD_IMM:
Scale = TypeSize::getFixed(8);
- Width = 8;
+ Width = TypeSize::getFixed(8);
MinOffset = 0;
MaxOffset = 63;
break;
@@ -5634,8 +5641,7 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
}
// Get the min/max offset and the scale.
- TypeSize ScaleValue(0U, false);
- unsigned Width;
+ TypeSize ScaleValue(0U, false), Width(0U, false);
int64_t MinOff, MaxOff;
if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff,
MaxOff))
@@ -8392,8 +8398,8 @@ AArch64InstrInfo::getOutliningCandidateInfo(
// if fixing it up would be in range.
int64_t MinOffset,
MaxOffset; // Unscaled offsets for the instruction.
- TypeSize Scale(0U, false); // The scale to multiply the offsets by.
- unsigned DummyWidth;
+ // The scale to multiply the offsets by.
+ TypeSize Scale(0U, false), DummyWidth(0U, false);
getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
Offset += 16; // Update the offset to what it would be if we outlined.
@@ -8898,7 +8904,7 @@ AArch64InstrInfo::getOutliningTypeImpl(MachineBasicBlock::iterator &MIT,
void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
for (MachineInstr &MI : MBB) {
const MachineOperand *Base;
- unsigned Width;
+ TypeSize Width(0, false);
int64_t Offset;
bool OffsetIsScalable;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index cc588cdad6b8e5a..c63e856d46fb3ee 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -165,7 +165,7 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
bool getMemOperandWithOffsetWidth(const MachineInstr &MI,
const MachineOperand *&BaseOp,
int64_t &Offset, bool &OffsetIsScalable,
- unsigned &Width,
+ TypeSize &Width,
const TargetRegisterInfo *TRI) const;
/// Return the immediate offset of the base register in a load/store \p LdSt.
@@ -175,7 +175,7 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
/// \p Scale, \p Width, \p MinOffset, and \p MaxOffset accordingly.
///
/// For unscaled instructions, \p Scale is set to 1.
- static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, unsigned &Width,
+ static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, TypeSize &Width,
int64_t &MinOffset, int64_t &MaxOffset);
bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
diff --git a/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp b/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
index b8b74ae8404d3f3..4afc678abaca63c 100644
--- a/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
@@ -220,8 +220,7 @@ static void emitStore(MachineFunction &MF, MachineBasicBlock &MBB,
Opc = IsPaired ? AArch64::STPXi : AArch64::STRXui;
}
// The implicit scale for Offset is 8.
- TypeSize Scale(0U, false);
- unsigned Width;
+ TypeSize Scale(0U, false), Width(0U, false);
int64_t MinOffset, MaxOffset;
[[maybe_unused]] bool Success =
AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOffset, MaxOffset);
@@ -262,8 +261,7 @@ static void emitLoad(MachineFunction &MF, MachineBasicBlock &MBB,
Opc = IsPaired ? AArch64::LDPXi : AArch64::LDRXui;
}
// The implicit scale for Offset is 8.
- TypeSize Scale(0U, false);
- unsigned Width;
+ TypeSize Scale(0U, false), Width(0U, false);
int64_t MinOffset, MaxOffset;
[[maybe_unused]] bool Success =
AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOffset, MaxOffset);
diff --git a/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll b/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll
index 110f0ef7f4a5500..7244ac949ab88c3 100644
--- a/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll
+++ b/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll
@@ -59,9 +59,9 @@ define void @array_1D_insert(ptr %addr, %my_subtype %elt) #0 {
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0]
+; CHECK-NEXT: st1d { z0.d }, p0, [sp, #1, mul vl]
; CHECK-NEXT: st1d { z1.d }, p0, [sp, #2, mul vl]
; CHECK-NEXT: st1d { z2.d }, p0, [sp]
-; CHECK-NEXT: st1d { z0.d }, p0, [sp, #1, mul vl]
; CHECK-NEXT: addvl sp, sp, #3
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-aliasing.ll b/llvm/test/CodeGen/AArch64/sve-aliasing.ll
index a6d7e1c0fbab173..a83dc494b3bd2c4 100644
--- a/llvm/test/CodeGen/AArch64/sve-aliasing.ll
+++ b/llvm/test/CodeGen/AArch64/sve-aliasing.ll
@@ -8,15 +8,15 @@ define void @scalable_v16i8(ptr noalias nocapture noundef %l0) {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
-; CHECK-NEXT: movprfx z1, z0
-; CHECK-NEXT: mul z1.b, p0/m, z1.b, z0.b
-; CHECK-NEXT: eor z0.d, z1.d, z0.d
+; CHECK-NEXT: ld1b { z1.b }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: movprfx z2, z0
+; CHECK-NEXT: mul z2.b, p0/m, z2.b, z0.b
+; CHECK-NEXT: movprfx z3, z1
+; CHECK-NEXT: mul z3.b, p0/m, z3.b, z1.b
+; CHECK-NEXT: eor z0.d, z2.d, z0.d
+; CHECK-NEXT: eor z1.d, z3.d, z1.d
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
-; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, #1, mul vl]
-; CHECK-NEXT: movprfx z1, z0
-; CHECK-NEXT: mul z1.b, p0/m, z1.b, z0.b
-; CHECK-NEXT: eor z0.d, z1.d, z0.d
-; CHECK-NEXT: st1b { z0.b }, p0, [x0, #1, mul vl]
+; CHECK-NEXT: st1b { z1.b }, p0, [x0, #1, mul vl]
; CHECK-NEXT: ret
%l3 = load <vscale x 16 x i8>, ptr %l0, align 16
%l5 = mul <vscale x 16 x i8> %l3, %l3
@@ -37,15 +37,15 @@ define void @scalable_v8i16(ptr noalias nocapture noundef %l0) {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
-; CHECK-NEXT: movprfx z1, z0
-; CHECK-NEXT: mul z1.h, p0/m, z1.h, z0.h
-; CHECK-NEXT: eor z0.d, z1.d, z0.d
+; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: movprfx z2, z0
+; CHECK-NEXT: mul z2.h, p0/m, z2.h, z0.h
+; CHECK-NEXT: movprfx z3, z1
+; CHECK-NEXT: mul z3.h, p0/m, z3.h, z1.h
+; CHECK-NEXT: eor z0.d, z2.d, z0.d
+; CHECK-NEXT: eor z1.d, z3.d, z1.d
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
-; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0, #1, mul vl]
-; CHECK-NEXT: movprfx z1, z0
-; CHECK-NEXT: mul z1.h, p0/m, z1.h, z0.h
-; CHECK-NEXT: eor z0.d, z1.d, z0.d
-; CHECK-NEXT: st1h { z0.h }, p0, [x0, #1, mul vl]
+; CHECK-NEXT: st1h { z1.h }, p0, [x0, #1, mul vl]
; CHECK-NEXT: ret
%l3 = load <vscale x 8 x i16>, ptr %l0, align 16
%l5 = mul <vscale x 8 x i16> %l3, %l3
@@ -66,15 +66,15 @@ define void @scalable_v4i32(ptr noalias nocapture noundef %l0) {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT: movprfx z1, z0
-; CHECK-NEXT: mul z1.s, p0/m, z1.s, z0.s
-; CHECK-NEXT: eor z0.d, z1.d, z0.d
+; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: movprfx z2, z0
+; CHECK-NEXT: mul z2.s, p0/m, z2.s, z0.s
+; CHECK-NEXT: movprfx z3, z1
+; CHECK-NEXT: mul z3.s, p0/m, z3.s, z1.s
+; CHECK-NEXT: eor z0.d, z2.d, z0.d
+; CHECK-NEXT: eor z1.d, z3.d, z1.d
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
-; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0, #1, mul vl]
-; CHECK-NEXT: movprfx z1, z0
-; CHECK-NEXT: mul z1.s, p0/m, z1.s, z0.s
-; CHECK-NEXT: eor z0.d, z1.d, z0.d
-; CHECK-NEXT: st1w { z0.s }, p0, [x0, #1, mul vl]
+; CHECK-NEXT: st1w { z1.s }, p0, [x0, #1, mul vl]
; CHECK-NEXT: ret
%l3 = load <vscale x 4 x i32>, ptr %l0, align 16
%l5 = mul <vscale x 4 x i32> %l3, %l3
@@ -95,15 +95,15 @@ define void @scalable_v2i64(ptr noalias nocapture noundef %l0) {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
-; CHECK-NEXT: movprfx z1, z0
-; CHECK-NEXT: mul z1.d, p0/m, z1.d, z0.d
-; CHECK-NEXT: eor z0.d, z1.d, z0.d
+; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: movprfx z2, z0
+; CHECK-NEXT: mul z2.d, p0/m, z2.d, z0.d
+; CHECK-NEXT: movprfx z3, z1
+; CHECK-NEXT: mul z3.d, p0/m, z3.d, z1.d
+; CHECK-NEXT: eor z0.d, z2.d, z0.d
+; CHECK-NEXT: eor z1.d, z3.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
-; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, #1, mul vl]
-; CHECK-NEXT: movprfx z1, z0
-; CHECK-NEXT: mul z1.d, p0/m, z1.d, z0.d
-; CHECK-NEXT: eor z0.d, z1.d, z0.d
-; CHECK-NEXT: st1d { z0.d }, p0, [x0, #1, mul vl]
+; CHECK-NEXT: st1d { z1.d }, p0, [x0, #1, mul vl]
; CHECK-NEXT: ret
%l3 = load <vscale x 2 x i64>, ptr %l0, align 16
%l5 = mul <vscale x 2 x i64> %l3, %l3
@@ -124,15 +124,15 @@ define void @scalable_v8i8(ptr noalias nocapture noundef %l0) {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ld1sb { z0.h }, p0/z, [x0]
-; CHECK-NEXT: movprfx z1, z0
-; CHECK-NEXT: mul z1.h, p0/m, z1.h, z0.h
-; CHECK-NEXT: eor z0.d, z1.d, z0.d
+; CHECK-NEXT: ld1sb { z1.h }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: movprfx z2, z0
+; CHECK-NEXT: mul z2.h, p0/m, z2.h, z0.h
+; CHECK-NEXT: movprfx z3, z1
+; CHECK-NEXT: mul z3.h, p0/m, z3.h, z1.h
+; CHECK-NEXT: eor z0.d, z2.d, z0.d
+; CHECK-NEXT: eor z1.d, z3.d, z1.d
; CHECK-NEXT: st1b { z0.h }, p0, [x0]
-; CHECK-NEXT: ld1sb { z0.h }, p0/z, [x0, #1, mul vl]
-; CHECK-NEXT: movprfx z1, z0
-; CHECK-NEXT: mul z1.h, p0/m, z1.h, z0.h
-; CHECK-NEXT: eor z0.d, z1.d, z0.d
-; CHECK-NEXT: st1b { z0.h }, p0, [x0, #1, mul vl]
+; CHECK-NEXT: st1b { z1.h }, p0, [x0, #1, mul vl]
; CHECK-NEXT: ret
%l3 = load <vscale x 8 x i8>, ptr %l0, align 16
%s3 = sext <vscale x 8 x i8> %l3 to <vscale x 8 x i16>
@@ -157,15 +157,15 @@ define void @scalable_v4i8(ptr noalias nocapture noundef %l0) {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1sb { z0.s }, p0/z, [x0]
-; CHECK-NEXT: movprfx z1, z0
-; CHECK-NEXT: mul z1.s, p0/m, z1.s, z0.s
-; CHECK-NEXT: eor z0.d, z1.d, z0.d
+; CHECK-NEXT: ld1sb { z1.s }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: movprfx z2, z0
+; CHECK-NEXT: mul z2.s, p0/m, z2.s, z0.s
+; CHECK-NEXT: movprfx z3, z1
+; CHECK-NEXT: mul z3.s, p0/m, z3.s, z1.s
+; CHECK-NEXT: eor z0.d, z2.d, z0.d
+; CHECK-NEXT: eor z1.d, z3.d, z1.d
; CHECK-NEXT: st1b { z0.s }, p0, [x0]
-; CHECK-NEXT: ld1sb { z0.s }, p0/z, [x0, #1, mul vl]
-; CHECK-NEXT: movprfx z1, z0
-; CHECK-NEXT: mul z1.s, p0/m, z1.s, z0.s
-; CHECK-NEXT: eor z0.d, z1.d, z0.d
-; CHECK-NEXT: st1b { z0.s }, p0, [x0, #1, mul vl]
+; CHECK-NEXT: st1b { z1.s }, p0, [x0, #1, mul vl]
; CHECK-NEXT: ret
%l3 = load <vscale x 4 x i8>, ptr %l0, align 16
%s3 = sext <vscale x 4 x i8> %l3 to <vscale x 4 x i32>
@@ -190,15 +190,15 @@ define void @scalable_v2i8(ptr noalias nocapture noundef %l0) {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x0]
-; CHECK-NEXT: movprfx z1, z0
-; CHECK-NEXT: mul z1.d, p0/m, z1.d, z0.d
-; CHECK-NEXT: eor z0.d, z1.d, z0.d
+; CHECK-NEXT: ld1sb { z1.d }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: movprfx z2, z0
+; CHECK-NEXT: mul z2.d, p0/m, z2.d, z0.d
+; CHECK-NEXT: movprfx z3, z1
+; CHECK-NEXT: mul z3.d, p0/m, z3.d, z1.d
+; CHECK-NEXT: eor z0.d, z2.d, z0.d
+; CHECK-NEXT: eor z1.d, z3.d, z1.d
; CHECK-NEXT: st1b { z0.d }, p0, [x0]
-; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x0, #1, mul vl]
-; CHECK-NEXT: movprfx z1, z0
-; CHECK-NEXT: mul z1.d, p0/m, z1.d, z0.d
-; CHECK-NEXT: eor z0.d, z1.d, z0.d
-; CHECK-NEXT: st1b { z0.d }, p0, [x0, #1, mul vl]
+; CHECK-NEXT: st1b { z1.d }, p0, [x0, #1, mul vl]
; CHECK-NEXT: ret
%l3 = load <vscale x 2 x i8>, ptr %l0, align 16
%s3 = sext <vscale x 2 x i8> %l3 to <vscale x 2 x i64>
@@ -223,15 +223,15 @@ define void @scalable_v4i16(ptr noalias nocapture noundef %l0) {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x0]
-; CHECK-NEXT: movprfx z1, z0
-; CHECK-NEXT: mul z1.s, p0/m, z1.s, z0.s
-; CHECK-NEXT: eor z0.d, z1.d, z0.d
+; CHECK-NEXT: ld1sh { z1.s }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: movprfx z2, z0
+; CHECK-NEXT: mul z2.s, p0/m, z2.s, z0.s
+; CHECK-NEXT: movprfx z3, z1
+; CHECK-NEXT: mul z3.s, p0/m, z3.s, z1.s
+; CHECK-NEXT: eor z0.d, z2.d, z0.d
+; CHECK-NEXT: eor z1.d, z3.d, z1.d
; CHECK-NEXT: st1h { z0.s }, p0, [x0]
-; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x0, #1, mul vl]
-; CHECK-NEXT: movprfx z1, z0
-; CHECK-NEXT: mul z1.s, p0/m, z1.s, z0.s
-; CHECK-NEXT: eor z0.d, z1.d, z0.d
-; CHECK-NEXT: st1h { z0.s }, p0, [x0, #1, mul vl]
+; CHECK-NEXT: st1h { z1.s }, p0, [x0, #1, mul vl]
; CHECK-NEXT: ret
%l3 = load <vscale x 4 x i16>, ptr %l0, align 16
%s3 = sext <vscale x 4 x i16> %l3 to <vscale x 4 x i32>
@@ -256,15 +256,15 @@ define void @scalable_v2i16(ptr noalias nocapture noundef %l0) {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1sh { z0.d }, p0/z, [x0]
-; CHECK-NEXT: movprfx z1, z0
-; CHECK-NEXT: mul z1.d, p0/m, z1.d, z0.d
-; CHECK-NEXT: eor z0.d, z1.d, z0.d
+; CHECK-NEXT: ld1sh { z1.d }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: movprfx z2, z0
+; CHECK-NEXT: mul z2.d, p0/m, z2.d, z0.d
+; CHECK-NEXT: movprfx z3, z1
+; CHECK-NEXT: mul z3.d, p0/m, z3.d, z1.d
+; CHECK-NEXT: eor z0.d, z2.d, z0.d
+; CHECK-NEXT: eor z1.d, z3.d, z1.d
; CHECK-NEXT: st1h { z0.d }, p0, [x0]
-; CHECK-NEXT: ld1sh { z0.d }, p0/z, [x0, #1, mul vl]
-; CHECK-NEXT: movprfx z1, z0
-; CHECK-NEXT: mul z1.d, p0/m, z1.d, z0.d
-; CHECK-NEXT: eor z0.d, z1.d, z0.d
-; CHECK-NEXT: st1h { z0.d }, p0, [x0, #1, mul vl]
+; CHECK-NEXT: st1h { z1.d }, p0, [x0, #1, mul vl]
; CHECK-NEXT: ret
%l3 = load <vscale x 2 x i16>, ptr %l0, align 16
%s3 = sext <vscale x 2 x i16> %l3 to <vscale x 2 x i64>
@@ -289,15 +289,15 @@ define void @scalable_v2i32(ptr noalias nocapture noundef %l0) {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1sw { z0.d }, p0/z, [x0]
-; CHECK-NEXT: movprfx z1, z0
-; CHECK-NEXT: mul z1.d, p0/m, z1.d, z0.d
-; CHECK-NEXT: eor z0.d, z1.d, z0.d
+; CHECK-NEXT: ld1sw { z1.d }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: movprfx z2, z0
+; CHECK-NEXT: mul z2.d, p0/m, z2.d, z0.d
+; CHECK-NEXT: movprfx z3, z1
+; CHECK-NEXT: mul z3.d, p0/m, z3.d, z1.d
+; CHECK-NEXT: eor z0.d, z2.d, z0.d
+; CHECK-NEXT: eor z1.d, z3.d, z1.d
; CHECK-NEXT: st1w { z0.d }, p0, [x0]
-; CHECK-NEXT: ld1sw { z0.d }, p0/z, [x0, #1, mul vl]
-; CHECK-NEXT: movprfx z1, z0
-; CHECK-NEXT: mul z1.d, p0/m, z1.d, z0.d
-; CHECK-NEXT: eor z0.d, z1.d, z0.d
-; CHECK-NEXT: st1w { z0.d }, p0, [x0, #1, mul vl]
+; CHECK-NEXT: st1w { z1.d }, p0, [x0, #1, mul vl]
; CHECK-NEXT: ret
%l3 = load <vscale x 2 x i32>, ptr %l0, align 16
%s3 = sext <vscale x 2 x i32> %l3 to <vscale x 2 x i64>
@@ -457,20 +457,20 @@ define void @triple_v16i8(ptr noalias nocapture noundef %l0) {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
-; CHECK-NEXT: movprfx z1, z0
-; CHECK-NEXT: mul z1.b, p0/m, z1.b, z0.b
-; CHECK-NEXT: eor z0.d, z1.d, z0.d
+; CHECK-NEXT: ld1b { z1.b }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1b { z2.b }, p0/z, [x0, #2, mul vl]
+; CHECK-NEXT: movprfx z3, z0
+; CHECK-NEXT: mul z3.b, p0/m, z3.b, z0.b
+; CHECK-NEXT: movprfx z4, z1
+; CHECK-NEXT: mul z4.b, p0/m, z4.b, z1.b
+; CHECK-NEXT: movprfx z5, z2
+; CHECK-NEXT: mul z5.b, p0/m, z5.b, z2.b
+; CHECK-NEXT: eor z0.d, z3.d, z0.d
+; CHECK-NEXT: eor z1.d, z4.d, z1.d
+; CHECK-NEXT: eor z2.d, z5.d, z2.d
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
-; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, #1, mul vl]
-; CHECK-NEXT: movprfx z1, z0
-; CHECK-NEXT: mul z1.b, p0/m, z1.b, z0.b
-; CHECK-NEXT: eor z0.d, z1.d, z0.d
-; CHECK-NEXT: st1b { z0.b }, p0, [x0, #1, mul vl]
-; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, #2, mul vl]
-; CHECK-NEXT: movprfx z1, z0
-; CHECK-NEXT: mul z1.b, p0/m, z1.b, z0.b
-; CHECK-NEXT: eor z0.d, z1.d, z0.d
-; CHECK-NEXT: st1b { z0.b }, p0, [x0, #2, mul vl]
+; CHECK-NEXT: st1b { z1.b }, p0, [x0, #1, mul vl]
+; CHECK-NEXT: st1b { z2.b }, p0, [x0, #2, mul vl]
; CHECK-NEXT: ret
%l3 = load <vscale x 16 x i8>, ptr %l0, align 16
%l5 = mul <vscale x 16 x i8> %l3, %l3
diff --git a/llvm/test/CodeGen/AArch64/sve-aliasing.mir b/llvm/test/CodeGen/AArch64/sve-aliasing.mir
new file mode 100644
index 000000000000000..3b7c9fefa5277ed
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-aliasing.mir
@@ -0,0 +1,65 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -o - %s -mtriple=aarch64 -run-pass=machine-scheduler -verify-machineinstrs | FileCheck %s
+
+---
+name: scalable_v16i1
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64common, preferred-register: '' }
+ - { id: 1, class: ppr, preferred-register: '' }
+ - { id: 2, class: ppr, preferred-register: '' }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$p0', virtual-reg: '%1' }
+body: |
+ bb.0:
+ liveins: $x0, $p0
+
+ ; CHECK-LABEL: name: scalable_v16i1
+ ; CHECK: liveins: $x0, $p0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: [[LDR_PXI:%[0-9]+]]:ppr = LDR_PXI [[COPY]], 1 :: (load unknown-size, align 16)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ppr = COPY $p0
+ ; CHECK-NEXT: $p0 = COPY [[LDR_PXI]]
+ ; CHECK-NEXT: STR_PXI [[COPY1]], [[COPY]], 0 :: (store unknown-size, align 16)
+ ; CHECK-NEXT: RET_ReallyLR implicit $p0
+ %1:ppr = COPY $p0
+ %0:gpr64common = COPY $x0
+ STR_PXI %1, %0, 0 :: (store unknown-size, align 16)
+ %2:ppr = LDR_PXI %0, 1 :: (load unknown-size, align 16)
+ $p0 = COPY %2
+ RET_ReallyLR implicit $p0
+
+...
+---
+name: scalable_neg_v16i1
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64common, preferred-register: '' }
+ - { id: 1, class: ppr, preferred-register: '' }
+ - { id: 2, class: ppr, preferred-register: '' }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$p0', virtual-reg: '%1' }
+body: |
+ bb.0:
+ liveins: $x0, $p0
+
+ ; CHECK-LABEL: name: scalable_neg_v16i1
+ ; CHECK: liveins: $x0, $p0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: [[LDR_PXI:%[0-9]+]]:ppr = LDR_PXI [[COPY]], -1 :: (load unknown-size, align 16)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ppr = COPY $p0
+ ; CHECK-NEXT: $p0 = COPY [[LDR_PXI]]
+ ; CHECK-NEXT: STR_PXI [[COPY1]], [[COPY]], 0 :: (store unknown-size, align 16)
+ ; CHECK-NEXT: RET_ReallyLR implicit $p0
+ %1:ppr = COPY $p0
+ %0:gpr64common = COPY $x0
+ STR_PXI %1, %0, 0 :: (store unknown-size, align 16)
+ %2:ppr = LDR_PXI %0, -1 :: (load unknown-size, align 16)
+ $p0 = COPY %2
+ RET_ReallyLR implicit $p0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
index b7f9ef839090306..5ffd7f1dfe16534 100644
--- a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
@@ -463,8 +463,8 @@ define <vscale x 6 x i32> @insert_nxv6i32_nxv2i32(<vscale x 2 x i32> %sv0, <vsc
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
-; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [sp, #1, mul vl]
+; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret