[llvm] [AArch64] Teach areMemAccessesTriviallyDisjoint about scalable widths. (PR #73655)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 28 06:58:29 PST 2023
llvmbot wrote:
@llvm/pr-subscribers-backend-aarch64
Author: David Green (davemgreen)
The core change here is to make getMemOperandWithOffsetWidth return the Width as a TypeSize, which in turn allows areMemAccessesTriviallyDisjoint to reason about scalable widths when proving accesses trivially disjoint.
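In effect, once both widths are TypeSizes, the disjointness check can compare scalable offsets against scalable widths directly in vscale units. Here is a minimal, self-contained sketch of that rule, using a simplified stand-in for `llvm::TypeSize` (`SimpleTypeSize` and `triviallyDisjoint` below are illustrative, not the LLVM API):

```cpp
#include <cassert>
#include <cstdint>

// A minimal stand-in for llvm::TypeSize, just enough to illustrate the rule.
struct SimpleTypeSize {
  uint64_t KnownMin; // size in bytes, before any vscale scaling
  bool Scalable;     // true if the real size is KnownMin * vscale
};

// Sketch of the check areMemAccessesTriviallyDisjoint performs once widths
// are TypeSizes: two accesses off the same base are disjoint when the lower
// one ends at or before the higher one begins, provided the offsets and the
// low width are measured in the same (fixed or vscale-scaled) units.
static bool triviallyDisjoint(int64_t OffsetA, SimpleTypeSize WidthA,
                              int64_t OffsetB, SimpleTypeSize WidthB,
                              bool OffsetsAreScalable) {
  int64_t LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int64_t HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  SimpleTypeSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
  // A fixed width cannot be compared against vscale-scaled offsets (or vice
  // versa) without knowing vscale, so bail out conservatively.
  if (LowWidth.Scalable != OffsetsAreScalable)
    return false;
  return LowOffset + (int64_t)LowWidth.KnownMin <= HighOffset;
}

int main() {
  // Two consecutive SVE stores at [x0] and [x0, #1, mul vl]: offsets 0 and
  // 16 in vscale units, widths of 16 scalable bytes each -> disjoint.
  assert(triviallyDisjoint(0, {16, true}, 16, {16, true}, true));
  // An overlapping pair: a 32-byte scalable access at 0 vs one at 16.
  assert(!triviallyDisjoint(0, {32, true}, 16, {16, true}, true));
}
```

The first assert mirrors the sve-aliasing.ll changes below: consecutive `mul vl`-addressed accesses can now be proven disjoint, which is what lets the loads and stores be rescheduled in the updated tests.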
---
Patch is 31.25 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/73655.diff
7 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64FrameLowering.cpp (+1-2)
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.cpp (+58-53)
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.h (+2-2)
- (modified) llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp (+2-4)
- (modified) llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-aliasing.ll (+93-93)
- (modified) llvm/test/CodeGen/AArch64/sve-insert-vector.ll (+1-1)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index fd47970bd050596..6faad4ec4acb2c3 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1214,8 +1214,7 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
SEH->eraseFromParent();
}
- TypeSize Scale = TypeSize::getFixed(1);
- unsigned Width;
+ TypeSize Scale = TypeSize::getFixed(1), Width(0, false);
int64_t MinOffset, MaxOffset;
bool Success = static_cast<const AArch64InstrInfo *>(TII)->getMemOpInfo(
NewOpc, Scale, Width, MinOffset, MaxOffset);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index a95ab3c7e0f288a..01fecbf4c1408be 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1096,7 +1096,7 @@ bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
const TargetRegisterInfo *TRI = &getRegisterInfo();
const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
int64_t OffsetA = 0, OffsetB = 0;
- unsigned WidthA = 0, WidthB = 0;
+ TypeSize WidthA(0, false), WidthB(0, false);
bool OffsetAIsScalable = false, OffsetBIsScalable = false;
assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
@@ -1121,8 +1121,9 @@ bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
OffsetAIsScalable == OffsetBIsScalable) {
int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
- int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
- if (LowOffset + LowWidth <= HighOffset)
+ TypeSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
+ if (LowWidth.isScalable() == OffsetAIsScalable &&
+ LowOffset + (int)LowWidth.getKnownMinValue() <= HighOffset)
return true;
}
}
@@ -2675,9 +2676,15 @@ bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
return false;
const MachineOperand *BaseOp;
+ TypeSize WidthN(0, false);
if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
- Width, TRI))
+ WidthN, TRI))
return false;
+  // The maximum vscale is 16 under AArch64, so return the maximal extent for
+  // the vector.
+ Width = WidthN.isScalable()
+ ? WidthN.getKnownMinValue() * AArch64::SVEMaxBitsPerVector / 128
+ : WidthN.getKnownMinValue();
BaseOps.push_back(BaseOp);
return true;
}
@@ -3456,7 +3463,7 @@ MachineInstr *AArch64InstrInfo::emitLdStWithAddr(MachineInstr &MemI,
bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
- bool &OffsetIsScalable, unsigned &Width,
+ bool &OffsetIsScalable, TypeSize &Width,
const TargetRegisterInfo *TRI) const {
assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
// Handle only loads/stores with base register followed by immediate offset.
@@ -3511,26 +3518,25 @@ AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
}
bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
- unsigned &Width, int64_t &MinOffset,
+ TypeSize &Width, int64_t &MinOffset,
int64_t &MaxOffset) {
- const unsigned SVEMaxBytesPerVector = AArch64::SVEMaxBitsPerVector / 8;
switch (Opcode) {
// Not a memory operation or something we want to handle.
default:
Scale = TypeSize::getFixed(0);
- Width = 0;
+ Width = TypeSize::getFixed(0);
MinOffset = MaxOffset = 0;
return false;
case AArch64::STRWpost:
case AArch64::LDRWpost:
- Width = 32;
+ Width = TypeSize::getFixed(32);
Scale = TypeSize::getFixed(4);
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::LDURQi:
case AArch64::STURQi:
- Width = 16;
+ Width = TypeSize::getFixed(16);
Scale = TypeSize::getFixed(1);
MinOffset = -256;
MaxOffset = 255;
@@ -3542,7 +3548,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::STURXi:
case AArch64::STURDi:
case AArch64::STLURXi:
- Width = 8;
+ Width = TypeSize::getFixed(8);
Scale = TypeSize::getFixed(1);
MinOffset = -256;
MaxOffset = 255;
@@ -3555,7 +3561,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::STURWi:
case AArch64::STURSi:
case AArch64::STLURWi:
- Width = 4;
+ Width = TypeSize::getFixed(4);
Scale = TypeSize::getFixed(1);
MinOffset = -256;
MaxOffset = 255;
@@ -3570,7 +3576,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::STURHi:
case AArch64::STURHHi:
case AArch64::STLURHi:
- Width = 2;
+ Width = TypeSize::getFixed(2);
Scale = TypeSize::getFixed(1);
MinOffset = -256;
MaxOffset = 255;
@@ -3585,7 +3591,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::STURBi:
case AArch64::STURBBi:
case AArch64::STLURBi:
- Width = 1;
+ Width = TypeSize::getFixed(1);
Scale = TypeSize::getFixed(1);
MinOffset = -256;
MaxOffset = 255;
@@ -3595,14 +3601,14 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::STPQi:
case AArch64::STNPQi:
Scale = TypeSize::getFixed(16);
- Width = 32;
+ Width = TypeSize::getFixed(32);
MinOffset = -64;
MaxOffset = 63;
break;
case AArch64::LDRQui:
case AArch64::STRQui:
Scale = TypeSize::getFixed(16);
- Width = 16;
+ Width = TypeSize::getFixed(16);
MinOffset = 0;
MaxOffset = 4095;
break;
@@ -3615,7 +3621,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::STNPXi:
case AArch64::STNPDi:
Scale = TypeSize::getFixed(8);
- Width = 16;
+ Width = TypeSize::getFixed(16);
MinOffset = -64;
MaxOffset = 63;
break;
@@ -3625,14 +3631,14 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::STRXui:
case AArch64::STRDui:
Scale = TypeSize::getFixed(8);
- Width = 8;
+ Width = TypeSize::getFixed(8);
MinOffset = 0;
MaxOffset = 4095;
break;
case AArch64::StoreSwiftAsyncContext:
// Store is an STRXui, but there might be an ADDXri in the expansion too.
Scale = TypeSize::getFixed(1);
- Width = 8;
+ Width = TypeSize::getFixed(8);
MinOffset = 0;
MaxOffset = 4095;
break;
@@ -3645,7 +3651,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::STNPWi:
case AArch64::STNPSi:
Scale = TypeSize::getFixed(4);
- Width = 8;
+ Width = TypeSize::getFixed(8);
MinOffset = -64;
MaxOffset = 63;
break;
@@ -3655,7 +3661,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::STRWui:
case AArch64::STRSui:
Scale = TypeSize::getFixed(4);
- Width = 4;
+ Width = TypeSize::getFixed(4);
MinOffset = 0;
MaxOffset = 4095;
break;
@@ -3666,7 +3672,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::STRHui:
case AArch64::STRHHui:
Scale = TypeSize::getFixed(2);
- Width = 2;
+ Width = TypeSize::getFixed(2);
MinOffset = 0;
MaxOffset = 4095;
break;
@@ -3677,7 +3683,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::STRBui:
case AArch64::STRBBui:
Scale = TypeSize::getFixed(1);
- Width = 1;
+ Width = TypeSize::getFixed(1);
MinOffset = 0;
MaxOffset = 4095;
break;
@@ -3686,14 +3692,14 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::STPDpre:
case AArch64::LDPDpost:
Scale = TypeSize::getFixed(8);
- Width = 8;
+ Width = TypeSize::getFixed(8);
MinOffset = -512;
MaxOffset = 504;
break;
case AArch64::STPQpre:
case AArch64::LDPQpost:
Scale = TypeSize::getFixed(16);
- Width = 16;
+ Width = TypeSize::getFixed(16);
MinOffset = -1024;
MaxOffset = 1008;
break;
@@ -3702,26 +3708,26 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::LDRXpost:
case AArch64::LDRDpost:
Scale = TypeSize::getFixed(1);
- Width = 8;
+ Width = TypeSize::getFixed(8);
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::STRQpre:
case AArch64::LDRQpost:
Scale = TypeSize::getFixed(1);
- Width = 16;
+ Width = TypeSize::getFixed(16);
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::ADDG:
Scale = TypeSize::getFixed(16);
- Width = 0;
+ Width = TypeSize::getFixed(0);
MinOffset = 0;
MaxOffset = 63;
break;
case AArch64::TAGPstack:
Scale = TypeSize::getFixed(16);
- Width = 0;
+ Width = TypeSize::getFixed(0);
// TAGP with a negative offset turns into SUBP, which has a maximum offset
// of 63 (not 64!).
MinOffset = -63;
@@ -3731,42 +3737,42 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::STGi:
case AArch64::STZGi:
Scale = TypeSize::getFixed(16);
- Width = 16;
+ Width = TypeSize::getFixed(16);
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::STR_ZZZZXI:
case AArch64::LDR_ZZZZXI:
Scale = TypeSize::getScalable(16);
- Width = SVEMaxBytesPerVector * 4;
+ Width = TypeSize::getScalable(16 * 4);
MinOffset = -256;
MaxOffset = 252;
break;
case AArch64::STR_ZZZXI:
case AArch64::LDR_ZZZXI:
Scale = TypeSize::getScalable(16);
- Width = SVEMaxBytesPerVector * 3;
+ Width = TypeSize::getScalable(16 * 3);
MinOffset = -256;
MaxOffset = 253;
break;
case AArch64::STR_ZZXI:
case AArch64::LDR_ZZXI:
Scale = TypeSize::getScalable(16);
- Width = SVEMaxBytesPerVector * 2;
+ Width = TypeSize::getScalable(16 * 2);
MinOffset = -256;
MaxOffset = 254;
break;
case AArch64::LDR_PXI:
case AArch64::STR_PXI:
Scale = TypeSize::getScalable(2);
- Width = SVEMaxBytesPerVector / 8;
+ Width = TypeSize::getScalable(2);
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::LDR_ZXI:
case AArch64::STR_ZXI:
Scale = TypeSize::getScalable(16);
- Width = SVEMaxBytesPerVector;
+ Width = TypeSize::getScalable(16);
MinOffset = -256;
MaxOffset = 255;
break;
@@ -3793,7 +3799,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
// A full vectors worth of data
// Width = mbytes * elements
Scale = TypeSize::getScalable(16);
- Width = SVEMaxBytesPerVector;
+ Width = TypeSize::getScalable(16);
MinOffset = -8;
MaxOffset = 7;
break;
@@ -3806,7 +3812,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::ST2W_IMM:
case AArch64::ST2D_IMM:
Scale = TypeSize::getScalable(32);
- Width = SVEMaxBytesPerVector * 2;
+ Width = TypeSize::getScalable(16 * 2);
MinOffset = -8;
MaxOffset = 7;
break;
@@ -3819,7 +3825,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::ST3W_IMM:
case AArch64::ST3D_IMM:
Scale = TypeSize::getScalable(48);
- Width = SVEMaxBytesPerVector * 3;
+ Width = TypeSize::getScalable(16 * 3);
MinOffset = -8;
MaxOffset = 7;
break;
@@ -3832,7 +3838,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::ST4W_IMM:
case AArch64::ST4D_IMM:
Scale = TypeSize::getScalable(64);
- Width = SVEMaxBytesPerVector * 4;
+ Width = TypeSize::getScalable(16 * 4);
MinOffset = -8;
MaxOffset = 7;
break;
@@ -3854,7 +3860,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
// A half vector worth of data
// Width = mbytes * elements
Scale = TypeSize::getScalable(8);
- Width = SVEMaxBytesPerVector / 2;
+ Width = TypeSize::getScalable(8);
MinOffset = -8;
MaxOffset = 7;
break;
@@ -3871,7 +3877,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
// A quarter vector worth of data
// Width = mbytes * elements
Scale = TypeSize::getScalable(4);
- Width = SVEMaxBytesPerVector / 4;
+ Width = TypeSize::getScalable(4);
MinOffset = -8;
MaxOffset = 7;
break;
@@ -3883,20 +3889,20 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
// A eighth vector worth of data
// Width = mbytes * elements
Scale = TypeSize::getScalable(2);
- Width = SVEMaxBytesPerVector / 8;
+ Width = TypeSize::getScalable(2);
MinOffset = -8;
MaxOffset = 7;
break;
case AArch64::ST2Gi:
case AArch64::STZ2Gi:
Scale = TypeSize::getFixed(16);
- Width = 32;
+ Width = TypeSize::getFixed(32);
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::STGPi:
Scale = TypeSize::getFixed(16);
- Width = 16;
+ Width = TypeSize::getFixed(16);
MinOffset = -64;
MaxOffset = 63;
break;
@@ -3908,7 +3914,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::LD1RSB_S_IMM:
case AArch64::LD1RSB_D_IMM:
Scale = TypeSize::getFixed(1);
- Width = 1;
+ Width = TypeSize::getFixed(1);
MinOffset = 0;
MaxOffset = 63;
break;
@@ -3918,7 +3924,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::LD1RSH_S_IMM:
case AArch64::LD1RSH_D_IMM:
Scale = TypeSize::getFixed(2);
- Width = 2;
+ Width = TypeSize::getFixed(2);
MinOffset = 0;
MaxOffset = 63;
break;
@@ -3926,13 +3932,13 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::LD1RW_D_IMM:
case AArch64::LD1RSW_IMM:
Scale = TypeSize::getFixed(4);
- Width = 4;
+ Width = TypeSize::getFixed(4);
MinOffset = 0;
MaxOffset = 63;
break;
case AArch64::LD1RD_IMM:
Scale = TypeSize::getFixed(8);
- Width = 8;
+ Width = TypeSize::getFixed(8);
MinOffset = 0;
MaxOffset = 63;
break;
@@ -5634,8 +5640,7 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
}
// Get the min/max offset and the scale.
- TypeSize ScaleValue(0U, false);
- unsigned Width;
+ TypeSize ScaleValue(0U, false), Width(0U, false);
int64_t MinOff, MaxOff;
if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff,
MaxOff))
@@ -8392,8 +8397,8 @@ AArch64InstrInfo::getOutliningCandidateInfo(
// if fixing it up would be in range.
int64_t MinOffset,
MaxOffset; // Unscaled offsets for the instruction.
- TypeSize Scale(0U, false); // The scale to multiply the offsets by.
- unsigned DummyWidth;
+ // The scale to multiply the offsets by.
+ TypeSize Scale(0U, false), DummyWidth(0, false);
getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
Offset += 16; // Update the offset to what it would be if we outlined.
@@ -8898,7 +8903,7 @@ AArch64InstrInfo::getOutliningTypeImpl(MachineBasicBlock::iterator &MIT,
void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
for (MachineInstr &MI : MBB) {
const MachineOperand *Base;
- unsigned Width;
+ TypeSize Width(0, false);
int64_t Offset;
bool OffsetIsScalable;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index a934103c90cbf92..8835e6a1779983e 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -165,7 +165,7 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
bool getMemOperandWithOffsetWidth(const MachineInstr &MI,
const MachineOperand *&BaseOp,
int64_t &Offset, bool &OffsetIsScalable,
- unsigned &Width,
+ TypeSize &Width,
const TargetRegisterInfo *TRI) const;
/// Return the immediate offset of the base register in a load/store \p LdSt.
@@ -175,7 +175,7 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
/// \p Scale, \p Width, \p MinOffset, and \p MaxOffset accordingly.
///
/// For unscaled instructions, \p Scale is set to 1.
- static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, unsigned &Width,
+ static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, TypeSize &Width,
int64_t &MinOffset, int64_t &MaxOffset);
bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
diff --git a/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp b/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
index b8b74ae8404d3f3..4afc678abaca63c 100644
--- a/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
@@ -220,8 +220,7 @@ static void emitStore(MachineFunction &MF, MachineBasicBlock &MBB,
Opc = IsPaired ? AArch64::STPXi : AArch64::STRXui;
}
// The implicit scale for Offset is 8.
- TypeSize Scale(0U, false);
- unsigned Width;
+ TypeSize Scale(0U, false), Width(0U, false);
int64_t MinOffset, MaxOffset;
[[maybe_unused]] bool Success =
AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOffset, MaxOffset);
@@ -262,8 +261,7 @@ static void emitLoad(MachineFunction &MF, MachineBasicBlock &MBB,
Opc = IsPaired ? AArch64::LDPXi : AArch64::LDRXui;
}
// The implicit scale for Offset is 8.
- TypeSize Scale(0U, false);
- unsigned Width;
+ TypeSize Scale(0U, false), Width(0U, false);
int64_t MinOffset, MaxOffset;
[[maybe_unused]] bool Success =
AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOffset, MaxOffset);
diff --git a/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll b/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll
index 110f0ef7f4a5500..7244ac949ab88c3 100644
--- a/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll
+++ b/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll
@@ -59,9 +59,9 @@ define void @array_1D_insert(ptr %addr, %my_subtype %elt) #0 {
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0]
+; CHECK-NEXT: st1d { z0.d }, p0, [sp, #1, mul vl]
; CHECK-NEXT: st1d { z1.d }, p0, [sp, #2, mul vl]
; CHECK-NEXT: st1d { z2.d }, p0, [sp]
-; CHECK-NEXT: st1d { z0.d }, p0, [sp, #1, mul vl]
; CHECK-NEXT: addvl sp, sp, #3
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-aliasing.ll b/llvm/test/CodeGen/AArch64/sve-aliasing.ll
index a6d7e1c0fbab173..a83dc494b3bd2c4 100644
--- a/llvm/test/CodeGen/AArch64/sve-aliasing.ll
+++ b/llvm/test/CodeGen/AArch64/sve-aliasing.ll
@@ -8,15 +8,15 @@ define void @scalable_v16i8(ptr noalias nocapture noundef %l0) {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
-; CHECK-NEXT: movprfx z1, z0
-; CHECK-NEXT: mul z1.b, p0/m, z1.b, z0.b
-; CHECK-NEXT: eor z0.d, z1.d, z0.d
+; CHECK-NEXT: ld1b { z1.b }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: movprfx z2, z0
+; CHECK-NEXT: mul z2.b, p0/m, z2.b, z0.b
+; CHECK-NEXT: movprfx z3, z1
+; CHECK-NEXT: mul z3.b, p0/m, z3.b, z1.b
+; CHECK-NEXT: eor z0.d, z2.d, z0.d
+; CHECK-NEXT: eor z1.d, z3.d, z1.d
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
-; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, #1, mul vl]
-; CHECK-NEXT: movprfx z1, z0
-; CHECK-NEXT: mul z1.b, p0/m, z1.b, z0.b
-; CHECK-NEXT: eor z0.d, z1.d, z0.d
-; CHECK-NEXT: st1b { z0.b }, p0, [x0, #1, mul vl]
+; CHECK-NEXT: st1b { z1.b }, p0, [x0, #1, mul vl]
; CHECK-NEXT: ret
%l3 = load <vscale x 16 x i8>, ptr %l0, align 16
%l5 = mul <vscale x 16 x i8> %l3, %l3
@@ -37,15 +37,15 @@ define void @scalable_v8i16(ptr noalias nocapture nou...
[truncated]
``````````
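One caller still needs a plain integer width: getMemOperandsWithOffsetWidth. There the patch conservatively widens a scalable width to its architectural maximum, since AArch64 caps SVE vectors at `SVEMaxBitsPerVector` = 2048 bits, i.e. vscale <= 2048 / 128 = 16. A sketch of that arithmetic (the constant mirrors `AArch64::SVEMaxBitsPerVector`; `maxExtentBytes` is an illustrative name, not LLVM's):

```cpp
#include <cstdint>

// Mirrors AArch64::SVEMaxBitsPerVector: SVE vectors are at most 2048 bits,
// so vscale is at most 2048 / 128 = 16 (128 bits per vscale increment).
constexpr uint64_t SVEMaxBitsPerVector = 2048;
constexpr uint64_t MaxVScale = SVEMaxBitsPerVector / 128;

// Illustrative helper: the largest number of bytes a width can cover.
constexpr uint64_t maxExtentBytes(uint64_t KnownMinBytes, bool Scalable) {
  return Scalable ? KnownMinBytes * MaxVScale : KnownMinBytes;
}

// A single SVE Z-register store (16 scalable bytes) can touch at most
// 16 * 16 = 256 bytes; a fixed 16-byte store touches exactly 16.
static_assert(maxExtentBytes(16, true) == 256, "scalable upper bound");
static_assert(maxExtentBytes(16, false) == 16, "fixed width unchanged");
```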
https://github.com/llvm/llvm-project/pull/73655