[llvm] [CodeGen] Update for scalable MemoryType in MMO (PR #70452)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 19 03:09:50 PDT 2024
https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/70452
From 71ef5e0e95daea46408e59dbb1f14146de9aa868 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Mon, 18 Mar 2024 11:01:59 +0000
Subject: [PATCH] [SelectionDAG] Update for scalable MemoryType in MMO
Remove the getSizeOrUnknown call when a MachineMemOperand is created. For a
scalable TypeSize, the MemoryType created becomes a scalable_vector.

Two MMOs that have scalable memory accesses can then use the updated BasicAA,
which understands scalable LocationSizes.
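As a rough sketch (illustration only, not code from the patch; the helper
name getMemType is hypothetical), the mapping from a LocationSize to an MMO
memory type now works like this:

  // Unknown sizes keep an invalid LLT; scalable sizes become a
  // scalable_vector over the known minimum size instead of decaying to
  // unknown-size.
  LLT getMemType(LocationSize TS) {
    if (!TS.hasValue())
      return LLT();
    uint64_t MinBits = 8 * TS.getValue().getKnownMinValue();
    return TS.isScalable() ? LLT::scalable_vector(1, MinBits)
                           : LLT::scalar(MinBits);
  }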
Original Patch by Harvin Iriawan
---
llvm/include/llvm/Analysis/MemoryLocation.h | 7 ---
llvm/include/llvm/CodeGen/MachineFunction.h | 5 +-
llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp | 34 +++++++++----
llvm/lib/CodeGen/MachineInstr.cpp | 37 ++++++++++----
llvm/lib/CodeGen/MachineOperand.cpp | 13 ++---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 51 ++++++++++++-------
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 22 ++++----
.../SelectionDAGAddressAnalysis.cpp | 2 -
.../SelectionDAG/SelectionDAGBuilder.cpp | 3 +-
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 5 +-
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 22 ++++++--
llvm/test/CodeGen/AArch64/aarch64-sme2-asm.ll | 6 +--
.../alloca-load-store-scalable-array.ll | 36 ++++++-------
.../alloca-load-store-scalable-struct.ll | 12 ++---
.../rvv/alloca-load-store-scalable-array.ll | 12 ++---
.../rvv/alloca-load-store-scalable-struct.ll | 8 +--
.../RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll | 8 +--
17 files changed, 165 insertions(+), 118 deletions(-)
diff --git a/llvm/include/llvm/Analysis/MemoryLocation.h b/llvm/include/llvm/Analysis/MemoryLocation.h
index 830eed5d60ee46..7d896c44f46795 100644
--- a/llvm/include/llvm/Analysis/MemoryLocation.h
+++ b/llvm/include/llvm/Analysis/MemoryLocation.h
@@ -297,13 +297,6 @@ class MemoryLocation {
return MemoryLocation(Ptr, LocationSize::beforeOrAfterPointer(), AATags);
}
- // Return the exact size if the exact size is known at compiletime,
- // otherwise return LocationSize::beforeOrAfterPointer().
- static LocationSize getSizeOrUnknown(const TypeSize &T) {
- return T.isScalable() ? LocationSize::beforeOrAfterPointer()
- : LocationSize::precise(T.getFixedValue());
- }
-
MemoryLocation() : Ptr(nullptr), Size(LocationSize::beforeOrAfterPointer()) {}
explicit MemoryLocation(const Value *Ptr, LocationSize Size,
diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h
index dfbf7a1e7aae53..c2bff279449398 100644
--- a/llvm/include/llvm/CodeGen/MachineFunction.h
+++ b/llvm/include/llvm/CodeGen/MachineFunction.h
@@ -1058,8 +1058,9 @@ class LLVM_EXTERNAL_VISIBILITY MachineFunction {
int64_t Offset, LocationSize Size) {
return getMachineMemOperand(
MMO, Offset,
- !Size.hasValue() || Size.isScalable()
- ? LLT()
+ !Size.hasValue() ? LLT()
+ : Size.isScalable()
+ ? LLT::scalable_vector(1, 8 * Size.getValue().getKnownMinValue())
: LLT::scalar(8 * Size.getValue().getKnownMinValue()));
}
MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO,
diff --git a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
index 9fc8ecd60b03ff..fb9656c09ca39d 100644
--- a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
@@ -128,14 +128,14 @@ bool GISelAddressing::aliasIsKnownForLoadStore(const MachineInstr &MI1,
// vector objects on the stack.
// BasePtr1 is PtrDiff away from BasePtr0. They alias if none of the
// following situations arise:
- if (PtrDiff >= 0 && Size1.hasValue()) {
+ if (PtrDiff >= 0 && Size1.hasValue() && !Size1.isScalable()) {
// [----BasePtr0----]
// [---BasePtr1--]
// ========PtrDiff========>
IsAlias = !((int64_t)Size1.getValue() <= PtrDiff);
return true;
}
- if (PtrDiff < 0 && Size2.hasValue()) {
+ if (PtrDiff < 0 && Size2.hasValue() && !Size2.isScalable()) {
// [----BasePtr0----]
// [---BasePtr1--]
// =====(-PtrDiff)====>
@@ -248,10 +248,20 @@ bool GISelAddressing::instMayAlias(const MachineInstr &MI,
return false;
}
+ // If NumBytes is scalable and the offset is not 0, conservatively return
+ // may-alias.
+ if ((MUC0.NumBytes.isScalable() && MUC0.Offset != 0) ||
+ (MUC1.NumBytes.isScalable() && MUC1.Offset != 0))
+ return true;
+
+ const bool BothNotScalable =
+ !MUC0.NumBytes.isScalable() && !MUC1.NumBytes.isScalable();
+
// Try to prove that there is aliasing, or that there is no aliasing. Either
// way, we can return now. If nothing can be proved, proceed with more tests.
bool IsAlias;
- if (GISelAddressing::aliasIsKnownForLoadStore(MI, Other, IsAlias, MRI))
+ if (BothNotScalable &&
+ GISelAddressing::aliasIsKnownForLoadStore(MI, Other, IsAlias, MRI))
return IsAlias;
// The following all rely on MMO0 and MMO1 being valid.
@@ -267,12 +277,18 @@ bool GISelAddressing::instMayAlias(const MachineInstr &MI,
Size1.hasValue()) {
// Use alias analysis information.
int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
- int64_t Overlap0 = Size0.getValue() + SrcValOffset0 - MinOffset;
- int64_t Overlap1 = Size1.getValue() + SrcValOffset1 - MinOffset;
- if (AA->isNoAlias(MemoryLocation(MUC0.MMO->getValue(), Overlap0,
- MUC0.MMO->getAAInfo()),
- MemoryLocation(MUC1.MMO->getValue(), Overlap1,
- MUC1.MMO->getAAInfo())))
+ int64_t Overlap0 =
+ Size0.getValue().getKnownMinValue() + SrcValOffset0 - MinOffset;
+ int64_t Overlap1 =
+ Size1.getValue().getKnownMinValue() + SrcValOffset1 - MinOffset;
+ LocationSize Loc0 =
+ Size0.isScalable() ? Size0 : LocationSize::precise(Overlap0);
+ LocationSize Loc1 =
+ Size1.isScalable() ? Size1 : LocationSize::precise(Overlap1);
+
+ if (AA->isNoAlias(
+ MemoryLocation(MUC0.MMO->getValue(), Loc0, MUC0.MMO->getAAInfo()),
+ MemoryLocation(MUC1.MMO->getValue(), Loc1, MUC1.MMO->getAAInfo())))
return false;
}
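A minimal standalone sketch of the conservative rule added above (the helper
name is hypothetical; it assumes the llvm::LocationSize API):

  // A scalable access at a non-zero offset cannot be bounded at compile
  // time, so instMayAlias answers "may alias" before any further checks.
  static bool scalableOffsetForcesMayAlias(LocationSize N0, int64_t Off0,
                                           LocationSize N1, int64_t Off1) {
    return (N0.isScalable() && Off0 != 0) || (N1.isScalable() && Off1 != 0);
  }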
diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp
index fe2f9ccd33a330..8102bb971ba66e 100644
--- a/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/llvm/lib/CodeGen/MachineInstr.cpp
@@ -1306,6 +1306,7 @@ static bool MemOperandsHaveAlias(const MachineFrameInfo &MFI, AAResults *AA,
LocationSize WidthB = MMOb->getSize();
bool KnownWidthA = WidthA.hasValue();
bool KnownWidthB = WidthB.hasValue();
+ bool BothMMONonScalable = !WidthA.isScalable() && !WidthB.isScalable();
const Value *ValA = MMOa->getValue();
const Value *ValB = MMOb->getValue();
@@ -1321,12 +1322,14 @@ static bool MemOperandsHaveAlias(const MachineFrameInfo &MFI, AAResults *AA,
SameVal = true;
}
- if (SameVal) {
+ if (SameVal && BothMMONonScalable) {
if (!KnownWidthA || !KnownWidthB)
return true;
int64_t MaxOffset = std::max(OffsetA, OffsetB);
- LocationSize LowWidth = (MinOffset == OffsetA) ? WidthA : WidthB;
- return (MinOffset + (int)LowWidth.getValue() > MaxOffset);
+ int64_t LowWidth = (MinOffset == OffsetA)
+ ? WidthA.getValue().getKnownMinValue()
+ : WidthB.getValue().getKnownMinValue();
+ return (MinOffset + LowWidth > MaxOffset);
}
if (!AA)
@@ -1338,15 +1341,29 @@ static bool MemOperandsHaveAlias(const MachineFrameInfo &MFI, AAResults *AA,
assert((OffsetA >= 0) && "Negative MachineMemOperand offset");
assert((OffsetB >= 0) && "Negative MachineMemOperand offset");
- int64_t OverlapA = KnownWidthA ? WidthA.getValue() + OffsetA - MinOffset
- : MemoryLocation::UnknownSize;
- int64_t OverlapB = KnownWidthB ? WidthB.getValue() + OffsetB - MinOffset
- : MemoryLocation::UnknownSize;
+ // If a scalable LocationSize has a non-zero offset, Width + Offset does
+ // not work at the moment.
+ if ((WidthA.isScalable() && OffsetA > 0) ||
+ (WidthB.isScalable() && OffsetB > 0))
+ return true;
+
+ int64_t OverlapA =
+ KnownWidthA ? WidthA.getValue().getKnownMinValue() + OffsetA - MinOffset
+ : MemoryLocation::UnknownSize;
+ int64_t OverlapB =
+ KnownWidthB ? WidthB.getValue().getKnownMinValue() + OffsetB - MinOffset
+ : MemoryLocation::UnknownSize;
+
+ LocationSize LocA = (WidthA.isScalable() || !KnownWidthA)
+ ? WidthA
+ : LocationSize::precise(OverlapA);
+ LocationSize LocB = (WidthB.isScalable() || !KnownWidthB)
+ ? WidthB
+ : LocationSize::precise(OverlapB);
return !AA->isNoAlias(
- MemoryLocation(ValA, OverlapA, UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
- MemoryLocation(ValB, OverlapB,
- UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
+ MemoryLocation(ValA, LocA, UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
+ MemoryLocation(ValB, LocB, UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
}
bool MachineInstr::mayAlias(AAResults *AA, const MachineInstr &Other,
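The SameVal fast path above is now restricted to fixed widths. A worked
example with hypothetical numbers shows why the known minimum alone is not
enough:

  // OffsetA = 0 with a scalable width of 16 bytes (16 * vscale at run time),
  // OffsetB = 32, same base. Comparing known minimums, 0 + 16 <= 32 suggests
  // the accesses are disjoint, but at vscale = 4 the first access covers
  // bytes [0, 64) and overlaps the second. Hence MinOffset + LowWidth >
  // MaxOffset is only a sound test when neither width is scalable.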
diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp
index 937ca539513afd..ace05902d5df79 100644
--- a/llvm/lib/CodeGen/MachineOperand.cpp
+++ b/llvm/lib/CodeGen/MachineOperand.cpp
@@ -1107,12 +1107,13 @@ MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags F,
const MDNode *Ranges, SyncScope::ID SSID,
AtomicOrdering Ordering,
AtomicOrdering FailureOrdering)
- : MachineMemOperand(ptrinfo, F,
- !TS.hasValue() || TS.isScalable()
- ? LLT()
- : LLT::scalar(8 * TS.getValue().getKnownMinValue()),
- BaseAlignment, AAInfo, Ranges, SSID, Ordering,
- FailureOrdering) {}
+ : MachineMemOperand(
+ ptrinfo, F,
+ !TS.hasValue() ? LLT()
+ : TS.isScalable()
+ ? LLT::scalable_vector(1, 8 * TS.getValue().getKnownMinValue())
+ : LLT::scalar(8 * TS.getValue().getKnownMinValue()),
+ BaseAlignment, AAInfo, Ranges, SSID, Ordering, FailureOrdering) {}
void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) {
// The Value and Offset may differ due to CSE. But the flags and size
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index d6d0bf02c63975..d17040d66b8118 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -24181,7 +24181,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
// TODO: Use "BaseIndexOffset" to make this more effective.
SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
- LocationSize StoreSize = MemoryLocation::getSizeOrUnknown(VT.getStoreSize());
+ LocationSize StoreSize = LocationSize::precise(VT.getStoreSize());
MachineFunction &MF = DAG.getMachineFunction();
MachineMemOperand *MMO;
if (Offset.isScalable()) {
@@ -27826,14 +27826,10 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
: (LSN->getAddressingMode() == ISD::PRE_DEC)
? -1 * C->getSExtValue()
: 0;
- LocationSize Size =
- MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize());
- return {LSN->isVolatile(),
- LSN->isAtomic(),
- LSN->getBasePtr(),
- Offset /*base offset*/,
- Size,
- LSN->getMemOperand()};
+ TypeSize Size = LSN->getMemoryVT().getStoreSize();
+ return {LSN->isVolatile(), LSN->isAtomic(),
+ LSN->getBasePtr(), Offset /*base offset*/,
+ LocationSize::precise(Size), LSN->getMemOperand()};
}
if (const auto *LN = cast<LifetimeSDNode>(N))
return {false /*isVolatile*/,
@@ -27875,6 +27871,13 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
return false;
}
+ // If NumBytes is scalable and the offset is not 0, conservatively return
+ // may-alias.
+ if ((MUC0.NumBytes.hasValue() && MUC0.NumBytes.isScalable() &&
+ MUC0.Offset != 0) ||
+ (MUC1.NumBytes.hasValue() && MUC1.NumBytes.isScalable() &&
+ MUC1.Offset != 0))
+ return true;
// Try to prove that there is aliasing, or that there is no aliasing. Either
// way, we can return now. If nothing can be proved, proceed with more tests.
bool IsAlias;
@@ -27905,18 +27908,22 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
LocationSize Size0 = MUC0.NumBytes;
LocationSize Size1 = MUC1.NumBytes;
+
if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
- Size0.hasValue() && Size1.hasValue() && Size0 == Size1 &&
- OrigAlignment0 > Size0.getValue() &&
- SrcValOffset0 % Size0.getValue() == 0 &&
- SrcValOffset1 % Size1.getValue() == 0) {
+ Size0.hasValue() && Size1.hasValue() && !Size0.isScalable() &&
+ !Size1.isScalable() && Size0 == Size1 &&
+ OrigAlignment0 > Size0.getValue().getKnownMinValue() &&
+ SrcValOffset0 % Size0.getValue().getKnownMinValue() == 0 &&
+ SrcValOffset1 % Size1.getValue().getKnownMinValue() == 0) {
int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
// There is no overlap between these relatively aligned accesses of
// similar size. Return no alias.
- if ((OffAlign0 + (int64_t)Size0.getValue()) <= OffAlign1 ||
- (OffAlign1 + (int64_t)Size1.getValue()) <= OffAlign0)
+ if ((OffAlign0 + static_cast<int64_t>(
+ Size0.getValue().getKnownMinValue())) <= OffAlign1 ||
+ (OffAlign1 + static_cast<int64_t>(
+ Size1.getValue().getKnownMinValue())) <= OffAlign0)
return false;
}
@@ -27933,12 +27940,18 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
Size0.hasValue() && Size1.hasValue()) {
// Use alias analysis information.
int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
- int64_t Overlap0 = Size0.getValue() + SrcValOffset0 - MinOffset;
- int64_t Overlap1 = Size1.getValue() + SrcValOffset1 - MinOffset;
+ int64_t Overlap0 =
+ Size0.getValue().getKnownMinValue() + SrcValOffset0 - MinOffset;
+ int64_t Overlap1 =
+ Size1.getValue().getKnownMinValue() + SrcValOffset1 - MinOffset;
+ LocationSize Loc0 =
+ Size0.isScalable() ? Size0 : LocationSize::precise(Overlap0);
+ LocationSize Loc1 =
+ Size1.isScalable() ? Size1 : LocationSize::precise(Overlap1);
if (AA->isNoAlias(
- MemoryLocation(MUC0.MMO->getValue(), Overlap0,
+ MemoryLocation(MUC0.MMO->getValue(), Loc0,
UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
- MemoryLocation(MUC1.MMO->getValue(), Overlap1,
+ MemoryLocation(MUC1.MMO->getValue(), Loc1,
UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
return false;
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 6f6ed4bd45027b..f1edc7976cf4d4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -8404,9 +8404,7 @@ SDValue SelectionDAG::getMemIntrinsicNode(
EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment,
MachineMemOperand::Flags Flags, LocationSize Size,
const AAMDNodes &AAInfo) {
- if (Size.hasValue() && MemVT.isScalableVector())
- Size = LocationSize::beforeOrAfterPointer();
- else if (Size.hasValue() && !Size.getValue())
+ if (Size.hasValue() && !Size.getValue())
Size = LocationSize::precise(MemVT.getStoreSize());
MachineFunction &MF = getMachineFunction();
@@ -8569,7 +8567,7 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
if (PtrInfo.V.isNull())
PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset);
- LocationSize Size = MemoryLocation::getSizeOrUnknown(MemVT.getStoreSize());
+ LocationSize Size = LocationSize::precise(MemVT.getStoreSize());
MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size,
Alignment, AAInfo, Ranges);
@@ -8690,8 +8688,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,
PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);
MachineFunction &MF = getMachineFunction();
- LocationSize Size =
- MemoryLocation::getSizeOrUnknown(Val.getValueType().getStoreSize());
+ LocationSize Size = LocationSize::precise(Val.getValueType().getStoreSize());
MachineMemOperand *MMO =
MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo);
return getStore(Chain, dl, Val, Ptr, MMO);
@@ -8744,8 +8741,8 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,
MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(
- PtrInfo, MMOFlags, MemoryLocation::getSizeOrUnknown(SVT.getStoreSize()),
- Alignment, AAInfo);
+ PtrInfo, MMOFlags, LocationSize::precise(SVT.getStoreSize()), Alignment,
+ AAInfo);
return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO);
}
@@ -8839,7 +8836,7 @@ SDValue SelectionDAG::getLoadVP(
if (PtrInfo.V.isNull())
PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset);
- LocationSize Size = MemoryLocation::getSizeOrUnknown(MemVT.getStoreSize());
+ LocationSize Size = LocationSize::precise(MemVT.getStoreSize());
MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size,
Alignment, AAInfo, Ranges);
@@ -8992,8 +8989,8 @@ SDValue SelectionDAG::getTruncStoreVP(SDValue Chain, const SDLoc &dl,
MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(
- PtrInfo, MMOFlags, MemoryLocation::getSizeOrUnknown(SVT.getStoreSize()),
- Alignment, AAInfo);
+ PtrInfo, MMOFlags, LocationSize::precise(SVT.getStoreSize()), Alignment,
+ AAInfo);
return getTruncStoreVP(Chain, dl, Val, Ptr, Mask, EVL, SVT, MMO,
IsCompressing);
}
@@ -11728,10 +11725,9 @@ MemSDNode::MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl,
// We check here that the size of the memory operand fits within the size of
// the MMO. This is because the MMO might indicate only a possible address
// range instead of specifying the affected memory addresses precisely.
- // TODO: Make MachineMemOperands aware of scalable vectors.
assert(
(!MMO->getType().isValid() ||
- memvt.getStoreSize().getKnownMinValue() <= MMO->getSize().getValue()) &&
+ TypeSize::isKnownLE(memvt.getStoreSize(), MMO->getSize().getValue())) &&
"Size mismatch!");
}
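The relaxed assert relies on TypeSize::isKnownLE, which only claims an
ordering when it holds for every vscale. A small illustration (standalone
sketch; the function name is hypothetical):

  #include "llvm/Support/TypeSize.h"
  using llvm::TypeSize;

  void isKnownLEExamples() {
    TypeSize F16 = TypeSize::getFixed(16);    // 16 bytes
    TypeSize S16 = TypeSize::getScalable(16); // 16 * vscale bytes
    (void)TypeSize::isKnownLE(F16, S16); // true: vscale >= 1
    (void)TypeSize::isKnownLE(S16, F16); // false: fails for vscale > 1
    (void)TypeSize::isKnownLE(S16, S16); // true: same multiple of vscale
  }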
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index 9670c3ac8430eb..f2ab88851b780e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -106,8 +106,6 @@ bool BaseIndexOffset::computeAliasing(const SDNode *Op0,
int64_t PtrDiff;
if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff)) {
// If the size of memory access is unknown, do not use it to analysis.
- // One example of unknown size memory access is to load/store scalable
- // vector objects on the stack.
// BasePtr1 is PtrDiff away from BasePtr0. They alias if none of the
// following situations arise:
if (PtrDiff >= 0 && NumBytes0.hasValue() && !NumBytes0.isScalable()) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 8780f4b931484f..f575ce37e8326b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4960,7 +4960,8 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(AS), MachineMemOperand::MOLoad,
- LocationSize::beforeOrAfterPointer(), Alignment, I.getAAMetadata(), Ranges);
+ LocationSize::beforeOrAfterPointer(), Alignment, I.getAAMetadata(),
+ Ranges);
if (!UniformBase) {
Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 02943b8a4ab158..d0c5e6b99e9eec 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -2687,10 +2687,7 @@ bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
return false;
// The maximum vscale is 16 under AArch64, return the maximal extent for the
// vector.
- Width = WidthN.isScalable()
- ? WidthN.getKnownMinValue() * AArch64::SVEMaxBitsPerVector /
- AArch64::SVEBitsPerBlock
- : WidthN.getKnownMinValue();
+ Width = LocationSize::precise(WidthN);
BaseOps.push_back(BaseOp);
return true;
}
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 105587f0810b8c..ed988566143e43 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -10340,9 +10340,15 @@ RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
+ MachineMemOperand *MMO = Load->getMemOperand();
+ MachineFunction &MF = DAG.getMachineFunction();
+ MMO = MF.getMachineMemOperand(
+ MMO, MMO->getPointerInfo(),
+ MMO->getMemoryType().isValid()
+ ? LLT::scalable_vector(1, MMO->getMemoryType().getSizeInBits())
+ : MMO->getMemoryType());
SDValue NewLoad =
- DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
- Load->getMemOperand());
+ DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(), MMO);
SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
}
@@ -10400,9 +10406,17 @@ RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
const auto [MinVLMAX, MaxVLMAX] =
RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
- getLMUL1VT(ContainerVT).bitsLE(ContainerVT))
+ getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
+ MachineMemOperand *MMO = Store->getMemOperand();
+ MachineFunction &MF = DAG.getMachineFunction();
+ MMO = MF.getMachineMemOperand(
+ MMO, MMO->getPointerInfo(),
+ MMO->getMemoryType().isValid()
+ ? LLT::scalable_vector(1, MMO->getMemoryType().getSizeInBits())
+ : MMO->getMemoryType());
return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
- Store->getMemOperand());
+ MMO);
+ }
SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
Subtarget);
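Both RISC-V hunks perform the same MMO rewrap; a condensed sketch follows
(hypothetical helper mirroring the code above):

  // Convert the MMO's fixed memory type into an equivalent scalable one so
  // the whole-register load/store passes MemSDNode's size check.
  static MachineMemOperand *rewrapScalable(MachineFunction &MF,
                                           MachineMemOperand *MMO) {
    LLT Ty = MMO->getMemoryType();
    return MF.getMachineMemOperand(
        MMO, MMO->getPointerInfo(),
        Ty.isValid() ? LLT::scalable_vector(1, Ty.getSizeInBits()) : Ty);
  }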
diff --git a/llvm/test/CodeGen/AArch64/aarch64-sme2-asm.ll b/llvm/test/CodeGen/AArch64/aarch64-sme2-asm.ll
index 58299696e78fc2..d4d803a91cfa14 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-sme2-asm.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-sme2-asm.ll
@@ -3,7 +3,7 @@
define void @UphPNR(target("aarch64.svcount") %predcnt) {
entry:
; CHECK: %0:ppr = COPY $p0
-; CHECK: STR_PXI %0, %stack.0.predcnt.addr, 0 :: (store unknown-size into %ir.predcnt.addr, align 2)
+; CHECK: STR_PXI %0, %stack.0.predcnt.addr, 0 :: (store (<vscale x 1 x s16>) into %ir.predcnt.addr)
; CHECK: %1:pnr_p8to15 = COPY %0
; CHECK: INLINEASM &"ld1w {z0.s,z1.s,z2.s,z3.s}, $0/z, [x10]", 1 /* sideeffect attdialect */, {{[0-9]+}} /* reguse:PNR_p8to15 */, %1
; CHECK: RET_ReallyLR
@@ -17,7 +17,7 @@ entry:
define void @UpaPNR(target("aarch64.svcount") %predcnt) {
entry:
; CHECK: %0:ppr = COPY $p0
-; CHECK: STR_PXI %0, %stack.0.predcnt.addr, 0 :: (store unknown-size into %ir.predcnt.addr, align 2)
+; CHECK: STR_PXI %0, %stack.0.predcnt.addr, 0 :: (store (<vscale x 1 x s16>) into %ir.predcnt.addr)
; CHECK: %1:pnr = COPY %0
; CHECK: INLINEASM &"ld1w {z0.s,z1.s,z2.s,z3.s}, $0/z, [x10]", 1 /* sideeffect attdialect */, {{[0-9]+}} /* reguse:PNR */, %1
; CHECK: RET_ReallyLR
@@ -31,7 +31,7 @@ entry:
define void @UplPNR(target("aarch64.svcount") %predcnt) {
entry:
; CHECK: %0:ppr = COPY $p0
-; CHECK: STR_PXI %0, %stack.0.predcnt.addr, 0 :: (store unknown-size into %ir.predcnt.addr, align 2)
+; CHECK: STR_PXI %0, %stack.0.predcnt.addr, 0 :: (store (<vscale x 1 x s16>) into %ir.predcnt.addr)
; CHECK: %1:pnr_3b = COPY %0
; CHECK: INLINEASM &"fadd z0.h, $0/m, z0.h, #0.5", 1 /* sideeffect attdialect */, {{[0-9]+}} /* reguse:PNR_3b */, %1
; CHECK: RET_ReallyLR
diff --git a/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll b/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll
index 9a4e01a29ecb6d..7244ac949ab88c 100644
--- a/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll
+++ b/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll
@@ -14,12 +14,12 @@ define void @array_1D(ptr %addr) #0 {
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, #2, mul vl]
-; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, #1, mul vl]
-; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0]
-; CHECK-NEXT: st1d { z0.d }, p0, [sp, #2, mul vl]
-; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl]
-; CHECK-NEXT: st1d { z2.d }, p0, [sp]
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, #2, mul vl]
+; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: st1d { z0.d }, p0, [sp]
+; CHECK-NEXT: st1d { z1.d }, p0, [sp, #2, mul vl]
+; CHECK-NEXT: st1d { z2.d }, p0, [sp, #1, mul vl]
; CHECK-NEXT: addvl sp, sp, #3
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
@@ -81,18 +81,18 @@ define void @array_2D(ptr %addr) #0 {
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 48 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, #5, mul vl]
-; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, #4, mul vl]
-; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0]
-; CHECK-NEXT: ld1d { z3.d }, p0/z, [x0, #3, mul vl]
-; CHECK-NEXT: ld1d { z4.d }, p0/z, [x0, #1, mul vl]
-; CHECK-NEXT: ld1d { z5.d }, p0/z, [x0, #2, mul vl]
-; CHECK-NEXT: st1d { z0.d }, p0, [sp, #5, mul vl]
-; CHECK-NEXT: st1d { z1.d }, p0, [sp, #4, mul vl]
-; CHECK-NEXT: st1d { z3.d }, p0, [sp, #3, mul vl]
-; CHECK-NEXT: st1d { z5.d }, p0, [sp, #2, mul vl]
-; CHECK-NEXT: st1d { z4.d }, p0, [sp, #1, mul vl]
-; CHECK-NEXT: st1d { z2.d }, p0, [sp]
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, #5, mul vl]
+; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1d { z3.d }, p0/z, [x0, #4, mul vl]
+; CHECK-NEXT: ld1d { z4.d }, p0/z, [x0, #2, mul vl]
+; CHECK-NEXT: ld1d { z5.d }, p0/z, [x0, #3, mul vl]
+; CHECK-NEXT: st1d { z0.d }, p0, [sp]
+; CHECK-NEXT: st1d { z1.d }, p0, [sp, #5, mul vl]
+; CHECK-NEXT: st1d { z3.d }, p0, [sp, #4, mul vl]
+; CHECK-NEXT: st1d { z5.d }, p0, [sp, #3, mul vl]
+; CHECK-NEXT: st1d { z4.d }, p0, [sp, #2, mul vl]
+; CHECK-NEXT: st1d { z2.d }, p0, [sp, #1, mul vl]
; CHECK-NEXT: addvl sp, sp, #6
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-struct.ll b/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-struct.ll
index 7292d52aaf4765..f03a6f018d34d0 100644
--- a/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-struct.ll
+++ b/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-struct.ll
@@ -13,12 +13,12 @@ define void @test(ptr %addr) #0 {
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, #2, mul vl]
-; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, #1, mul vl]
-; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0]
-; CHECK-NEXT: st1d { z0.d }, p0, [sp, #2, mul vl]
-; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl]
-; CHECK-NEXT: st1d { z2.d }, p0, [sp]
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, #2, mul vl]
+; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: st1d { z0.d }, p0, [sp]
+; CHECK-NEXT: st1d { z1.d }, p0, [sp, #2, mul vl]
+; CHECK-NEXT: st1d { z2.d }, p0, [sp, #1, mul vl]
; CHECK-NEXT: addvl sp, sp, #3
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll b/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll
index 1d025a2f776f82..1fe91c721f4dd2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll
@@ -18,15 +18,15 @@ define void @test(ptr %addr) {
; CHECK-NEXT: add a2, a0, a1
; CHECK-NEXT: vl1re64.v v8, (a2)
; CHECK-NEXT: slli a2, a1, 1
-; CHECK-NEXT: add a3, a0, a2
-; CHECK-NEXT: vl1re64.v v9, (a3)
+; CHECK-NEXT: vl1re64.v v9, (a0)
+; CHECK-NEXT: add a0, a0, a2
; CHECK-NEXT: vl1re64.v v10, (a0)
; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs1r.v v9, (a0)
; CHECK-NEXT: add a2, a0, a2
-; CHECK-NEXT: vs1r.v v9, (a2)
-; CHECK-NEXT: add a1, a0, a1
-; CHECK-NEXT: vs1r.v v8, (a1)
-; CHECK-NEXT: vs1r.v v10, (a0)
+; CHECK-NEXT: vs1r.v v10, (a2)
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vs1r.v v8, (a0)
; CHECK-NEXT: csrrs a0, vlenb, zero
; CHECK-NEXT: slli a0, a0, 2
; CHECK-NEXT: add sp, sp, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-struct.ll b/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-struct.ll
index 64031f8a93598f..a9a680d54d5897 100644
--- a/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-struct.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-struct.ll
@@ -16,13 +16,13 @@ define <vscale x 1 x double> @test(ptr %addr, i64 %vl) {
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
; CHECK-NEXT: csrrs a2, vlenb, zero
-; CHECK-NEXT: add a3, a0, a2
-; CHECK-NEXT: vl1re64.v v8, (a3)
+; CHECK-NEXT: vl1re64.v v8, (a0)
+; CHECK-NEXT: add a0, a0, a2
; CHECK-NEXT: vl1re64.v v9, (a0)
; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs1r.v v8, (a0)
; CHECK-NEXT: add a2, a0, a2
-; CHECK-NEXT: vs1r.v v8, (a2)
-; CHECK-NEXT: vs1r.v v9, (a0)
+; CHECK-NEXT: vs1r.v v9, (a2)
; CHECK-NEXT: vl1re64.v v8, (a2)
; CHECK-NEXT: vl1re64.v v9, (a0)
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll
index 0544204cce792a..52bd15742ef4bb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll
@@ -16,8 +16,8 @@ define void @vpmerge_vpload_store(<vscale x 2 x i32> %passthru, ptr %p, <vscale
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x10
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vrnov0 = COPY $v8
; CHECK-NEXT: $v0 = COPY [[COPY1]]
- ; CHECK-NEXT: [[PseudoVLE32_V_M1_MASK:%[0-9]+]]:vrnov0 = PseudoVLE32_V_M1_MASK [[COPY3]], [[COPY2]], $v0, [[COPY]], 5 /* e32 */, 0 /* tu, mu */
- ; CHECK-NEXT: VS1R_V killed [[PseudoVLE32_V_M1_MASK]], [[COPY2]] :: (store unknown-size into %ir.p, align 8)
+ ; CHECK-NEXT: [[PseudoVLE32_V_M1_MASK:%[0-9]+]]:vrnov0 = PseudoVLE32_V_M1_MASK [[COPY3]], [[COPY2]], $v0, [[COPY]], 5 /* e32 */, 0 /* tu, mu */ :: (load unknown-size from %ir.p, align 8)
+ ; CHECK-NEXT: VS1R_V killed [[PseudoVLE32_V_M1_MASK]], [[COPY2]] :: (store (<vscale x 1 x s64>) into %ir.p)
; CHECK-NEXT: PseudoRET
%splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
%mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
@@ -37,8 +37,8 @@ define void @vpselect_vpload_store(<vscale x 2 x i32> %passthru, ptr %p, <vscale
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x10
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vrnov0 = COPY $v8
; CHECK-NEXT: $v0 = COPY [[COPY1]]
- ; CHECK-NEXT: [[PseudoVLE32_V_M1_MASK:%[0-9]+]]:vrnov0 = PseudoVLE32_V_M1_MASK [[COPY3]], [[COPY2]], $v0, [[COPY]], 5 /* e32 */, 1 /* ta, mu */
- ; CHECK-NEXT: VS1R_V killed [[PseudoVLE32_V_M1_MASK]], [[COPY2]] :: (store unknown-size into %ir.p, align 8)
+ ; CHECK-NEXT: [[PseudoVLE32_V_M1_MASK:%[0-9]+]]:vrnov0 = PseudoVLE32_V_M1_MASK [[COPY3]], [[COPY2]], $v0, [[COPY]], 5 /* e32 */, 1 /* ta, mu */ :: (load unknown-size from %ir.p, align 8)
+ ; CHECK-NEXT: VS1R_V killed [[PseudoVLE32_V_M1_MASK]], [[COPY2]] :: (store (<vscale x 1 x s64>) into %ir.p)
; CHECK-NEXT: PseudoRET
%splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
%mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer