[llvm-branch-commits] [llvm] [SelectionDAG][X86] Widen <2 x T> vector types for atomic load (PR #120598)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Jan 23 21:59:35 PST 2025
================
@@ -5907,6 +5910,82 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
N->getOperand(1), N->getOperand(2));
}
+static std::optional<EVT> findMemType(SelectionDAG &DAG,
+ const TargetLowering &TLI, unsigned Width,
+ EVT WidenVT, unsigned Align,
+ unsigned WidenEx);
+
+SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *LD) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
+ EVT LdVT = LD->getMemoryVT();
+ SDLoc dl(LD);
+ assert(LdVT.isVector() && WidenVT.isVector());
+ assert(LdVT.isScalableVector() == WidenVT.isScalableVector());
+ assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType());
+
+ // Load information
+ SDValue Chain = LD->getChain();
+ SDValue BasePtr = LD->getBasePtr();
+ MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = LD->getAAInfo();
+
+ TypeSize LdWidth = LdVT.getSizeInBits();
+ TypeSize WidenWidth = WidenVT.getSizeInBits();
+ TypeSize WidthDiff = WidenWidth - LdWidth;
+ // Allow wider loads if they are sufficiently aligned to avoid memory faults
+ // and if the original load is simple.
+ unsigned LdAlign =
+ (!LD->isSimple() || LdVT.isScalableVector()) ? 0 : LD->getAlign().value();
+
+ // Find the vector type that can load from.
+ std::optional<EVT> FirstVT =
+ findMemType(DAG, TLI, LdWidth.getKnownMinValue(), WidenVT, LdAlign,
+ WidthDiff.getKnownMinValue());
+
+ if (!FirstVT)
+ return SDValue();
+
+ SmallVector<EVT, 8> MemVTs;
+ TypeSize FirstVTWidth = FirstVT->getSizeInBits();
+
+ SDValue LdOp = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, *FirstVT, *FirstVT, Chain,
+ BasePtr, LD->getMemOperand());
+
+ // Load the element with one instruction.
+ SDValue Result;
+ assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth));
+ if (!FirstVT->isVector()) {
+ unsigned NumElts =
+ WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), *FirstVT, NumElts);
+ SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
+ Result = DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
+ }
+ else if (FirstVT == WidenVT)
+ Result = LdOp;
+ else {
+ // TODO: We don't currently have any tests that exercise this code path.
+ assert(WidenWidth.getFixedValue() % FirstVTWidth.getFixedValue() == 0);
+ unsigned NumConcat =
+ WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
+ SmallVector<SDValue, 16> ConcatOps(NumConcat);
+ SDValue UndefVal = DAG.getUNDEF(*FirstVT);
+ ConcatOps[0] = LdOp;
+ for (unsigned i = 1; i != NumConcat; ++i)
+ ConcatOps[i] = UndefVal;
+ Result = DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, ConcatOps);
+ }
+
----------------
arsenm wrote:
The type coercion code is the shareable part and could be extracted into a helper function.
https://github.com/llvm/llvm-project/pull/120598
More information about the llvm-branch-commits
mailing list