[llvm] [NVPTX] fold movs into loads and stores (PR #144581)
Princeton Ferro via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 17 18:53:04 PDT 2025
================
@@ -5047,26 +5043,229 @@ PerformFADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
return SDValue();
}
-static SDValue PerformStoreCombineHelper(SDNode *N, std::size_t Front,
- std::size_t Back) {
+/// Combine extractelts into a load by increasing the number of return values.
+static SDValue
+combineUnpackingMovIntoLoad(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
+ // Don't run this optimization before the legalizer
+ if (DCI.isBeforeLegalize())
+ return SDValue();
+
+ EVT ElemVT = N->getValueType(0);
+ if (!Isv2x16VT(ElemVT))
+ return SDValue();
+
+ // Check whether all outputs are either used by an extractelt or are
+ // glue/chain nodes
+ if (!all_of(N->uses(), [&](SDUse &U) {
+ return U.getValueType() != ElemVT ||
+ (U.getUser()->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ // also check that the extractelt is used if this is an
+ // ISD::LOAD, otherwise it may be optimized by something else
+ (N->getOpcode() != ISD::LOAD || !U.getUser()->use_empty()));
+ }))
+ return SDValue();
+
+ auto *LD = cast<MemSDNode>(N);
+ EVT MemVT = LD->getMemoryVT();
+ SDLoc DL(LD);
+
+ // the new opcode after we double the number of result values
+ NVPTXISD::NodeType Opcode;
+ SmallVector<SDValue> Operands(LD->ops());
+ switch (LD->getOpcode()) {
+ // Any packed type is legal, so the legalizer will not have lowered ISD::LOAD
+ // -> NVPTXISD::Load. We have to do it here.
+ case ISD::LOAD:
+ Opcode = NVPTXISD::LoadV2;
+ {
+ Operands.push_back(DCI.DAG.getIntPtrConstant(
+ cast<LoadSDNode>(LD)->getExtensionType(), DL));
+ Align Alignment = LD->getAlign();
+ const auto &TD = DCI.DAG.getDataLayout();
+ Align PrefAlign =
+ TD.getPrefTypeAlign(MemVT.getTypeForEVT(*DCI.DAG.getContext()));
+ if (Alignment < PrefAlign) {
+ // This load is not sufficiently aligned, so bail out and let this
+ // vector load be scalarized. Note that we may still be able to emit
+ // smaller vector loads. For example, if we are loading a <4 x float>
+ // with an alignment of 8, this check will fail but the legalizer will
+ // try again with 2 x <2 x float>, which will succeed with an alignment
+ // of 8.
+ return SDValue();
+ }
+ }
+ break;
+ case NVPTXISD::LoadParamV2:
+ Opcode = NVPTXISD::LoadParamV4;
+ break;
+ case NVPTXISD::LoadV2:
+ Opcode = NVPTXISD::LoadV4;
+ break;
+ case NVPTXISD::LoadV4:
+ // PTX doesn't support v8 for 16-bit values
+ case NVPTXISD::LoadV8:
+ // PTX doesn't support the next doubling of outputs
+ return SDValue();
+ }
+
+ SmallVector<EVT> NewVTs;
+ for (EVT VT : LD->values()) {
+ if (VT == ElemVT) {
+ const EVT ScalarVT = ElemVT.getVectorElementType();
+ NewVTs.insert(NewVTs.end(), {ScalarVT, ScalarVT});
+ } else
+ NewVTs.push_back(VT);
+ }
----------------
Prince781 wrote:
Done.
https://github.com/llvm/llvm-project/pull/144581
More information about the llvm-commits
mailing list