[llvm] [DAGCombiner] Refactor and improve ReduceLoadOpStoreWidth (PR #119564)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun Dec 15 08:25:51 PST 2024
================
@@ -20367,68 +20371,69 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
if (NewBW >= BitWidth)
return SDValue();
- // TODO: For big-endian we probably want to align given the most significant
- // bit being modified instead of adjusting ShAmt based on least significant
- // bits. This to reduce the risk of failing on the alignment check below. If
- // for example VT.getStoreSize()==5 and Imm is 0x0000ffff00, then we want to
- // find NewBW=16, and we want to load/store with a PtrOff set to 2. But then
- // ShAmt should be set to 8, which isn't a multiple of NewBW. But given
- // that isNarrowingProfitable doesn't seem to be overridden for any in-tree
- // big-endian target, then the support for big-endian here isn't covered by
- // any in-tree lit tests, so it is unfortunately not highly optimized
- // either. It should be possible to improve that by using
- // ReduceLoadOpStoreWidthForceNarrowingProfitable.
-
- // If the lsb that is modified does not start at the type bitwidth boundary,
- // align to start at the previous boundary.
- ShAmt = ShAmt - (ShAmt % NewBW);
-
- // Make sure we do not access memory outside the memory touched by the
- // original load/store.
- if (ShAmt + NewBW > VT.getStoreSizeInBits())
- return SDValue();
+ // If we get this far, NewVT/NewBW reflect a power-of-2 sized type that is
+ // large enough to cover all bits that should be modified. This type might
+ // however be larger than really needed (such as i32 while we actually only
+ // need to modify one byte). Now we need to find out how to align the memory
+ // accesses to satisfy preferred alignments, while avoiding accesses to
+ // memory outside the store size of the original access.
+
+ unsigned VTStoreSize = VT.getStoreSizeInBits().getFixedValue();
+
+ // Let ShAmt denote the number of bits to skip, counted from the least
+ // significant bits of Imm. And let PtrOff denote how much the pointer needs
+ // to be offset (in bytes) for the new access.
+ unsigned ShAmt = 0;
+ uint64_t PtrOff = 0;
+ for (; ShAmt + NewBW <= VTStoreSize; ShAmt += 8) {
+ // Make sure the range [ShAmt, ShAmt+NewBW) covers both LSB and MSB.
+ if (ShAmt > LSB)
+ return SDValue();
+ if (ShAmt + NewBW < MSB)
+ continue;
- APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
- std::min(BitWidth, ShAmt + NewBW));
- if ((Imm & Mask) == Imm) {
- APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
- if (Opc == ISD::AND)
- NewImm ^= APInt::getAllOnes(NewBW);
- uint64_t PtrOff = ShAmt / 8;
- // For big endian targets, we need to adjust the offset to the pointer to
- // load the correct bytes.
- if (DAG.getDataLayout().isBigEndian())
- PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
+ // Calculate PtrOff.
+ unsigned PtrAdjustmentInBits = DAG.getDataLayout().isBigEndian()
+ ? VTStoreSize - NewBW - ShAmt
+ : ShAmt;
+ PtrOff = PtrAdjustmentInBits / 8;
+ // Now check whether the narrow access is allowed and fast, considering alignments.
unsigned IsFast = 0;
Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
- if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
- LD->getAddressSpace(), NewAlign,
- LD->getMemOperand()->getFlags(), &IsFast) ||
- !IsFast)
- return SDValue();
-
- SDValue NewPtr =
- DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(PtrOff), SDLoc(LD));
- SDValue NewLD =
- DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
- LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
- LD->getMemOperand()->getFlags(), LD->getAAInfo());
- SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
- DAG.getConstant(NewImm, SDLoc(Value),
- NewVT));
- SDValue NewST =
- DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
- ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
-
- AddToWorklist(NewPtr.getNode());
- AddToWorklist(NewLD.getNode());
- AddToWorklist(NewVal.getNode());
- WorklistRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
- ++OpsNarrowed;
- return NewST;
+ if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
+ LD->getAddressSpace(), NewAlign,
+ LD->getMemOperand()->getFlags(), &IsFast) &&
+ IsFast)
+ break;
}
+ // If the loop above did not find any accepted ShAmt we need to exit here.
+ if (ShAmt + NewBW > VTStoreSize)
+ return SDValue();
+
+ APInt NewImm = Imm.lshr(ShAmt).trunc(NewBW);
+ if (Opc == ISD::AND)
+ NewImm ^= APInt::getAllOnes(NewBW);
----------------
RKSimon wrote:
`NewImm.flipAllBits();` or `NewImm = ~NewImm;`?
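For reference, all three spellings compute the same value, since XOR with an all-ones mask is the bitwise complement. A minimal standalone sketch (not part of the patch; assumes LLVM's ADT headers are available, and uses NewBW=8 purely as an example width) checking that they agree:

```cpp
#include "llvm/ADT/APInt.h"
#include <cassert>

int main() {
  using llvm::APInt;
  const unsigned NewBW = 8; // example width; the patch derives this value
  APInt NewImm(NewBW, 0x5A);

  APInt A = NewImm ^ APInt::getAllOnes(NewBW); // patch as written
  APInt B = ~NewImm;                           // suggested: operator~
  APInt C = NewImm;
  C.flipAllBits();                             // suggested: in-place flip

  assert(A == B && B == C); // all three yield 0xA5
  return 0;
}
```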
https://github.com/llvm/llvm-project/pull/119564
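As a side note on the new endian-dependent PtrOff computation: here is a minimal sketch (not from the patch; plain arithmetic only) replaying the example from the removed TODO, where VT.getStoreSize()==5 and Imm is 0x0000ffff00, so NewBW=16 and ShAmt=8:

```cpp
#include <cassert>

int main() {
  // Example numbers from the removed TODO comment.
  unsigned VTStoreSize = 40; // 5-byte access, in bits
  unsigned NewBW = 16;       // narrowed access width, in bits
  unsigned ShAmt = 8;        // bits skipped from the LSB of Imm

  // Little-endian: the byte offset follows the shift amount directly.
  unsigned PtrOffLE = ShAmt / 8;
  assert(PtrOffLE == 1);

  // Big-endian: count the adjustment from the most significant end,
  // as the new code does via VTStoreSize - NewBW - ShAmt.
  unsigned PtrOffBE = (VTStoreSize - NewBW - ShAmt) / 8;
  assert(PtrOffBE == 2); // the PtrOff the old TODO said we want
  return 0;
}
```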