[llvm] [llvm][ARM][CodeGen] Disable MEMCPY LDM/STM inlining for Cortex v7-m (PR #106378)

Nashe Mncube via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 18 07:51:23 PDT 2024


================
@@ -138,6 +138,116 @@ SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall(
   return CallResult.second;
 }
 
+SDValue ARMSelectionDAGInfo::EmitMemcpyAsLdSt(
+    SelectionDAG &DAG, SDLoc dl, const ARMSubtarget &Subtarget, SDValue Chain,
+    SDValue Dst, SDValue Src, uint64_t SizeVal, bool isVolatile,
+    MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
+  // Do repeated batches of 4-byte loads and stores.
+  unsigned BytesLeft = SizeVal & 3;
+  unsigned NumMemOps = SizeVal >> 2;
+  unsigned EmittedNumMemOps = 0;
+  EVT VT = MVT::i32;
+  unsigned VTSize = 4;
+  unsigned i = 0;
+  // Emit a maximum of 4 loads in Thumb1 since we have fewer registers
+  const unsigned MaxLoads = Subtarget.isThumb1Only() ? 4 : 6;
+  SDValue TFOps[6];
+  SDValue Loads[6];
+  uint64_t SrcOff = 0, DstOff = 0;
+
+  MachineMemOperand::Flags MOFlags = MachineMemOperand::Flags::MONone;
+  if (isVolatile)
+    MOFlags = MachineMemOperand::Flags::MOVolatile;
+  MachineMemOperand::Flags LoadMOFlags = MOFlags;
+  if (SrcPtrInfo.isDereferenceable(SizeVal, *DAG.getContext(),
+                                   DAG.getDataLayout()))
+    LoadMOFlags |= MachineMemOperand::Flags::MODereferenceable;
+  if (auto *V = SrcPtrInfo.V.dyn_cast<const Value *>())
+    if (isa<GlobalVariable>(V) && cast<GlobalVariable>(V)->isConstant())
+      LoadMOFlags |= MachineMemOperand::Flags::MOInvariant;
+  MachineMemOperand::Flags StoreMOFlags = MOFlags;
+  if (DstPtrInfo.isDereferenceable(SizeVal, *DAG.getContext(),
+                                   DAG.getDataLayout()))
+    StoreMOFlags |= MachineMemOperand::Flags::MODereferenceable;
+
+  // Emit up to MaxLoads loads, then a TokenFactor barrier, then the
+  // same number of stores.  The loads and stores may get combined into
+  // ldm/stm later on.
+  while (EmittedNumMemOps < NumMemOps) {
+    for (i = 0; i < MaxLoads && EmittedNumMemOps + i < NumMemOps; ++i) {
+      Loads[i] = DAG.getLoad(VT, dl, Chain,
+                             DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
+                                         DAG.getConstant(SrcOff, dl, MVT::i32)),
+                             SrcPtrInfo.getWithOffset(SrcOff), MaybeAlign(0),
+                             LoadMOFlags);
+      TFOps[i] = Loads[i].getValue(1);
+      SrcOff += VTSize;
+    }
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ArrayRef(TFOps, i));
+
+    for (i = 0; i < MaxLoads && EmittedNumMemOps + i < NumMemOps; ++i) {
+      TFOps[i] = DAG.getStore(
+          Chain, dl, Loads[i],
+          DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
+                      DAG.getConstant(DstOff, dl, MVT::i32)),
+          DstPtrInfo.getWithOffset(DstOff), MaybeAlign(0), StoreMOFlags);
+      DstOff += VTSize;
+    }
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ArrayRef(TFOps, i));
+
+    EmittedNumMemOps += i;
+  }
+
+  if (BytesLeft == 0)
+    return Chain;
+
+  // Issue loads / stores for the trailing (1 - 3) bytes.
+  unsigned BytesLeftSave = BytesLeft;
+  i = 0;
+  while (BytesLeft) {
+    if (BytesLeft >= 2) {
+      VT = MVT::i16;
+      VTSize = 2;
+    } else {
+      VT = MVT::i8;
+      VTSize = 1;
+    }
+
+    Loads[i] = DAG.getLoad(VT, dl, Chain,
+                           DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
+                                       DAG.getConstant(SrcOff, dl, MVT::i32)),
+                           SrcPtrInfo.getWithOffset(SrcOff), MaybeAlign(0),
+                           LoadMOFlags);
+    TFOps[i] = Loads[i].getValue(1);
+    ++i;
+    SrcOff += VTSize;
+    BytesLeft -= VTSize;
+  }
+  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ArrayRef(TFOps, i));
+
+  i = 0;
+  BytesLeft = BytesLeftSave;
+  while (BytesLeft) {
+    if (BytesLeft >= 2) {
+      VT = MVT::i16;
+      VTSize = 2;
+    } else {
+      VT = MVT::i8;
+      VTSize = 1;
+    }
+
+    TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
----------------
nasherm wrote:

Unfortunately no. There's a data dependency between the `Chain` variable

https://github.com/llvm/llvm-project/pull/106378


More information about the llvm-commits mailing list