[llvm] Enable generic overlapping optimization for memmove (PR #177885)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 25 14:42:38 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Osama Abdelkader (osamakader)
<details>
<summary>Changes</summary>
Fixes: #165948
---
Full diff: https://github.com/llvm/llvm-project/pull/177885.diff
2 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (+14-1)
- (modified) llvm/test/CodeGen/AArch64/memmove-inline.ll (+98)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 4ca1bb053fce5..83774fedb0005 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -8967,7 +8967,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
if (!TLI.findOptimalMemOpLowering(
C, MemOps, Limit,
MemOp::Copy(Size, DstAlignCanChange, Alignment, *SrcAlign,
- /*IsVolatile*/ true),
+ isVol),
DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
MF.getFunction().getAttributes()))
return SDValue();
@@ -9008,6 +9008,12 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
unsigned VTSize = VT.getSizeInBits() / 8;
SDValue Value;
+ if (i == NumMemOps - 1 && i != 0 && VTSize > Size - SrcOff) {
+ // Issuing an unaligned load / store pair that overlaps with the previous
+ // pair. Adjust the offset accordingly.
+ SrcOff -= VTSize - (Size - SrcOff);
+ }
+
bool isDereferenceable =
SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL);
MachineMemOperand::Flags SrcMMOFlags = MMOFlags;
@@ -9024,11 +9030,18 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
}
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
OutChains.clear();
+ DstOff = 0;
for (unsigned i = 0; i < NumMemOps; i++) {
EVT VT = MemOps[i];
unsigned VTSize = VT.getSizeInBits() / 8;
SDValue Store;
+ if (i == NumMemOps - 1 && i != 0 && VTSize > Size - DstOff) {
+ // Issuing an unaligned load / store pair that overlaps with the previous
+ // pair. Adjust the offset accordingly.
+ DstOff -= VTSize - (Size - DstOff);
+ }
+
Store = DAG.getStore(
Chain, dl, LoadValues[i],
DAG.getObjectPtrOffset(dl, Dst, TypeSize::getFixed(DstOff)),
diff --git a/llvm/test/CodeGen/AArch64/memmove-inline.ll b/llvm/test/CodeGen/AArch64/memmove-inline.ll
index 641c48dd0f1c5..4ca180616442d 100644
--- a/llvm/test/CodeGen/AArch64/memmove-inline.ll
+++ b/llvm/test/CodeGen/AArch64/memmove-inline.ll
@@ -120,3 +120,101 @@ entry:
}
declare void @llvm.memmove.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1)
+
+; Test overlapping memmove optimization for non-power-of-two sizes
+; These should use overlapping loads/stores instead of mixed-size operations
+
+define void @move7(ptr %out, ptr %in) {
+; CHECK-ALIGNED-LABEL: move7:
+; CHECK-ALIGNED: // %bb.0: // %entry
+; CHECK-ALIGNED-NEXT: ldur w8, [x1, #3]
+; CHECK-ALIGNED-NEXT: ldr w9, [x1]
+; CHECK-ALIGNED-NEXT: stur w8, [x0, #3]
+; CHECK-ALIGNED-NEXT: str w9, [x0]
+; CHECK-ALIGNED-NEXT: ret
+entry:
+ call void @llvm.memmove.p0.p0.i64(ptr %out, ptr %in, i64 7, i1 false)
+ ret void
+}
+
+define void @move13(ptr %out, ptr %in) {
+; CHECK-ALIGNED-LABEL: move13:
+; CHECK-ALIGNED: // %bb.0: // %entry
+; CHECK-ALIGNED-NEXT: ldur x8, [x1, #5]
+; CHECK-ALIGNED-NEXT: ldr x9, [x1]
+; CHECK-ALIGNED-NEXT: stur x8, [x0, #5]
+; CHECK-ALIGNED-NEXT: str x9, [x0]
+; CHECK-ALIGNED-NEXT: ret
+entry:
+ call void @llvm.memmove.p0.p0.i64(ptr %out, ptr %in, i64 13, i1 false)
+ ret void
+}
+
+define void @move15(ptr %out, ptr %in) {
+; CHECK-ALIGNED-LABEL: move15:
+; CHECK-ALIGNED: // %bb.0: // %entry
+; CHECK-ALIGNED-NEXT: ldur x8, [x1, #7]
+; CHECK-ALIGNED-NEXT: ldr x9, [x1]
+; CHECK-ALIGNED-NEXT: stur x8, [x0, #7]
+; CHECK-ALIGNED-NEXT: str x9, [x0]
+; CHECK-ALIGNED-NEXT: ret
+entry:
+ call void @llvm.memmove.p0.p0.i64(ptr %out, ptr %in, i64 15, i1 false)
+ ret void
+}
+
+define void @move25(ptr %out, ptr %in) {
+; CHECK-ALIGNED-LABEL: move25:
+; CHECK-ALIGNED: // %bb.0: // %entry
+; CHECK-ALIGNED-NEXT: ldur q0, [x1, #9]
+; CHECK-ALIGNED-NEXT: ldr q1, [x1]
+; CHECK-ALIGNED-NEXT: stur q0, [x0, #9]
+; CHECK-ALIGNED-NEXT: str q1, [x0]
+; CHECK-ALIGNED-NEXT: ret
+entry:
+ call void @llvm.memmove.p0.p0.i64(ptr %out, ptr %in, i64 25, i1 false)
+ ret void
+}
+
+define void @move33(ptr %out, ptr %in) {
+; CHECK-ALIGNED-LABEL: move33:
+; CHECK-ALIGNED: // %bb.0: // %entry
+; CHECK-ALIGNED-NEXT: ldp q1, q0, [x1]
+; CHECK-ALIGNED-NEXT: ldrb w8, [x1, #32]
+; CHECK-ALIGNED-NEXT: strb w8, [x0, #32]
+; CHECK-ALIGNED-NEXT: stp q1, q0, [x0]
+; CHECK-ALIGNED-NEXT: ret
+entry:
+ call void @llvm.memmove.p0.p0.i64(ptr %out, ptr %in, i64 33, i1 false)
+ ret void
+}
+
+define void @move49(ptr %out, ptr %in) {
+; CHECK-ALIGNED-LABEL: move49:
+; CHECK-ALIGNED: // %bb.0: // %entry
+; CHECK-ALIGNED-NEXT: ldp q2, q0, [x1, #16]
+; CHECK-ALIGNED-NEXT: ldrb w8, [x1, #48]
+; CHECK-ALIGNED-NEXT: ldr q1, [x1]
+; CHECK-ALIGNED-NEXT: strb w8, [x0, #48]
+; CHECK-ALIGNED-NEXT: stp q2, q0, [x0, #16]
+; CHECK-ALIGNED-NEXT: str q1, [x0]
+; CHECK-ALIGNED-NEXT: ret
+entry:
+ call void @llvm.memmove.p0.p0.i64(ptr %out, ptr %in, i64 49, i1 false)
+ ret void
+}
+
+define void @move65(ptr %out, ptr %in) {
+; CHECK-ALIGNED-LABEL: move65:
+; CHECK-ALIGNED: // %bb.0: // %entry
+; CHECK-ALIGNED-NEXT: ldp q0, q1, [x1, #32]
+; CHECK-ALIGNED-NEXT: ldrb w8, [x1, #64]
+; CHECK-ALIGNED-NEXT: ldp q2, q3, [x1]
+; CHECK-ALIGNED-NEXT: strb w8, [x0, #64]
+; CHECK-ALIGNED-NEXT: stp q0, q1, [x0, #32]
+; CHECK-ALIGNED-NEXT: stp q2, q3, [x0]
+; CHECK-ALIGNED-NEXT: ret
+entry:
+ call void @llvm.memmove.p0.p0.i64(ptr %out, ptr %in, i64 65, i1 false)
+ ret void
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/177885
More information about the llvm-commits
mailing list