[llvm] [CodeGen] Generate mem intrinsic address calculations with nuw (PR #80184)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 31 11:35:22 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-webassembly
Author: Derek Schuff (dschuff)
<details>
<summary>Changes</summary>
When directly generating loads/stores for small constant memset/memcpy intrinsics,
this change as written uses `DAG.getObjectPtrOffset` to generate address arithmetic
with 'nuw' when the src/dst pointers are known to be dereferenceable.
For WebAssembly, this allows the arithmetic to be folded directly into the load/store
constant offset field.
See #<!-- -->79692
---
Full diff: https://github.com/llvm/llvm-project/pull/80184.diff
2 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (+12-4)
- (added) llvm/test/CodeGen/WebAssembly/mem-intrinsics-offsets.ll (+30)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 3c1343836187a..a52bbdf92cf8d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -7574,14 +7574,18 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
Value = DAG.getExtLoad(
ISD::EXTLOAD, dl, NVT, Chain,
- DAG.getMemBasePlusOffset(Src, TypeSize::getFixed(SrcOff), dl),
+ isDereferenceable ? DAG.getObjectPtrOffset(dl, Src, TypeSize::getFixed(SrcOff)) :
+ DAG.getMemBasePlusOffset(Src, TypeSize::getFixed(SrcOff), dl),
SrcPtrInfo.getWithOffset(SrcOff), VT,
commonAlignment(*SrcAlign, SrcOff), SrcMMOFlags, NewAAInfo);
OutLoadChains.push_back(Value.getValue(1));
+ isDereferenceable =
+ DstPtrInfo.getWithOffset(DstOff).isDereferenceable(VTSize, C, DL);
Store = DAG.getTruncStore(
Chain, dl, Value,
- DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl),
+ isDereferenceable ? DAG.getObjectPtrOffset(dl, Dst, TypeSize::getFixed(DstOff)) :
+ DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl),
DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags, NewAAInfo);
OutStoreChains.push_back(Store);
}
@@ -7715,7 +7719,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
MachineMemOperand::Flags SrcMMOFlags = MMOFlags;
if (isDereferenceable)
SrcMMOFlags |= MachineMemOperand::MODereferenceable;
-
+// TODO: Fix memmove too.
Value = DAG.getLoad(
VT, dl, Chain,
DAG.getMemBasePlusOffset(Src, TypeSize::getFixed(SrcOff), dl),
@@ -7863,9 +7867,13 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
Value = getMemsetValue(Src, VT, DAG, dl);
}
assert(Value.getValueType() == VT && "Value with wrong type.");
+ bool isDereferenceable = DstPtrInfo.isDereferenceable(
+ DstOff, *DAG.getContext(), DAG.getDataLayout());
SDValue Store = DAG.getStore(
Chain, dl, Value,
- DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl),
+ isDereferenceable
+ ? DAG.getObjectPtrOffset(dl, Dst, TypeSize::getFixed(DstOff))
+ : DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl),
DstPtrInfo.getWithOffset(DstOff), Alignment,
isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone,
NewAAInfo);
diff --git a/llvm/test/CodeGen/WebAssembly/mem-intrinsics-offsets.ll b/llvm/test/CodeGen/WebAssembly/mem-intrinsics-offsets.ll
new file mode 100644
index 0000000000000..15e68ab4122f9
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/mem-intrinsics-offsets.ll
@@ -0,0 +1,30 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mcpu=mvp -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -tail-dup-placement=0 | FileCheck %
+
+target triple = "wasm32-unknown-unknown"
+
+define void @call_memset(ptr dereferenceable(16)) #0 {
+; CHECK-LABEL: call_memset:
+; CHECK: .functype call_memset (i32) -> ()
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: i64.const $push0=, 0
+; CHECK-NEXT: i64.store 8($0):p2align=0, $pop0
+; CHECK-NEXT: i64.const $push1=, 0
+; CHECK-NEXT: i64.store 0($0):p2align=0, $pop1
+; CHECK-NEXT: return
+ call void @llvm.memset.p0.i32(ptr align 1 %0, i8 0, i32 16, i1 false)
+ ret void
+}
+
+define void @call_memcpy(ptr dereferenceable(16) %dst, ptr dereferenceable(16) %src) #0 {
+; CHECK-LABEL: call_memcpy:
+; CHECK: .functype call_memcpy (i32, i32) -> ()
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: i64.load $push0=, 8($1):p2align=0
+; CHECK-NEXT: i64.store 8($0):p2align=0, $pop0
+; CHECK-NEXT: i64.load $push1=, 0($1):p2align=0
+; CHECK-NEXT: i64.store 0($0):p2align=0, $pop1
+; CHECK-NEXT: return
+ call void @llvm.memcpy.p0.p0.i32(ptr align 1 %dst, ptr align 1 %src, i32 16, i1 false)
+ ret void
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/80184
More information about the llvm-commits
mailing list