[llvm] [CodeGen] Generate mem intrinsic address calculations with nuw (PR #80184)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 31 11:35:22 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-webassembly

Author: Derek Schuff (dschuff)

<details>
<summary>Changes</summary>

When directly generating loads/stores for small constant memset/memcpy intrinsics, 
this change as written uses `DAG.getObjectPtrOffset` to generate address arithmetic
with 'nuw' when the src/dst pointers are known to be dereferenceable. 
For WebAssembly, this allows the arithmetic to be folded directly into the load/store
constant offset field.

See #<!-- -->79692

---
Full diff: https://github.com/llvm/llvm-project/pull/80184.diff


2 Files Affected:

- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (+12-4) 
- (added) llvm/test/CodeGen/WebAssembly/mem-intrinsics-offsets.ll (+30) 


``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 3c1343836187a..a52bbdf92cf8d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -7574,14 +7574,18 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
 
       Value = DAG.getExtLoad(
           ISD::EXTLOAD, dl, NVT, Chain,
-          DAG.getMemBasePlusOffset(Src, TypeSize::getFixed(SrcOff), dl),
+          isDereferenceable ? DAG.getObjectPtrOffset(dl, Src, TypeSize::getFixed(SrcOff)) :
+            DAG.getMemBasePlusOffset(Src, TypeSize::getFixed(SrcOff), dl),
           SrcPtrInfo.getWithOffset(SrcOff), VT,
           commonAlignment(*SrcAlign, SrcOff), SrcMMOFlags, NewAAInfo);
       OutLoadChains.push_back(Value.getValue(1));
 
+      isDereferenceable =
+        DstPtrInfo.getWithOffset(DstOff).isDereferenceable(VTSize, C, DL);
       Store = DAG.getTruncStore(
           Chain, dl, Value,
-          DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl),
+          isDereferenceable ? DAG.getObjectPtrOffset(dl, Dst, TypeSize::getFixed(DstOff)) :
+            DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl),
           DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags, NewAAInfo);
       OutStoreChains.push_back(Store);
     }
@@ -7715,7 +7719,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
     MachineMemOperand::Flags SrcMMOFlags = MMOFlags;
     if (isDereferenceable)
       SrcMMOFlags |= MachineMemOperand::MODereferenceable;
-
+// TODO: Fix memmove too.
     Value = DAG.getLoad(
         VT, dl, Chain,
         DAG.getMemBasePlusOffset(Src, TypeSize::getFixed(SrcOff), dl),
@@ -7863,9 +7867,13 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
         Value = getMemsetValue(Src, VT, DAG, dl);
     }
     assert(Value.getValueType() == VT && "Value with wrong type.");
+    bool isDereferenceable = DstPtrInfo.isDereferenceable(
+        DstOff, *DAG.getContext(), DAG.getDataLayout());
     SDValue Store = DAG.getStore(
         Chain, dl, Value,
-        DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl),
+        isDereferenceable
+            ? DAG.getObjectPtrOffset(dl, Dst, TypeSize::getFixed(DstOff))
+            : DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl),
         DstPtrInfo.getWithOffset(DstOff), Alignment,
         isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone,
         NewAAInfo);
diff --git a/llvm/test/CodeGen/WebAssembly/mem-intrinsics-offsets.ll b/llvm/test/CodeGen/WebAssembly/mem-intrinsics-offsets.ll
new file mode 100644
index 0000000000000..15e68ab4122f9
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/mem-intrinsics-offsets.ll
@@ -0,0 +1,30 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mcpu=mvp -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -tail-dup-placement=0 | FileCheck %
+
+target triple = "wasm32-unknown-unknown"
+
+define void @call_memset(ptr dereferenceable(16)) #0 {
+; CHECK-LABEL: call_memset:
+; CHECK:         .functype call_memset (i32) -> ()
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    i64.const $push0=, 0
+; CHECK-NEXT:    i64.store 8($0):p2align=0, $pop0
+; CHECK-NEXT:    i64.const $push1=, 0
+; CHECK-NEXT:    i64.store 0($0):p2align=0, $pop1
+; CHECK-NEXT:    return
+    call void @llvm.memset.p0.i32(ptr align 1 %0, i8 0, i32 16, i1 false)
+    ret void
+}
+
+define void @call_memcpy(ptr dereferenceable(16) %dst, ptr dereferenceable(16) %src) #0 {
+; CHECK-LABEL: call_memcpy:
+; CHECK:         .functype call_memcpy (i32, i32) -> ()
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    i64.load $push0=, 8($1):p2align=0
+; CHECK-NEXT:    i64.store 8($0):p2align=0, $pop0
+; CHECK-NEXT:    i64.load $push1=, 0($1):p2align=0
+; CHECK-NEXT:    i64.store 0($0):p2align=0, $pop1
+; CHECK-NEXT:    return
+    call void @llvm.memcpy.p0.p0.i32(ptr align 1 %dst, ptr align 1 %src, i32 16, i1 false)
+    ret void
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/80184


More information about the llvm-commits mailing list