[llvm] [CodeGen] Generate mem intrinsic address calculations with nuw (PR #80184)

Derek Schuff via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 31 11:46:12 PST 2024


https://github.com/dschuff updated https://github.com/llvm/llvm-project/pull/80184

>From 8fce40a38370f92926f1dabbc00c29e2d48b46e7 Mon Sep 17 00:00:00 2001
From: Derek Schuff <dschuff at chromium.org>
Date: Tue, 30 Jan 2024 17:39:00 -0800
Subject: [PATCH 1/5] Use getObjectPtrOffset to generate constant offsets for
 memcpy/memset

---
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 19 ++++++++++++++-----
 llvm/lib/IR/Function.cpp                      |  1 +
 .../WebAssembly/mem-intrinsics-offsets.ll     | 14 ++++++++++++++
 3 files changed, 29 insertions(+), 5 deletions(-)
 create mode 100644 llvm/test/CodeGen/WebAssembly/mem-intrinsics-offsets.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 3c1343836187a..45ce8b75cb1e6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -7571,17 +7571,21 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
         SrcMMOFlags |= MachineMemOperand::MODereferenceable;
       if (isConstant)
         SrcMMOFlags |= MachineMemOperand::MOInvariant;
-
+llvm::errs() << "isDereferenceable " << isDereferenceable<<'\n';
       Value = DAG.getExtLoad(
           ISD::EXTLOAD, dl, NVT, Chain,
-          DAG.getMemBasePlusOffset(Src, TypeSize::getFixed(SrcOff), dl),
+          isDereferenceable ? DAG.getObjectPtrOffset(dl, Src, TypeSize::getFixed(SrcOff)) :
+            DAG.getMemBasePlusOffset(Src, TypeSize::getFixed(SrcOff), dl),
           SrcPtrInfo.getWithOffset(SrcOff), VT,
           commonAlignment(*SrcAlign, SrcOff), SrcMMOFlags, NewAAInfo);
       OutLoadChains.push_back(Value.getValue(1));
 
+      isDereferenceable =
+        DstPtrInfo.getWithOffset(DstOff).isDereferenceable(VTSize, C, DL);
       Store = DAG.getTruncStore(
           Chain, dl, Value,
-          DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl),
+          isDereferenceable ? DAG.getObjectPtrOffset(dl, Dst, TypeSize::getFixed(DstOff)) :
+            DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl),
           DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags, NewAAInfo);
       OutStoreChains.push_back(Store);
     }
@@ -7715,7 +7719,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
     MachineMemOperand::Flags SrcMMOFlags = MMOFlags;
     if (isDereferenceable)
       SrcMMOFlags |= MachineMemOperand::MODereferenceable;
-
+// TODO: Fix memmove too.
     Value = DAG.getLoad(
         VT, dl, Chain,
         DAG.getMemBasePlusOffset(Src, TypeSize::getFixed(SrcOff), dl),
@@ -7863,9 +7867,13 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
         Value = getMemsetValue(Src, VT, DAG, dl);
     }
     assert(Value.getValueType() == VT && "Value with wrong type.");
+    bool Dereferenceable = DstPtrInfo.isDereferenceable(DstOff, *DAG.getContext(), DAG.getDataLayout());
+    llvm::errs() << llvm::format(" calling, dstoff %d deref is %d", DstOff, Dereferenceable)<<"\n";
+
     SDValue Store = DAG.getStore(
         Chain, dl, Value,
-        DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl),
+        Dereferenceable ? DAG.getObjectPtrOffset(dl, Dst, TypeSize::getFixed(DstOff)) :
+          DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl),
         DstPtrInfo.getWithOffset(DstOff), Alignment,
         isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone,
         NewAAInfo);
@@ -8112,6 +8120,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
   // For cases within the target-specified limits, this is the best choice.
   ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
   if (ConstantSize) {
+    llvm::errs() << "Constant size\n";
     // Memset with size zero? Just return the original chain.
     if (ConstantSize->isZero())
       return Chain;
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index 22e2455462bf4..2ae3cc3081165 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -242,6 +242,7 @@ Type *Argument::getParamInAllocaType() const {
 uint64_t Argument::getDereferenceableBytes() const {
   assert(getType()->isPointerTy() &&
          "Only pointers have dereferenceable bytes");
+         this->dump();
   return getParent()->getParamDereferenceableBytes(getArgNo());
 }
 
diff --git a/llvm/test/CodeGen/WebAssembly/mem-intrinsics-offsets.ll b/llvm/test/CodeGen/WebAssembly/mem-intrinsics-offsets.ll
new file mode 100644
index 0000000000000..9890a0383ed30
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/mem-intrinsics-offsets.ll
@@ -0,0 +1,14 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mcpu=mvp -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -tail-dup-placement=0 | FileCheck %
+
+target triple = "wasm32-unknown-unknown"
+
+define void @call_memset(ptr dereferenceable(16)) #0 {
+    call void @llvm.memset.p0.i32(ptr align 1 %0, i8 0, i32 16, i1 false)
+    ret void
+}
+
+define void @call_memcpy(ptr dereferenceable(16) %dst, ptr dereferenceable(16) %src) #0 {
+    call void @llvm.memcpy.p0.p0.i32(ptr align 1 %dst, ptr align 1 %src, i32 16, i1 false)
+    ret void
+}

>From 6e0993209b44f9be2ac4aed04a1a3ea242d8d6f5 Mon Sep 17 00:00:00 2001
From: Derek Schuff <dschuff at chromium.org>
Date: Wed, 31 Jan 2024 11:11:16 -0800
Subject: [PATCH 2/5] remove debug prints, reformat

---
 llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 13 ++++++-------
 llvm/lib/IR/Function.cpp                       |  1 -
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 45ce8b75cb1e6..a52bbdf92cf8d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -7571,7 +7571,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
         SrcMMOFlags |= MachineMemOperand::MODereferenceable;
       if (isConstant)
         SrcMMOFlags |= MachineMemOperand::MOInvariant;
-llvm::errs() << "isDereferenceable " << isDereferenceable<<'\n';
+
       Value = DAG.getExtLoad(
           ISD::EXTLOAD, dl, NVT, Chain,
           isDereferenceable ? DAG.getObjectPtrOffset(dl, Src, TypeSize::getFixed(SrcOff)) :
@@ -7867,13 +7867,13 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
         Value = getMemsetValue(Src, VT, DAG, dl);
     }
     assert(Value.getValueType() == VT && "Value with wrong type.");
-    bool Dereferenceable = DstPtrInfo.isDereferenceable(DstOff, *DAG.getContext(), DAG.getDataLayout());
-    llvm::errs() << llvm::format(" calling, dstoff %d deref is %d", DstOff, Dereferenceable)<<"\n";
-
+    bool isDereferenceable = DstPtrInfo.isDereferenceable(
+        DstOff, *DAG.getContext(), DAG.getDataLayout());
     SDValue Store = DAG.getStore(
         Chain, dl, Value,
-        Dereferenceable ? DAG.getObjectPtrOffset(dl, Dst, TypeSize::getFixed(DstOff)) :
-          DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl),
+        isDereferenceable
+            ? DAG.getObjectPtrOffset(dl, Dst, TypeSize::getFixed(DstOff))
+            : DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl),
         DstPtrInfo.getWithOffset(DstOff), Alignment,
         isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone,
         NewAAInfo);
@@ -8120,7 +8120,6 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
   // For cases within the target-specified limits, this is the best choice.
   ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
   if (ConstantSize) {
-    llvm::errs() << "Constant size\n";
     // Memset with size zero? Just return the original chain.
     if (ConstantSize->isZero())
       return Chain;
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index 2ae3cc3081165..22e2455462bf4 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -242,7 +242,6 @@ Type *Argument::getParamInAllocaType() const {
 uint64_t Argument::getDereferenceableBytes() const {
   assert(getType()->isPointerTy() &&
          "Only pointers have dereferenceable bytes");
-         this->dump();
   return getParent()->getParamDereferenceableBytes(getArgNo());
 }
 

>From 108e14ac38583bd5848e1e6bf423d375c46bee39 Mon Sep 17 00:00:00 2001
From: Derek Schuff <dschuff at chromium.org>
Date: Wed, 31 Jan 2024 11:11:26 -0800
Subject: [PATCH 3/5] autogenerate test expectation

---
 .../WebAssembly/mem-intrinsics-offsets.ll        | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/llvm/test/CodeGen/WebAssembly/mem-intrinsics-offsets.ll b/llvm/test/CodeGen/WebAssembly/mem-intrinsics-offsets.ll
index 9890a0383ed30..15e68ab4122f9 100644
--- a/llvm/test/CodeGen/WebAssembly/mem-intrinsics-offsets.ll
+++ b/llvm/test/CodeGen/WebAssembly/mem-intrinsics-offsets.ll
@@ -4,11 +4,27 @@
 target triple = "wasm32-unknown-unknown"
 
 define void @call_memset(ptr dereferenceable(16)) #0 {
+; CHECK-LABEL: call_memset:
+; CHECK:         .functype call_memset (i32) -> ()
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    i64.const $push0=, 0
+; CHECK-NEXT:    i64.store 8($0):p2align=0, $pop0
+; CHECK-NEXT:    i64.const $push1=, 0
+; CHECK-NEXT:    i64.store 0($0):p2align=0, $pop1
+; CHECK-NEXT:    return
     call void @llvm.memset.p0.i32(ptr align 1 %0, i8 0, i32 16, i1 false)
     ret void
 }
 
 define void @call_memcpy(ptr dereferenceable(16) %dst, ptr dereferenceable(16) %src) #0 {
+; CHECK-LABEL: call_memcpy:
+; CHECK:         .functype call_memcpy (i32, i32) -> ()
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    i64.load $push0=, 8($1):p2align=0
+; CHECK-NEXT:    i64.store 8($0):p2align=0, $pop0
+; CHECK-NEXT:    i64.load $push1=, 0($1):p2align=0
+; CHECK-NEXT:    i64.store 0($0):p2align=0, $pop1
+; CHECK-NEXT:    return
     call void @llvm.memcpy.p0.p0.i32(ptr align 1 %dst, ptr align 1 %src, i32 16, i1 false)
     ret void
 }

>From 1f0d980ddea1e115d630ec93493b6fb966977040 Mon Sep 17 00:00:00 2001
From: Derek Schuff <dschuff at chromium.org>
Date: Wed, 31 Jan 2024 11:44:50 -0800
Subject: [PATCH 4/5] fix test invocation

---
 llvm/test/CodeGen/WebAssembly/mem-intrinsics-offsets.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/WebAssembly/mem-intrinsics-offsets.ll b/llvm/test/CodeGen/WebAssembly/mem-intrinsics-offsets.ll
index 15e68ab4122f9..76cf6d5e1ace0 100644
--- a/llvm/test/CodeGen/WebAssembly/mem-intrinsics-offsets.ll
+++ b/llvm/test/CodeGen/WebAssembly/mem-intrinsics-offsets.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mcpu=mvp -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -tail-dup-placement=0 | FileCheck %
+; RUN: llc < %s -mcpu=mvp -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s
 
 target triple = "wasm32-unknown-unknown"
 

>From 99eddb8f5b684ba74be41ad48ca61478c2ecdb3b Mon Sep 17 00:00:00 2001
From: Derek Schuff <dschuff at chromium.org>
Date: Wed, 31 Jan 2024 11:45:57 -0800
Subject: [PATCH 5/5] fix format

---
 llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index a52bbdf92cf8d..6b90171f7c227 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -7574,18 +7574,20 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
 
       Value = DAG.getExtLoad(
           ISD::EXTLOAD, dl, NVT, Chain,
-          isDereferenceable ? DAG.getObjectPtrOffset(dl, Src, TypeSize::getFixed(SrcOff)) :
-            DAG.getMemBasePlusOffset(Src, TypeSize::getFixed(SrcOff), dl),
+          isDereferenceable
+              ? DAG.getObjectPtrOffset(dl, Src, TypeSize::getFixed(SrcOff))
+              : DAG.getMemBasePlusOffset(Src, TypeSize::getFixed(SrcOff), dl),
           SrcPtrInfo.getWithOffset(SrcOff), VT,
           commonAlignment(*SrcAlign, SrcOff), SrcMMOFlags, NewAAInfo);
       OutLoadChains.push_back(Value.getValue(1));
 
       isDereferenceable =
-        DstPtrInfo.getWithOffset(DstOff).isDereferenceable(VTSize, C, DL);
+          DstPtrInfo.getWithOffset(DstOff).isDereferenceable(VTSize, C, DL);
       Store = DAG.getTruncStore(
           Chain, dl, Value,
-          isDereferenceable ? DAG.getObjectPtrOffset(dl, Dst, TypeSize::getFixed(DstOff)) :
-            DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl),
+          isDereferenceable
+              ? DAG.getObjectPtrOffset(dl, Dst, TypeSize::getFixed(DstOff))
+              : DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl),
           DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags, NewAAInfo);
       OutStoreChains.push_back(Store);
     }
@@ -7719,7 +7721,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
     MachineMemOperand::Flags SrcMMOFlags = MMOFlags;
     if (isDereferenceable)
       SrcMMOFlags |= MachineMemOperand::MODereferenceable;
-// TODO: Fix memmove too.
+    // TODO: Use getObjectPtrOffset for dereferenceable memmove offsets too.
     Value = DAG.getLoad(
         VT, dl, Chain,
         DAG.getMemBasePlusOffset(Src, TypeSize::getFixed(SrcOff), dl),



More information about the llvm-commits mailing list