[llvm] r306209 - [SelectionDAG] set dereferenceable flag when expanding memcpy/memmove

Hiroshi Inoue via llvm-commits llvm-commits at lists.llvm.org
Sat Jun 24 08:17:40 PDT 2017


Author: inouehrs
Date: Sat Jun 24 10:17:38 2017
New Revision: 306209

URL: http://llvm.org/viewvc/llvm-project?rev=306209&view=rev
Log:
[SelectionDAG] set dereferenceable flag when expanding memcpy/memmove

When SelectionDAG expands a memcpy (or memmove) call into a sequence of load and store instructions, it disregards the dereferenceable flag even if the source pointer is known to be dereferenceable.
This results in an assertion failure if SelectionDAG commonizes a load instruction generated for memcpy with another load instruction for the source pointer.
This patch makes SelectionDAG set the dereferenceable flag for the load instructions properly to avoid the assertion failure.

Differential Revision: https://reviews.llvm.org/D34467



Added:
    llvm/trunk/test/CodeGen/PowerPC/memcpy_dereferenceable.ll
Modified:
    llvm/trunk/include/llvm/CodeGen/MachineMemOperand.h
    llvm/trunk/lib/CodeGen/MachineInstr.cpp
    llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Modified: llvm/trunk/include/llvm/CodeGen/MachineMemOperand.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/MachineMemOperand.h?rev=306209&r1=306208&r2=306209&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/MachineMemOperand.h (original)
+++ llvm/trunk/include/llvm/CodeGen/MachineMemOperand.h Sat Jun 24 10:17:38 2017
@@ -59,6 +59,11 @@ struct MachinePointerInfo {
     return MachinePointerInfo(V.get<const PseudoSourceValue*>(), Offset+O);
   }
 
+  /// Return true if memory region [V, V+Offset+Size) is known to be
+  /// dereferenceable.
+  bool isDereferenceable(unsigned Size, LLVMContext &C,
+                         const DataLayout &DL) const;
+
   /// Return the LLVM IR address space number that this pointer points into.
   unsigned getAddrSpace() const;
 

Modified: llvm/trunk/lib/CodeGen/MachineInstr.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineInstr.cpp?rev=306209&r1=306208&r2=306209&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/MachineInstr.cpp (original)
+++ llvm/trunk/lib/CodeGen/MachineInstr.cpp Sat Jun 24 10:17:38 2017
@@ -21,6 +21,7 @@
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/CodeGen/GlobalISel/RegisterBank.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
@@ -558,6 +559,23 @@ unsigned MachinePointerInfo::getAddrSpac
   return cast<PointerType>(V.get<const Value*>()->getType())->getAddressSpace();
 }
 
+/// isDereferenceable - Return true if V is always dereferenceable for
+/// Offset + Size bytes.
+bool MachinePointerInfo::isDereferenceable(unsigned Size, LLVMContext &C,
+                                           const DataLayout &DL) const {
+  if (!V.is<const Value*>())
+    return false;
+
+  const Value *BasePtr = V.get<const Value*>();
+  if (BasePtr == nullptr)
+    return false;
+
+  // APInt's first argument is a width in bits; getPointerSize() returns
+  // bytes, which would truncate Offset + Size to a too-narrow integer.
+  return isDereferenceableAndAlignedPointer(
+      BasePtr, 1, APInt(DL.getPointerSizeInBits(), Offset + Size), DL);
+}
+
 /// getConstantPool - Return a MachinePointerInfo record that refers to the
 /// constant pool.
 MachinePointerInfo MachinePointerInfo::getConstantPool(MachineFunction &MF) {

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=306209&r1=306208&r2=306209&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Sat Jun 24 10:17:38 2017
@@ -4897,6 +4897,8 @@ static SDValue getMemcpyLoadsAndStores(S
   // TODO: In the AlwaysInline case, if the size is big then generate a loop
   // rather than maybe a humongous number of loads and stores.
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  const DataLayout &DL = DAG.getDataLayout();
+  LLVMContext &C = *DAG.getContext();
   std::vector<EVT> MemOps;
   bool DstAlignCanChange = false;
   MachineFunction &MF = DAG.getMachineFunction();
@@ -4923,15 +4925,15 @@ static SDValue getMemcpyLoadsAndStores(S
     return SDValue();
 
   if (DstAlignCanChange) {
-    Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
-    unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty);
+    Type *Ty = MemOps[0].getTypeForEVT(C);
+    unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty);
 
     // Don't promote to an alignment that would require dynamic stack
     // realignment.
     const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
     if (!TRI->needsStackRealignment(MF))
       while (NewAlign > Align &&
-             DAG.getDataLayout().exceedsNaturalStackAlignment(NewAlign))
+             DL.exceedsNaturalStackAlignment(NewAlign))
           NewAlign /= 2;
 
     if (NewAlign > Align) {
@@ -4991,12 +4993,19 @@ static SDValue getMemcpyLoadsAndStores(S
       // thing to do is generate a LoadExt/StoreTrunc pair.  These simplify
       // to Load/Store if NVT==VT.
       // FIXME does the case above also need this?
-      EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+      EVT NVT = TLI.getTypeToTransformTo(C, VT);
       assert(NVT.bitsGE(VT));
+
+      bool isDereferenceable =
+        SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL);
+      MachineMemOperand::Flags SrcMMOFlags = MMOFlags;
+      if (isDereferenceable)
+        SrcMMOFlags |= MachineMemOperand::MODereferenceable;
+
       Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
                              DAG.getMemBasePlusOffset(Src, SrcOff, dl),
                              SrcPtrInfo.getWithOffset(SrcOff), VT,
-                             MinAlign(SrcAlign, SrcOff), MMOFlags);
+                             MinAlign(SrcAlign, SrcOff), SrcMMOFlags);
       OutChains.push_back(Value.getValue(1));
       Store = DAG.getTruncStore(
           Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
@@ -5024,6 +5033,8 @@ static SDValue getMemmoveLoadsAndStores(
   // Expand memmove to a series of load and store ops if the size operand falls
   // below a certain threshold.
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  const DataLayout &DL = DAG.getDataLayout();
+  LLVMContext &C = *DAG.getContext();
   std::vector<EVT> MemOps;
   bool DstAlignCanChange = false;
   MachineFunction &MF = DAG.getMachineFunction();
@@ -5046,8 +5057,8 @@ static SDValue getMemmoveLoadsAndStores(
     return SDValue();
 
   if (DstAlignCanChange) {
-    Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
-    unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty);
+    Type *Ty = MemOps[0].getTypeForEVT(C);
+    unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty);
     if (NewAlign > Align) {
       // Give the stack frame object a larger alignment if needed.
       if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign)
@@ -5068,9 +5079,15 @@ static SDValue getMemmoveLoadsAndStores(
     unsigned VTSize = VT.getSizeInBits() / 8;
     SDValue Value;
 
+    bool isDereferenceable =
+      SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL);
+    MachineMemOperand::Flags SrcMMOFlags = MMOFlags;
+    if (isDereferenceable)
+      SrcMMOFlags |= MachineMemOperand::MODereferenceable;
+
     Value =
         DAG.getLoad(VT, dl, Chain, DAG.getMemBasePlusOffset(Src, SrcOff, dl),
-                    SrcPtrInfo.getWithOffset(SrcOff), SrcAlign, MMOFlags);
+                    SrcPtrInfo.getWithOffset(SrcOff), SrcAlign, SrcMMOFlags);
     LoadValues.push_back(Value);
     LoadChains.push_back(Value.getValue(1));
     SrcOff += VTSize;

Added: llvm/trunk/test/CodeGen/PowerPC/memcpy_dereferenceable.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/memcpy_dereferenceable.ll?rev=306209&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/memcpy_dereferenceable.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/memcpy_dereferenceable.ll Sat Jun 24 10:17:38 2017
@@ -0,0 +1,74 @@
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
+
+; This code causes an assertion failure if the dereferenceable flag is not properly set in the loads generated for memcpy/memmove
+
+; CHECK-LABEL: @func
+; CHECK: lxvd2x [[VREG:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK-NOT: lxvd2x
+; CHECK: stxvd2x [[VREG:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: stxvd2x [[VREG:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: blr
+
+define void @func(i1 %flag) {
+entry:
+  %pairs = alloca [4 x <2 x i64>], align 8
+  %pair1 = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* %pairs, i64 0, i64 1
+  %pair2 = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* %pairs, i64 0, i64 2
+  %pvec1 = bitcast <2 x i64>* %pair1 to <2 x i64>*
+  %pvec2 = bitcast <2 x i64>* %pair2 to <2 x i64>*
+  %dst = bitcast [4 x <2 x i64>]* %pairs to i8*
+  %src = bitcast <2 x i64>* %pair2 to i8*
+  br i1 %flag, label %end, label %dummy
+
+end:
+  ; copy third element into first element by memcpy
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %dst, i8* %src, i64 16, i32 8, i1 false)
+  ; copy third element into second element by LD/ST
+  %vec2 = load <2 x i64>, <2 x i64>* %pvec2, align 8
+  store <2 x i64> %vec2, <2 x i64>* %pvec1, align 8
+  ret void
+
+dummy:
+  ; to make use of %src in another BB
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %src, i8* %src, i64 0, i32 0, i1 false)
+  br label %end
+}
+
+
+; CHECK-LABEL: @func2
+; CHECK: lxvd2x [[VREG:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK-NOT: lxvd2x
+; CHECK: stxvd2x [[VREG:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: stxvd2x [[VREG:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: blr
+
+define void @func2(i1 %flag) {
+entry:
+  %pairs = alloca [4 x <2 x i64>], align 8
+  %pair1 = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* %pairs, i64 0, i64 1
+  %pair2 = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* %pairs, i64 0, i64 2
+  %pvec1 = bitcast <2 x i64>* %pair1 to <2 x i64>*
+  %pvec2 = bitcast <2 x i64>* %pair2 to <2 x i64>*
+  %dst = bitcast [4 x <2 x i64>]* %pairs to i8*
+  %src = bitcast <2 x i64>* %pair2 to i8*
+  br i1 %flag, label %end, label %dummy
+
+end:
+  ; copy third element into first element by memmove
+  call void @llvm.memmove.p0i8.p0i8.i64(i8* nonnull %dst, i8* %src, i64 16, i32 8, i1 false)
+  ; copy third element into second element by LD/ST
+  %vec2 = load <2 x i64>, <2 x i64>* %pvec2, align 8
+  store <2 x i64> %vec2, <2 x i64>* %pvec1, align 8
+  ret void
+
+dummy:
+  ; to make use of %src in another BB
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %src, i8* %src, i64 0, i32 0, i1 false)
+  br label %end
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #1
+declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #1
+
+attributes #1 = { argmemonly nounwind }




More information about the llvm-commits mailing list