[llvm] r256890 - [SelectionDAGBuilder] Set NoUnsignedWrap for inbounds gep and load/store offsets.

Dan Gohman via llvm-commits <llvm-commits at lists.llvm.org>
Tue Jan 5 16:43:08 PST 2016


Author: djg
Date: Tue Jan  5 18:43:06 2016
New Revision: 256890

URL: http://llvm.org/viewvc/llvm-project?rev=256890&view=rev
Log:
[SelectionDAGBuilder] Set NoUnsignedWrap for inbounds gep and load/store offsets.

In an inbounds getelementptr, when an index produces a constant non-negative
offset to add to the base, the add can be assumed to not have unsigned overflow.
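
For example (a sketch, using a hypothetical pointer-sized integer value
%p.int for %p; the add is the conceptual address computation formed during
DAG lowering, not literal emitted IR):

  %s = getelementptr inbounds i32, i32* %p, i32 6
  ; address computation equivalent to:
  ;   %addr = add nuw i32 %p.int, 24   ; 6 * sizeof(i32) = 24, nonnegative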

This relies on the assumption that no object can occupy more than half the
address space. A larger object isn't possible in C, because then it wouldn't
be possible to represent the difference between the start of the object and
one-past-the-end in a ptrdiff_t.
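
To sketch the arithmetic on a 32-bit target: ptrdiff_t can represent at most
2^31 - 1, so an object (together with its one-past-the-end address) spans at
most half of the 2^32-byte address space. For an inbounds base B and a
constant offset O with 0 <= O <= objectsize, B + O stays within the object's
contiguous unsigned address range, so the add cannot wrap modulo 2^32.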

Setting the NoUnsignedWrap flag is theoretically useful in general, and is
specifically useful to the WebAssembly backend, since it permits stronger
constant offset folding.
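
As an illustration (mirroring the new tests below), an inbounds gep with a
constant index:

  %s = getelementptr inbounds i32, i32* %p, i32 6
  %t = load i32, i32* %s

can now be selected to a single load with the constant folded into the
offset field:

  i32.load $push0=, 24($0)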

Differential Revision: http://reviews.llvm.org/D15544

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
    llvm/trunk/test/CodeGen/WebAssembly/offset.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=256890&r1=256889&r2=256890&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Tue Jan  5 18:43:06 2016
@@ -6843,9 +6843,13 @@ SDValue DAGCombiner::ReduceLoadWidth(SDN
   uint64_t PtrOff = ShAmt / 8;
   unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
   SDLoc DL(LN0);
+  // The original load itself didn't wrap, so an offset within it doesn't.
+  SDNodeFlags Flags;
+  Flags.setNoUnsignedWrap(true);
   SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
                                PtrType, LN0->getBasePtr(),
-                               DAG.getConstant(PtrOff, DL, PtrType));
+                               DAG.getConstant(PtrOff, DL, PtrType),
+                               &Flags);
   AddToWorklist(NewPtr.getNode());
 
   SDValue Load;

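(To illustrate the ReduceLoadWidth case above with a hypothetical example:
when only the high half of a loaded i32 is used, as in

  %t = load i32, i32* %p
  %s = lshr i32 %t, 16
  %r = trunc i32 %s to i16

the combiner can narrow this to an i16 load at %p + 2 on a little-endian
target; the original 4-byte load was fully in bounds, so the 2-byte offset
cannot wrap.)
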
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp?rev=256890&r1=256889&r2=256890&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Tue Jan  5 18:43:06 2016
@@ -1329,12 +1329,18 @@ void SelectionDAGBuilder::visitRet(const
     ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &Offsets);
     unsigned NumValues = ValueVTs.size();
 
+    // An aggregate return value cannot wrap around the address space, so
+    // offsets to its parts don't wrap either.
+    SDNodeFlags Flags;
+    Flags.setNoUnsignedWrap(true);
+
     SmallVector<SDValue, 4> Chains(NumValues);
     for (unsigned i = 0; i != NumValues; ++i) {
       SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(),
                                 RetPtr.getValueType(), RetPtr,
                                 DAG.getIntPtrConstant(Offsets[i],
-                                                      getCurSDLoc()));
+                                                      getCurSDLoc()),
+                                &Flags);
       Chains[i] =
         DAG.getStore(Chain, getCurSDLoc(),
                      SDValue(RetOp.getNode(), RetOp.getResNo() + i),
@@ -2994,8 +3000,15 @@ void SelectionDAGBuilder::visitGetElemen
       if (Field) {
         // N = N + Offset
         uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field);
+
+        // In an inbounds GEP with an offset that is nonnegative even when
+        // interpreted as signed, assume there is no unsigned overflow.
+        SDNodeFlags Flags;
+        if (int64_t(Offset) >= 0 && cast<GEPOperator>(I).isInBounds())
+          Flags.setNoUnsignedWrap(true);
+
         N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N,
-                        DAG.getConstant(Offset, dl, N.getValueType()));
+                        DAG.getConstant(Offset, dl, N.getValueType()), &Flags);
       }
 
       Ty = StTy->getElementType(Field);
@@ -3020,7 +3033,14 @@ void SelectionDAGBuilder::visitGetElemen
         SDValue OffsVal = VectorWidth ?
           DAG.getConstant(Offs, dl, MVT::getVectorVT(PtrTy, VectorWidth)) :
           DAG.getConstant(Offs, dl, PtrTy);
-        N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal);
+
+        // In an inbounds GEP with an offset that is nonnegative even when
+        // interpreted as signed, assume there is no unsigned overflow.
+        SDNodeFlags Flags;
+        if (Offs.isNonNegative() && cast<GEPOperator>(I).isInBounds())
+          Flags.setNoUnsignedWrap(true);
+
+        N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, &Flags);
         continue;
       }
 
@@ -3092,10 +3112,13 @@ void SelectionDAGBuilder::visitAlloca(co
     Align = 0;
 
   // Round the size of the allocation up to the stack alignment size
-  // by add SA-1 to the size.
+  // by adding SA-1 to the size. This doesn't overflow because we're computing
+  // an address inside an alloca.
+  SDNodeFlags Flags;
+  Flags.setNoUnsignedWrap(true);
   AllocSize = DAG.getNode(ISD::ADD, dl,
                           AllocSize.getValueType(), AllocSize,
-                          DAG.getIntPtrConstant(StackAlign - 1, dl));
+                          DAG.getIntPtrConstant(StackAlign - 1, dl), &Flags);
 
   // Mask out the low bits for alignment purposes.
   AllocSize = DAG.getNode(ISD::AND, dl,
@@ -3168,6 +3191,11 @@ void SelectionDAGBuilder::visitLoad(cons
   if (isVolatile)
     Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG);
 
+  // An aggregate load cannot wrap around the address space, so offsets to its
+  // parts don't wrap either.
+  SDNodeFlags Flags;
+  Flags.setNoUnsignedWrap(true);
+
   SmallVector<SDValue, 4> Values(NumValues);
   SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
   EVT PtrVT = Ptr.getValueType();
@@ -3188,7 +3216,8 @@ void SelectionDAGBuilder::visitLoad(cons
     }
     SDValue A = DAG.getNode(ISD::ADD, dl,
                             PtrVT, Ptr,
-                            DAG.getConstant(Offsets[i], dl, PtrVT));
+                            DAG.getConstant(Offsets[i], dl, PtrVT),
+                            &Flags);
     SDValue L = DAG.getLoad(ValueVTs[i], dl, Root,
                             A, MachinePointerInfo(SV, Offsets[i]), isVolatile,
                             isNonTemporal, isInvariant, Alignment, AAInfo,
@@ -3243,6 +3272,11 @@ void SelectionDAGBuilder::visitStore(con
   AAMDNodes AAInfo;
   I.getAAMetadata(AAInfo);
 
+  // An aggregate store cannot wrap around the address space, so offsets to its
+  // parts don't wrap either.
+  SDNodeFlags Flags;
+  Flags.setNoUnsignedWrap(true);
+
   unsigned ChainI = 0;
   for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
     // See visitLoad comments.
@@ -3253,7 +3287,7 @@ void SelectionDAGBuilder::visitStore(con
       ChainI = 0;
     }
     SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr,
-                              DAG.getConstant(Offsets[i], dl, PtrVT));
+                              DAG.getConstant(Offsets[i], dl, PtrVT), &Flags);
     SDValue St = DAG.getStore(Root, dl,
                               SDValue(Src.getNode(), Src.getResNo() + i),
                               Add, MachinePointerInfo(PtrV, Offsets[i]),
@@ -7202,10 +7236,15 @@ TargetLowering::LowerCallTo(TargetLoweri
     ReturnValues.resize(NumValues);
     SmallVector<SDValue, 4> Chains(NumValues);
 
+    // An aggregate return value cannot wrap around the address space, so
+    // offsets to its parts don't wrap either.
+    SDNodeFlags Flags;
+    Flags.setNoUnsignedWrap(true);
+
     for (unsigned i = 0; i < NumValues; ++i) {
       SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot,
                                     CLI.DAG.getConstant(Offsets[i], CLI.DL,
-                                                        PtrVT));
+                                                        PtrVT), &Flags);
       SDValue L = CLI.DAG.getLoad(
           RetTys[i], CLI.DL, CLI.Chain, Add,
           MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(),

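(Aside on the visitAlloca hunk above: the rounding is the usual align-up
idiom (Size + Align - 1) & ~(Align - 1); e.g. with StackAlign = 16, a size
of 20 becomes (20 + 15) & ~15 = 32. The +15 cannot wrap unsigned because
the resulting allocation must itself fit in the address space.)
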
Modified: llvm/trunk/test/CodeGen/WebAssembly/offset.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/WebAssembly/offset.ll?rev=256890&r1=256889&r2=256890&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/WebAssembly/offset.ll (original)
+++ llvm/trunk/test/CodeGen/WebAssembly/offset.ll Tue Jan  5 18:43:06 2016
@@ -17,6 +17,28 @@ define i32 @load_i32_with_folded_offset(
   ret i32 %t
 }
 
+; With an inbounds gep, we can fold an offset.
+
+; CHECK-LABEL: load_i32_with_folded_gep_offset:
+; CHECK: i32.load  $push0=, 24($0){{$}}
+define i32 @load_i32_with_folded_gep_offset(i32* %p) {
+  %s = getelementptr inbounds i32, i32* %p, i32 6
+  %t = load i32, i32* %s
+  ret i32 %t
+}
+
+; We can't fold a negative offset though, even with an inbounds gep.
+
+; CHECK-LABEL: load_i32_with_unfolded_gep_negative_offset:
+; CHECK: i32.const $push0=, -24{{$}}
+; CHECK: i32.add   $push1=, $0, $pop0{{$}}
+; CHECK: i32.load  $push2=, 0($pop1){{$}}
+define i32 @load_i32_with_unfolded_gep_negative_offset(i32* %p) {
+  %s = getelementptr inbounds i32, i32* %p, i32 -6
+  %t = load i32, i32* %s
+  ret i32 %t
+}
+
 ; Without nuw, and even with nsw, we can't fold an offset.
 
 ; CHECK-LABEL: load_i32_with_unfolded_offset:
@@ -31,6 +53,18 @@ define i32 @load_i32_with_unfolded_offse
   ret i32 %t
 }
 
+; Without inbounds, we can't fold a gep offset.
+
+; CHECK-LABEL: load_i32_with_unfolded_gep_offset:
+; CHECK: i32.const $push0=, 24{{$}}
+; CHECK: i32.add   $push1=, $0, $pop0{{$}}
+; CHECK: i32.load  $push2=, 0($pop1){{$}}
+define i32 @load_i32_with_unfolded_gep_offset(i32* %p) {
+  %s = getelementptr i32, i32* %p, i32 6
+  %t = load i32, i32* %s
+  ret i32 %t
+}
+
 ; Same as above but with i64.
 
 ; CHECK-LABEL: load_i64_with_folded_offset:
@@ -45,6 +79,28 @@ define i64 @load_i64_with_folded_offset(
 
 ; Same as above but with i64.
 
+; CHECK-LABEL: load_i64_with_folded_gep_offset:
+; CHECK: i64.load  $push0=, 24($0){{$}}
+define i64 @load_i64_with_folded_gep_offset(i64* %p) {
+  %s = getelementptr inbounds i64, i64* %p, i32 3
+  %t = load i64, i64* %s
+  ret i64 %t
+}
+
+; Same as above but with i64.
+
+; CHECK-LABEL: load_i64_with_unfolded_gep_negative_offset:
+; CHECK: i32.const $push0=, -24{{$}}
+; CHECK: i32.add   $push1=, $0, $pop0{{$}}
+; CHECK: i64.load  $push2=, 0($pop1){{$}}
+define i64 @load_i64_with_unfolded_gep_negative_offset(i64* %p) {
+  %s = getelementptr inbounds i64, i64* %p, i32 -3
+  %t = load i64, i64* %s
+  ret i64 %t
+}
+
+; Same as above but with i64.
+
 ; CHECK-LABEL: load_i64_with_unfolded_offset:
 ; CHECK: i32.const $push0=, 24{{$}}
 ; CHECK: i32.add   $push1=, $0, $pop0{{$}}
@@ -57,6 +113,18 @@ define i64 @load_i64_with_unfolded_offse
   ret i64 %t
 }
 
+; Same as above but with i64.
+
+; CHECK-LABEL: load_i64_with_unfolded_gep_offset:
+; CHECK: i32.const $push0=, 24{{$}}
+; CHECK: i32.add   $push1=, $0, $pop0{{$}}
+; CHECK: i64.load  $push2=, 0($pop1){{$}}
+define i64 @load_i64_with_unfolded_gep_offset(i64* %p) {
+  %s = getelementptr i64, i64* %p, i32 3
+  %t = load i64, i64* %s
+  ret i64 %t
+}
+
 ; Same as above but with store.
 
 ; CHECK-LABEL: store_i32_with_folded_offset:
@@ -71,6 +139,28 @@ define void @store_i32_with_folded_offse
 
 ; Same as above but with store.
 
+; CHECK-LABEL: store_i32_with_folded_gep_offset:
+; CHECK: i32.store $discard=, 24($0), $pop0{{$}}
+define void @store_i32_with_folded_gep_offset(i32* %p) {
+  %s = getelementptr inbounds i32, i32* %p, i32 6
+  store i32 0, i32* %s
+  ret void
+}
+
+; Same as above but with store.
+
+; CHECK-LABEL: store_i32_with_unfolded_gep_negative_offset:
+; CHECK: i32.const $push0=, -24{{$}}
+; CHECK: i32.add   $push1=, $0, $pop0{{$}}
+; CHECK: i32.store $discard=, 0($pop1), $pop2{{$}}
+define void @store_i32_with_unfolded_gep_negative_offset(i32* %p) {
+  %s = getelementptr inbounds i32, i32* %p, i32 -6
+  store i32 0, i32* %s
+  ret void
+}
+
+; Same as above but with store.
+
 ; CHECK-LABEL: store_i32_with_unfolded_offset:
 ; CHECK: i32.const $push0=, 24{{$}}
 ; CHECK: i32.add   $push1=, $0, $pop0{{$}}
@@ -83,6 +173,18 @@ define void @store_i32_with_unfolded_off
   ret void
 }
 
+; Same as above but with store.
+
+; CHECK-LABEL: store_i32_with_unfolded_gep_offset:
+; CHECK: i32.const $push0=, 24{{$}}
+; CHECK: i32.add   $push1=, $0, $pop0{{$}}
+; CHECK: i32.store $discard=, 0($pop1), $pop2{{$}}
+define void @store_i32_with_unfolded_gep_offset(i32* %p) {
+  %s = getelementptr i32, i32* %p, i32 6
+  store i32 0, i32* %s
+  ret void
+}
+
 ; Same as above but with store with i64.
 
 ; CHECK-LABEL: store_i64_with_folded_offset:
@@ -97,6 +199,28 @@ define void @store_i64_with_folded_offse
 
 ; Same as above but with store with i64.
 
+; CHECK-LABEL: store_i64_with_folded_gep_offset:
+; CHECK: i64.store $discard=, 24($0), $pop0{{$}}
+define void @store_i64_with_folded_gep_offset(i64* %p) {
+  %s = getelementptr inbounds i64, i64* %p, i32 3
+  store i64 0, i64* %s
+  ret void
+}
+
+; Same as above but with store with i64.
+
+; CHECK-LABEL: store_i64_with_unfolded_gep_negative_offset:
+; CHECK: i32.const $push0=, -24{{$}}
+; CHECK: i32.add   $push1=, $0, $pop0{{$}}
+; CHECK: i64.store $discard=, 0($pop1), $pop2{{$}}
+define void @store_i64_with_unfolded_gep_negative_offset(i64* %p) {
+  %s = getelementptr inbounds i64, i64* %p, i32 -3
+  store i64 0, i64* %s
+  ret void
+}
+
+; Same as above but with store with i64.
+
 ; CHECK-LABEL: store_i64_with_unfolded_offset:
 ; CHECK: i32.const $push0=, 24{{$}}
 ; CHECK: i32.add   $push1=, $0, $pop0{{$}}
@@ -109,6 +233,18 @@ define void @store_i64_with_unfolded_off
   ret void
 }
 
+; Same as above but with store with i64.
+
+; CHECK-LABEL: store_i64_with_unfolded_gep_offset:
+; CHECK: i32.const $push0=, 24{{$}}
+; CHECK: i32.add   $push1=, $0, $pop0{{$}}
+; CHECK: i64.store $discard=, 0($pop1), $pop2{{$}}
+define void @store_i64_with_unfolded_gep_offset(i64* %p) {
+  %s = getelementptr i64, i64* %p, i32 3
+  store i64 0, i64* %s
+  ret void
+}
+
 ; When loading from a fixed address, materialize a zero.
 
 ; CHECK-LABEL: load_i32_from_numeric_address
@@ -159,6 +295,17 @@ define i32 @load_i8_s_with_folded_offset
   ret i32 %u
 }
 
+; Fold a gep offset into a sign-extending load.
+
+; CHECK-LABEL: load_i8_s_with_folded_gep_offset:
+; CHECK: i32.load8_s $push0=, 24($0){{$}}
+define i32 @load_i8_s_with_folded_gep_offset(i8* %p) {
+  %s = getelementptr inbounds i8, i8* %p, i32 24
+  %t = load i8, i8* %s
+  %u = sext i8 %t to i32
+  ret i32 %u
+}
+
 ; Fold an offset into a zero-extending load.
 
 ; CHECK-LABEL: load_i8_u_with_folded_offset:
@@ -172,6 +319,17 @@ define i32 @load_i8_u_with_folded_offset
   ret i32 %u
 }
 
+; Fold a gep offset into a zero-extending load.
+
+; CHECK-LABEL: load_i8_u_with_folded_gep_offset:
+; CHECK: i32.load8_u $push0=, 24($0){{$}}
+define i32 @load_i8_u_with_folded_gep_offset(i8* %p) {
+  %s = getelementptr inbounds i8, i8* %p, i32 24
+  %t = load i8, i8* %s
+  %u = zext i8 %t to i32
+  ret i32 %u
+}
+
 ; Fold an offset into a truncating store.
 
 ; CHECK-LABEL: store_i8_with_folded_offset:
@@ -183,3 +341,43 @@ define void @store_i8_with_folded_offset
   store i8 0, i8* %s
   ret void
 }
+
+; Fold a gep offset into a truncating store.
+
+; CHECK-LABEL: store_i8_with_folded_gep_offset:
+; CHECK: i32.store8 $discard=, 24($0), $pop0{{$}}
+define void @store_i8_with_folded_gep_offset(i8* %p) {
+  %s = getelementptr inbounds i8, i8* %p, i32 24
+  store i8 0, i8* %s
+  ret void
+}
+
+; Fold the offsets when lowering aggregate loads and stores.
+
+; CHECK-LABEL: aggregate_load_store:
+; CHECK: i32.load  $2=, 0($0){{$}}
+; CHECK: i32.load  $3=, 4($0){{$}}
+; CHECK: i32.load  $4=, 8($0){{$}}
+; CHECK: i32.load  $push0=, 12($0){{$}}
+; CHECK: i32.store $discard=, 12($1), $pop0{{$}}
+; CHECK: i32.store $discard=, 8($1), $4{{$}}
+; CHECK: i32.store $discard=, 4($1), $3{{$}}
+; CHECK: i32.store $discard=, 0($1), $2{{$}}
+define void @aggregate_load_store({i32,i32,i32,i32}* %p, {i32,i32,i32,i32}* %q) {
+  ; volatile so that things stay in order for the tests above
+  %t = load volatile {i32,i32,i32,i32}, {i32,i32,i32,i32}* %p
+  store volatile {i32,i32,i32,i32} %t, {i32,i32,i32,i32}* %q
+  ret void
+}
+
+; Fold the offsets when lowering aggregate return values.
+
+; CHECK-LABEL: aggregate_return:
+; CHECK: i32.const   $push0=, 0{{$}}
+; CHECK: i32.store   $push1=, 12($0), $pop0{{$}}
+; CHECK: i32.store   $push2=, 8($0), $pop1{{$}}
+; CHECK: i32.store   $push3=, 4($0), $pop2{{$}}
+; CHECK: i32.store   $discard=, 0($0), $pop3{{$}}
+define {i32,i32,i32,i32} @aggregate_return() {
+  ret {i32,i32,i32,i32} zeroinitializer
+}
