[llvm-branch-commits] [llvm] DAG: Preserve more flags when expanding gep (PR #110815)

Wed Oct 2 04:51:14 PDT 2024

================
@@ -4386,34 +4386,59 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
       // it.
       IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType());
 
+      SDNodeFlags ScaleFlags;
+      // The multiplication of an index by the type size does not wrap the
+      // pointer index type in a signed sense (mul nsw).
+      if (NW.hasNoUnsignedSignedWrap())
+        ScaleFlags.setNoSignedWrap(true);
+
+      // The multiplication of an index by the type size does not wrap the
+      // pointer index type in an unsigned sense (mul nuw).
+      if (NW.hasNoUnsignedWrap())
+        ScaleFlags.setNoUnsignedWrap(true);
+
       if (ElementScalable) {
         EVT VScaleTy = N.getValueType().getScalarType();
         SDValue VScale = DAG.getNode(
             ISD::VSCALE, dl, VScaleTy,
             DAG.getConstant(ElementMul.getZExtValue(), dl, VScaleTy));
         if (IsVectorGEP)
           VScale = DAG.getSplatVector(N.getValueType(), dl, VScale);
-        IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, VScale);
+        IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, VScale,
+                           ScaleFlags);
       } else {
         // If this is a multiply by a power of two, turn it into a shl
         // immediately.  This is a very common case.
         if (ElementMul != 1) {
           if (ElementMul.isPowerOf2()) {
             unsigned Amt = ElementMul.logBase2();
-            IdxN = DAG.getNode(ISD::SHL, dl,
-                               N.getValueType(), IdxN,
-                               DAG.getConstant(Amt, dl, IdxN.getValueType()));
+            IdxN = DAG.getNode(ISD::SHL, dl, N.getValueType(), IdxN,
+                               DAG.getConstant(Amt, dl, IdxN.getValueType()),
+                               ScaleFlags);
           } else {
             SDValue Scale = DAG.getConstant(ElementMul.getZExtValue(), dl,
                                             IdxN.getValueType());
-            IdxN = DAG.getNode(ISD::MUL, dl,
-                               N.getValueType(), IdxN, Scale);
+            IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, Scale,
+                               ScaleFlags);
           }
         }
       }
 
-      N = DAG.getNode(ISD::ADD, dl,
-                      N.getValueType(), N, IdxN);
+      SDNodeFlags AddFlags;
+
+      // The successive addition of each offset (without adding the base
+      // address) does not wrap the pointer index type in a signed sense (add
+      // nsw).
+      if (NW.hasNoUnsignedSignedWrap())
+        AddFlags.setNoSignedWrap(true);
----------------
arsenm wrote:

That's already tested here: https://github.com/llvm/llvm-project/blob/56474dac206d8592cccc229cb56e1f12b543ec97/llvm/test/CodeGen/AMDGPU/gep-flags-stack-offsets.ll#L134

But the flags alone are still not enough: even with all flags set on both GEPs, computeKnownBits cannot prove the sign bit is zero during selection:

```
define void @gep_all_flags(i32 %idx, i32 %val) {
  %alloca = alloca [32 x i32], align 4, addrspace(5)
  %gep0 = getelementptr inbounds nuw [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx
  %gep1 = getelementptr inbounds nuw i8, ptr addrspace(5) %gep0, i32 16
  store volatile i32 %val, ptr addrspace(5) %gep1, align 4
  ret void
}

```

```
Optimized legalized selection DAG: %bb.0 'gep_all_flags:'
SelectionDAG has 14 nodes:
  t0: ch,glue = EntryToken
      t4: i32,ch = CopyFromReg # D:1 t0, Register:i32 %8
            t2: i32,ch = CopyFromReg # D:1 t0, Register:i32 %7
          t7: i32 = shl nuw nsw # D:1 t2, Constant:i32<2>
        t8: i32 = add nuw # D:1 FrameIndex:i32<0>, t7
      t10: i32 = add nuw # D:1 t8, Constant:i32<16>
    t13: ch = store<(volatile store (s32) into %ir.gep1, addrspace 5)> # D:1 t0, t4, t10, undef:i32
  t14: ch = RET_GLUE t13
```



https://github.com/llvm/llvm-project/pull/110815