[llvm-branch-commits] [llvm] DAG: Preserve more flags when expanding gep (PR #110815)

Matt Arsenault via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Oct 2 03:38:09 PDT 2024


https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/110815

>From 56474dac206d8592cccc229cb56e1f12b543ec97 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 2 Oct 2024 11:20:23 +0400
Subject: [PATCH 1/2] DAG: Preserve more flags when expanding gep

This allows selecting the addressing mode for stack instructions
in cases where we need to prove the sign bit is zero.
---
 .../SelectionDAG/SelectionDAGBuilder.cpp      | 41 +++++++++++++++----
 .../CodeGen/AMDGPU/gep-flags-stack-offsets.ll |  6 +--
 .../pointer-add-unknown-offset-debug-info.ll  |  2 +-
 3 files changed, 36 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 25213f587116d5..6838c0b530a363 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4386,6 +4386,17 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
       // it.
       IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType());
 
+      SDNodeFlags ScaleFlags;
+      // The multiplication of an index by the type size does not wrap the
+      // pointer index type in a signed sense (mul nsw).
+      if (NW.hasNoUnsignedSignedWrap())
+        ScaleFlags.setNoSignedWrap(true);
+
+      // The multiplication of an index by the type size does not wrap the
+      // pointer index type in an unsigned sense (mul nuw).
+      if (NW.hasNoUnsignedWrap())
+        ScaleFlags.setNoUnsignedWrap(true);
+
       if (ElementScalable) {
         EVT VScaleTy = N.getValueType().getScalarType();
         SDValue VScale = DAG.getNode(
@@ -4393,27 +4404,41 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
             DAG.getConstant(ElementMul.getZExtValue(), dl, VScaleTy));
         if (IsVectorGEP)
           VScale = DAG.getSplatVector(N.getValueType(), dl, VScale);
-        IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, VScale);
+        IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, VScale,
+                           ScaleFlags);
       } else {
         // If this is a multiply by a power of two, turn it into a shl
         // immediately.  This is a very common case.
         if (ElementMul != 1) {
           if (ElementMul.isPowerOf2()) {
             unsigned Amt = ElementMul.logBase2();
-            IdxN = DAG.getNode(ISD::SHL, dl,
-                               N.getValueType(), IdxN,
-                               DAG.getConstant(Amt, dl, IdxN.getValueType()));
+            IdxN = DAG.getNode(ISD::SHL, dl, N.getValueType(), IdxN,
+                               DAG.getConstant(Amt, dl, IdxN.getValueType()),
+                               ScaleFlags);
           } else {
             SDValue Scale = DAG.getConstant(ElementMul.getZExtValue(), dl,
                                             IdxN.getValueType());
-            IdxN = DAG.getNode(ISD::MUL, dl,
-                               N.getValueType(), IdxN, Scale);
+            IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, Scale,
+                               ScaleFlags);
           }
         }
       }
 
-      N = DAG.getNode(ISD::ADD, dl,
-                      N.getValueType(), N, IdxN);
+      SDNodeFlags AddFlags;
+
+      // The successive addition of each offset (without adding the base
+      // address) does not wrap the pointer index type in a signed sense (add
+      // nsw).
+      if (NW.hasNoUnsignedSignedWrap())
+        AddFlags.setNoSignedWrap(true);
+
+      // The successive addition of each offset (without adding the base
+      // address) does not wrap the pointer index type in an unsigned sense (add
+      // nuw).
+      if (NW.hasNoUnsignedWrap())
+        AddFlags.setNoUnsignedWrap(true);
+
+      N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, IdxN, AddFlags);
     }
   }
 
diff --git a/llvm/test/CodeGen/AMDGPU/gep-flags-stack-offsets.ll b/llvm/test/CodeGen/AMDGPU/gep-flags-stack-offsets.ll
index 782894976c711c..a39afa6f609c7e 100644
--- a/llvm/test/CodeGen/AMDGPU/gep-flags-stack-offsets.ll
+++ b/llvm/test/CodeGen/AMDGPU/gep-flags-stack-offsets.ll
@@ -118,8 +118,7 @@ define void @gep_inbounds_nuw_alloca(i32 %idx, i32 %val) #0 {
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX8-NEXT:    v_lshrrev_b32_e64 v2, 6, s32
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v2, v0
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 16, v0
-; GFX8-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
+; GFX8-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -145,8 +144,7 @@ define void @gep_nusw_nuw_alloca(i32 %idx, i32 %val) #0 {
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX8-NEXT:    v_lshrrev_b32_e64 v2, 6, s32
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v2, v0
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 16, v0
-; GFX8-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
+; GFX8-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
diff --git a/llvm/test/DebugInfo/Sparc/pointer-add-unknown-offset-debug-info.ll b/llvm/test/DebugInfo/Sparc/pointer-add-unknown-offset-debug-info.ll
index 63d7391bd7d4f5..5fe5a90a973174 100644
--- a/llvm/test/DebugInfo/Sparc/pointer-add-unknown-offset-debug-info.ll
+++ b/llvm/test/DebugInfo/Sparc/pointer-add-unknown-offset-debug-info.ll
@@ -12,7 +12,7 @@ define void @pointer_add_unknown_offset(ptr %base, i32 %offset) !dbg !7 {
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:i64regs = COPY $i1
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:i64regs = COPY $i0
   ; CHECK-NEXT:   [[SRAri:%[0-9]+]]:i64regs = SRAri [[COPY]], 0
-  ; CHECK-NEXT:   [[SLLXri:%[0-9]+]]:i64regs = SLLXri killed [[SRAri]], 2
+  ; CHECK-NEXT:   [[SLLXri:%[0-9]+]]:i64regs = nsw SLLXri killed [[SRAri]], 2
   ; CHECK-NEXT:   DBG_VALUE_LIST !13, !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_plus, DW_OP_stack_value), [[COPY1]], [[SLLXri]], debug-location !16
   ; CHECK-NEXT:   DBG_VALUE_LIST !14, !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_plus, DW_OP_plus_uconst, 3, DW_OP_stack_value), [[COPY1]], [[SLLXri]], debug-location !16
   ; CHECK-NEXT:   DBG_VALUE_LIST !15, !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 2, DW_OP_plus, DW_OP_LLVM_arg, 1, DW_OP_LLVM_arg, 3, DW_OP_plus, DW_OP_plus, DW_OP_stack_value), [[COPY1]], [[COPY1]], [[SLLXri]], [[SLLXri]], debug-location !16

>From a20f4c0cd6c68bd78e0df451186d10ce8bc2026d Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 2 Oct 2024 14:13:31 +0400
Subject: [PATCH 2/2] Address comments

This loses the benefit in the test
---
 .../CodeGen/SelectionDAG/SelectionDAGBuilder.cpp    | 13 ++++---------
 llvm/test/CodeGen/AMDGPU/gep-flags-stack-offsets.ll |  6 ++++--
 2 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 6838c0b530a363..771f219a3cbcc9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4426,15 +4426,10 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
 
       SDNodeFlags AddFlags;
 
-      // The successive addition of each offset (without adding the base
-      // address) does not wrap the pointer index type in a signed sense (add
-      // nsw).
-      if (NW.hasNoUnsignedSignedWrap())
-        AddFlags.setNoSignedWrap(true);
-
-      // The successive addition of each offset (without adding the base
-      // address) does not wrap the pointer index type in an unsigned sense (add
-      // nuw).
+      // The successive addition of the current address, truncated to the
+      // pointer index type and interpreted as an unsigned number, and each
+      // offset, also interpreted as an unsigned number, does not wrap the
+      // pointer index type (add nuw).
       if (NW.hasNoUnsignedWrap())
         AddFlags.setNoUnsignedWrap(true);
 
diff --git a/llvm/test/CodeGen/AMDGPU/gep-flags-stack-offsets.ll b/llvm/test/CodeGen/AMDGPU/gep-flags-stack-offsets.ll
index a39afa6f609c7e..782894976c711c 100644
--- a/llvm/test/CodeGen/AMDGPU/gep-flags-stack-offsets.ll
+++ b/llvm/test/CodeGen/AMDGPU/gep-flags-stack-offsets.ll
@@ -118,7 +118,8 @@ define void @gep_inbounds_nuw_alloca(i32 %idx, i32 %val) #0 {
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX8-NEXT:    v_lshrrev_b32_e64 v2, 6, s32
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v2, v0
-; GFX8-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
+; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 16, v0
+; GFX8-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -144,7 +145,8 @@ define void @gep_nusw_nuw_alloca(i32 %idx, i32 %val) #0 {
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX8-NEXT:    v_lshrrev_b32_e64 v2, 6, s32
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v2, v0
-; GFX8-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
+; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 16, v0
+; GFX8-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;



More information about the llvm-branch-commits mailing list