[llvm] [DAG][AArch64] Ensure that ResNo is correct for uses of Ptr when considering postinc. (PR #164810)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 23 04:58:34 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-aarch64
Author: David Green (davemgreen)
<details>
<summary>Changes</summary>
We might be looking at a use of a different result of the node, for example among the uses of an i32,i64,ch pre-indexed load.
Fixes #164775
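For context, a minimal sketch (not part of the patch itself) of why the result number matters here. A pre-indexed load is a multi-result node, and `SDValue::operator->` forwards to the underlying `SDNode`, so `Ptr->users()` visits the users of every result, not just the pointer:

```cpp
// Results of a pre-indexed load LD (illustrative, matching the i32,i64,ch
// example above):
//   SDValue(LD, 0) : the loaded value    (i32)
//   SDValue(LD, 1) : the updated pointer (i64)
//   SDValue(LD, 2) : the output chain    (ch)
//
// With Ptr = SDValue(LD, 1), walking the SDUse list and comparing result
// numbers skips users that only consume the loaded value or the chain:
for (SDUse &U : Ptr->uses()) {
  if (U.getResNo() != Ptr.getResNo())
    continue;                 // a use of result 0 or 2, not of the pointer
  SDNode *Op = U.getUser();   // a genuine user of the updated pointer
  // ... post-indexed candidate checks on Op ...
}
```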
---
Full diff: https://github.com/llvm/llvm-project/pull/164810.diff
2 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+5-1)
- (added) llvm/test/CodeGen/AArch64/ldst-prepost-uses.ll (+73)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 310d35d9b1d1e..2b38185091685 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -19995,8 +19995,12 @@ static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
// nor a successor of N. Otherwise, if Op is folded that would
// create a cycle.
unsigned MaxSteps = SelectionDAG::getHasPredecessorMaxSteps();
- for (SDNode *Op : Ptr->users()) {
+ for (SDUse &U : Ptr->uses()) {
+ if (U.getResNo() != Ptr.getResNo())
+ continue;
+
// Check for #1.
+ SDNode *Op = U.getUser();
if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
continue;
diff --git a/llvm/test/CodeGen/AArch64/ldst-prepost-uses.ll b/llvm/test/CodeGen/AArch64/ldst-prepost-uses.ll
new file mode 100644
index 0000000000000..85991fb9dfeb5
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ldst-prepost-uses.ll
@@ -0,0 +1,73 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -O3 -mtriple=aarch64 | FileCheck %s
+
+; From #164775: this generates a pre-indexed load feeding a post-indexed store,
+; where the combiner was checking the uses of the wrong result for post-inc. The
+; combination has to arise at just the wrong point, so the test is quite delicate.
+
+@g_260 = dso_local global i16 0
+@g_480 = dso_local global i16 0
+
+define i32 @func_1(ptr %l_3253) {
+; CHECK-LABEL: func_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub sp, sp, #128
+; CHECK-NEXT: .cfi_def_cfa_offset 128
+; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: mov w9, #2 // =0x2
+; CHECK-NEXT: mov w10, #96 // =0x60
+; CHECK-NEXT: strb wzr, [x9]
+; CHECK-NEXT: mov w9, #111 // =0x6f
+; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: str wzr, [x9]
+; CHECK-NEXT: mov w9, #80 // =0x50
+; CHECK-NEXT: adrp x1, .L_MergedGlobals
+; CHECK-NEXT: add x1, x1, :lo12:.L_MergedGlobals
+; CHECK-NEXT: strh wzr, [x8]
+; CHECK-NEXT: str q0, [x9]
+; CHECK-NEXT: mov w9, #48 // =0x30
+; CHECK-NEXT: str q0, [x9]
+; CHECK-NEXT: mov w9, #32 // =0x20
+; CHECK-NEXT: str q0, [x10]
+; CHECK-NEXT: mov w10, #64 // =0x40
+; CHECK-NEXT: str q0, [x9]
+; CHECK-NEXT: mov w9, #16 // =0x10
+; CHECK-NEXT: str q0, [x10]
+; CHECK-NEXT: str q0, [x9]
+; CHECK-NEXT: str q0, [x8]
+; CHECK-NEXT: adrp x8, .L_MergedGlobals
+; CHECK-NEXT: strb wzr, [x0, #8]
+; CHECK-NEXT: strb wzr, [x0, #12]
+; CHECK-NEXT: strb wzr, [x0, #16]
+; CHECK-NEXT: strb wzr, [x0, #20]
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: ldrh wzr, [x8, :lo12:.L_MergedGlobals]
+; CHECK-NEXT: ldrh w8, [x1, #4]!
+; CHECK-NEXT: sub w8, w8, #1
+; CHECK-NEXT: strh w8, [x1]
+; CHECK-NEXT: add sp, sp, #128
+; CHECK-NEXT: b use
+entry:
+ %l_32531.sroa.3 = alloca [3 x i8], align 4
+ %l_32531.sroa.4 = alloca [115 x i8], align 4
+ call void @llvm.lifetime.start.p0(ptr %l_32531.sroa.3)
+ call void @llvm.lifetime.start.p0(ptr %l_32531.sroa.4)
+ call void @llvm.memset.p0.i64(ptr null, i8 0, i64 3, i1 false)
+ call void @llvm.memset.p0.i64(ptr null, i8 0, i64 115, i1 false)
+ %0 = getelementptr inbounds i8, ptr %l_3253, i64 8
+ store i8 0, ptr %0, align 4
+ %1 = getelementptr inbounds i8, ptr %l_3253, i64 12
+ store i8 0, ptr %1, align 4
+ %2 = getelementptr inbounds i8, ptr %l_3253, i64 16
+ store i8 0, ptr %2, align 4
+ %3 = getelementptr inbounds i8, ptr %l_3253, i64 20
+ store i8 0, ptr %3, align 4
+ %4 = load volatile i16, ptr @g_260, align 4
+ %5 = load i16, ptr @g_480, align 4
+ %dec.i.i = add i16 %5, -1
+ store i16 %dec.i.i, ptr @g_480, align 4
+ %call1 = tail call i32 @use(i32 0, ptr @g_480)
+ ret i32 %call1
+}
+
+declare i32 @use(i32, ptr)
``````````
</details>
https://github.com/llvm/llvm-project/pull/164810
More information about the llvm-commits mailing list