[llvm] [AArch64][SVE] Fix hang when in VECTOR_HISTOGRAM DAG combine (PR #152539)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 7 09:01:58 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-selectiondag
Author: Benjamin Maxwell (MacDue)
<details>
<summary>Changes</summary>
The histogram DAG combine went into an infinite loop of creating the same histogram node due to an incorrect use of the `refineUniformBase` and `refineIndexType` APIs.
These APIs take SDValues by reference (SDValue&) and return `true` if they were "refined" (i.e., set them to new values).
Previously, this DAG combine would create the `Ops` array (used to create the new histogram node) before calling the `refine*` APIs, which copies the SDValues into the array, meaning the updated values were not used to create the new histogram node.
Reproducer: https://godbolt.org/z/hsGWhTaqY (it will timeout)
---
Full diff: https://github.com/llvm/llvm-project/pull/152539.diff
2 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+6-7)
- (added) llvm/test/CodeGen/AArch64/aarch64-histcnt-dag-combine-hang.ll (+70)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 734191447d67f..5f1e38a33c891 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -12843,22 +12843,21 @@ SDValue DAGCombiner::visitMHISTOGRAM(SDNode *N) {
SDLoc DL(HG);
EVT MemVT = HG->getMemoryVT();
+ EVT DataVT = Index.getValueType();
MachineMemOperand *MMO = HG->getMemOperand();
ISD::MemIndexType IndexType = HG->getIndexType();
if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
return Chain;
- SDValue Ops[] = {Chain, Inc, Mask, BasePtr, Index,
- HG->getScale(), HG->getIntID()};
- if (refineUniformBase(BasePtr, Index, HG->isIndexScaled(), DAG, DL))
+ if (refineUniformBase(BasePtr, Index, HG->isIndexScaled(), DAG, DL) ||
+ refineIndexType(Index, IndexType, DataVT, DAG)) {
+ SDValue Ops[] = {Chain, Inc, Mask, BasePtr, Index,
+ HG->getScale(), HG->getIntID()};
return DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL, Ops,
MMO, IndexType);
+ }
- EVT DataVT = Index.getValueType();
- if (refineIndexType(Index, IndexType, DataVT, DAG))
- return DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL, Ops,
- MMO, IndexType);
return SDValue();
}
diff --git a/llvm/test/CodeGen/AArch64/aarch64-histcnt-dag-combine-hang.ll b/llvm/test/CodeGen/AArch64/aarch64-histcnt-dag-combine-hang.ll
new file mode 100644
index 0000000000000..da04c67aa6c5c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-histcnt-dag-combine-hang.ll
@@ -0,0 +1,70 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mattr=+sve2 -verify-machineinstrs < %s -o - | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; This test is reduced from a real world example that would cause the DAGCombiner to hang.
+
+define void @histcnt_loop(ptr %0, i64 %1, ptr %2, i64 %3, i64 %4) {
+; CHECK-LABEL: histcnt_loop:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, #1 // =0x1
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: add x9, x0, x1
+; CHECK-NEXT: .LBB0_1: // %loop
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ld1h { z1.d }, p0/z, [x0, x8, lsl #1]
+; CHECK-NEXT: lsl x10, x8, #1
+; CHECK-NEXT: add x11, x0, x10
+; CHECK-NEXT: add x10, x9, x10
+; CHECK-NEXT: lsl z1.d, z1.d, #1
+; CHECK-NEXT: ld1h { z4.d }, p0/z, [x11, #1, mul vl]
+; CHECK-NEXT: ld1h { z5.d }, p0/z, [x10, #1, mul vl]
+; CHECK-NEXT: histcnt z2.d, p0/z, z1.d, z1.d
+; CHECK-NEXT: ld1h { z3.d }, p0/z, [x2, z1.d]
+; CHECK-NEXT: mad z2.d, p0/m, z0.d, z3.d
+; CHECK-NEXT: ld1h { z3.d }, p0/z, [x9, x8, lsl #1]
+; CHECK-NEXT: add x8, x8, x3
+; CHECK-NEXT: cmp x4, x8
+; CHECK-NEXT: st1h { z2.d }, p0, [x2, z1.d]
+; CHECK-NEXT: lsl z1.d, z4.d, #1
+; CHECK-NEXT: histcnt z2.d, p0/z, z1.d, z1.d
+; CHECK-NEXT: ld1h { z4.d }, p0/z, [x2, z1.d]
+; CHECK-NEXT: mad z2.d, p0/m, z0.d, z4.d
+; CHECK-NEXT: st1h { z2.d }, p0, [x2, z1.d]
+; CHECK-NEXT: lsl z1.d, z3.d, #1
+; CHECK-NEXT: histcnt z2.d, p0/z, z1.d, z1.d
+; CHECK-NEXT: ld1h { z3.d }, p0/z, [x2, z1.d]
+; CHECK-NEXT: mad z2.d, p0/m, z0.d, z3.d
+; CHECK-NEXT: st1h { z2.d }, p0, [x2, z1.d]
+; CHECK-NEXT: lsl z1.d, z5.d, #1
+; CHECK-NEXT: histcnt z2.d, p0/z, z1.d, z1.d
+; CHECK-NEXT: ld1h { z3.d }, p0/z, [x2, z1.d]
+; CHECK-NEXT: mad z2.d, p0/m, z0.d, z3.d
+; CHECK-NEXT: st1h { z2.d }, p0, [x2, z1.d]
+; CHECK-NEXT: b.ne .LBB0_1
+; CHECK-NEXT: // %bb.2: // %exit
+; CHECK-NEXT: ret
+entry:
+ br label %loop
+
+loop:
+ %6 = phi i64 [ 0, %entry ], [ %15, %loop ]
+ %7 = getelementptr inbounds nuw i16, ptr %0, i64 %6
+ %8 = getelementptr inbounds nuw i8, ptr %7, i64 %1
+ %9 = load <vscale x 4 x i16>, ptr %7, align 2
+ %10 = load <vscale x 4 x i16>, ptr %8, align 2
+ %11 = zext <vscale x 4 x i16> %9 to <vscale x 4 x i64>
+ %12 = zext <vscale x 4 x i16> %10 to <vscale x 4 x i64>
+ %13 = getelementptr inbounds nuw [16 x i16], ptr %2, i64 0, <vscale x 4 x i64> %11
+ %14 = getelementptr inbounds nuw [16 x i16], ptr %2, i64 0, <vscale x 4 x i64> %12
+ call void @llvm.experimental.vector.histogram.add.nxv4p0.i16(<vscale x 4 x ptr> %13, i16 1, <vscale x 4 x i1> splat (i1 true))
+ call void @llvm.experimental.vector.histogram.add.nxv4p0.i16(<vscale x 4 x ptr> %14, i16 1, <vscale x 4 x i1> splat (i1 true))
+ %15 = add nuw i64 %6, %3
+ %16 = icmp eq i64 %15, %4
+ br i1 %16, label %exit, label %loop
+
+exit:
+ ret void
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/152539
More information about the llvm-commits
mailing list