[llvm] [SelectionDAG] Fix AArch64 machine verifier bug when expanding LOOP_DEPENDENCE_MASK (PR #168221)

via llvm-commits llvm-commits at lists.llvm.org
Sat Nov 15 11:15:52 PST 2025


https://github.com/AZero13 updated https://github.com/llvm/llvm-project/pull/168221

>From 1f7f2467656f853507fecaabc6509449aff16c69 Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Sat, 15 Nov 2025 14:06:38 -0500
Subject: [PATCH 1/2] Pre-commit test (NFC)

---
 .../AArch64/loop-dependence-mask-ccmp.ll      | 55 +++++++++++++++++++
 1 file changed, 55 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/loop-dependence-mask-ccmp.ll

diff --git a/llvm/test/CodeGen/AArch64/loop-dependence-mask-ccmp.ll b/llvm/test/CodeGen/AArch64/loop-dependence-mask-ccmp.ll
new file mode 100644
index 0000000000000..d01507f5151a2
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/loop-dependence-mask-ccmp.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64 -mattr=+sve2 -verify-machineinstrs -stop-after=finalize-isel %s -o - | FileCheck %s
+
+; Regression test for a bug where getTargetConstant(0) was used instead of
+; getConstant(0) in ScalarizeVecRes_LOOP_DEPENDENCE_MASK, causing instruction
+; selection to incorrectly generate CCMPXr (register form) with an immediate
+; operand instead of CCMPXi (immediate form).
+;
+; Before the fix, this produced invalid machine code that failed verification:
+;   CCMPXr %2:gpr64common, 0, 4, 13, ...
+;                          ^ literal immediate in register operand slot
+;
+; Error: "*** Bad machine code: Expected a register operand. ***"
+;
+; After the fix, it correctly produces:
+;   CCMPXi %2:gpr64common, 0, 4, 13, ...
+;
+; This test verifies that CCMPXi (immediate form) is selected, not CCMPXr.
+; Assembly output would look identical for both, so we must check machine IR.
+
+define <1 x i1> @test_war_mask_ccmp(ptr %a, ptr %b) {
+; CHECK-LABEL: name: test_war_mask_ccmp
+; CHECK: bb.0.entry:
+; CHECK:   [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+; CHECK:   [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+; CHECK:   [[SUBSXrr:%[0-9]+]]:gpr64common = SUBSXrr [[COPY1]], [[COPY]], implicit-def dead $nzcv
+; CHECK:   [[SUBSXri:%[0-9]+]]:gpr64 = SUBSXri [[SUBSXrr]], 0, 0, implicit-def $nzcv
+; CHECK:   CCMPXi [[SUBSXrr]], 0, 4, 13, implicit-def $nzcv, implicit $nzcv
+; CHECK:   [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
+; CHECK:   $w0 = COPY [[CSINCWr]]
+; CHECK:   RET_ReallyLR implicit $w0
+entry:
+  %0 = call <1 x i1> @llvm.loop.dependence.war.mask.v1i1(ptr %a, ptr %b, i64 1)
+  ret <1 x i1> %0
+}
+
+define <1 x i1> @test_raw_mask_ccmp(ptr %a, ptr %b) {
+; CHECK-LABEL: name: test_raw_mask_ccmp
+; CHECK: bb.0.entry:
+; CHECK:   [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+; CHECK:   [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+; CHECK:   [[SUBSXrr:%[0-9]+]]:gpr64common = SUBSXrr [[COPY1]], [[COPY]], implicit-def dead $nzcv
+; CHECK:   [[SUBSXri:%[0-9]+]]:gpr64 = SUBSXri [[SUBSXrr]], 0, 0, implicit-def $nzcv
+; CHECK:   CCMPXi [[SUBSXrr]], 0, 4, 13, implicit-def $nzcv, implicit $nzcv
+; CHECK:   [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
+; CHECK:   $w0 = COPY [[CSINCWr]]
+; CHECK:   RET_ReallyLR implicit $w0
+entry:
+  %0 = call <1 x i1> @llvm.loop.dependence.raw.mask.v1i1(ptr %a, ptr %b, i64 1)
+  ret <1 x i1> %0
+}
+
+declare <1 x i1> @llvm.loop.dependence.war.mask.v1i1(ptr, ptr, i64)
+declare <1 x i1> @llvm.loop.dependence.raw.mask.v1i1(ptr, ptr, i64)
+

>From 67e8ebc32ea813e602833516d0aa62188c409d13 Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Sat, 15 Nov 2025 14:02:51 -0500
Subject: [PATCH 2/2] [SelectionDAG] Fix AArch64 machine verifier bug when
 expanding LOOP_DEPENDENCE_MASK

We did not ensure that condition codes like mi/pl were filtered out when swapping the comparison operands. In addition, TargetConstant nodes don't match TableGen ImmLeaf patterns during instruction selection, so when this zero constant flowed into the AArch64 CCMP formation code, the machine verifier hit an assertion in expensive checks.
---
 .../SelectionDAG/LegalizeVectorOps.cpp        |  2 +-
 .../SelectionDAG/LegalizeVectorTypes.cpp      |  2 +-
 .../Target/AArch64/AArch64ISelLowering.cpp    |  9 ++--
 llvm/test/CodeGen/AArch64/alias_mask.ll       | 10 ++---
 .../AArch64/loop-dependence-mask-ccmp.ll      | 42 ++++++++++---------
 5 files changed, 34 insertions(+), 31 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 7d979caa8bf82..e8d9bce43f6ea 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -1829,7 +1829,7 @@ SDValue VectorLegalizer::ExpandLOOP_DEPENDENCE_MASK(SDNode *N) {
   // If the difference is positive then some elements may alias
   EVT CmpVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                                      Diff.getValueType());
-  SDValue Zero = DAG.getTargetConstant(0, DL, PtrVT);
+  SDValue Zero = DAG.getConstant(0, DL, PtrVT);
   SDValue Cmp = DAG.getSetCC(DL, CmpVT, Diff, Zero,
                              IsReadAfterWrite ? ISD::SETEQ : ISD::SETLE);
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 417122d467054..71eeee78bd868 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -413,7 +413,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOOP_DEPENDENCE_MASK(SDNode *N) {
   SDValue Diff = DAG.getNode(ISD::SUB, DL, PtrVT, SinkValue, SourceValue);
   EVT CmpVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                                      Diff.getValueType());
-  SDValue Zero = DAG.getTargetConstant(0, DL, PtrVT);
+  SDValue Zero = DAG.getConstant(0, DL, PtrVT);
   return DAG.getNode(ISD::OR, DL, CmpVT,
                      DAG.getSetCC(DL, CmpVT, Diff, EltSize, ISD::SETGE),
                      DAG.getSetCC(DL, CmpVT, Diff, Zero, ISD::SETEQ));
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 81c87ace76e56..564c7bca0a0d6 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -26013,9 +26013,12 @@ static SDValue reassociateCSELOperandsForCSE(SDNode *N, SelectionDAG &DAG) {
     // Try again with the operands of the SUBS instruction and the condition
     // swapped. Due to canonicalization, this only helps for non-constant
     // operands of the SUBS instruction.
-    std::swap(CmpOpToMatch, CmpOpOther);
-    if (SDValue R = Fold(getSwappedCondition(CC), CmpOpToMatch, CmpOpToMatch))
-      return R;
+    auto NewCC = getSwappedCondition(CC);
+    if (NewCC != AArch64CC::AL) {
+      std::swap(CmpOpToMatch, CmpOpOther);
+      if (SDValue R = Fold(NewCC, CmpOpToMatch, CmpOpToMatch))
+        return R;
+    }
     return SDValue();
   }
 
diff --git a/llvm/test/CodeGen/AArch64/alias_mask.ll b/llvm/test/CodeGen/AArch64/alias_mask.ll
index c5d3677366480..fdd0a6a4709da 100644
--- a/llvm/test/CodeGen/AArch64/alias_mask.ll
+++ b/llvm/test/CodeGen/AArch64/alias_mask.ll
@@ -793,9 +793,8 @@ define <1 x i1> @whilewr_8_scalarize(ptr %a, ptr %b) {
 ; CHECK-LABEL: whilewr_8_scalarize:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    sub x8, x1, x0
-; CHECK-NEXT:    cmp x8, #0
-; CHECK-NEXT:    ccmp x8, #0, #4, le
-; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    cmn x8, #1
+; CHECK-NEXT:    cset w0, gt
 ; CHECK-NEXT:    ret
 entry:
   %0 = call <1 x i1> @llvm.loop.dependence.war.mask.v1i1(ptr %a, ptr %b, i64 1)
@@ -845,9 +844,8 @@ define <1 x i1> @whilerw_8_scalarize(ptr %a, ptr %b) {
 ; CHECK-LABEL: whilerw_8_scalarize:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    sub x8, x1, x0
-; CHECK-NEXT:    cmp x8, #0
-; CHECK-NEXT:    ccmp x8, #0, #4, le
-; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    cmn x8, #1
+; CHECK-NEXT:    cset w0, gt
 ; CHECK-NEXT:    ret
 entry:
   %0 = call <1 x i1> @llvm.loop.dependence.raw.mask.v1i1(ptr %a, ptr %b, i64 1)
diff --git a/llvm/test/CodeGen/AArch64/loop-dependence-mask-ccmp.ll b/llvm/test/CodeGen/AArch64/loop-dependence-mask-ccmp.ll
index d01507f5151a2..ad66a631ad290 100644
--- a/llvm/test/CodeGen/AArch64/loop-dependence-mask-ccmp.ll
+++ b/llvm/test/CodeGen/AArch64/loop-dependence-mask-ccmp.ll
@@ -19,32 +19,34 @@
 ; Assembly output would look identical for both, so we must check machine IR.
 
 define <1 x i1> @test_war_mask_ccmp(ptr %a, ptr %b) {
-; CHECK-LABEL: name: test_war_mask_ccmp
-; CHECK: bb.0.entry:
-; CHECK:   [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
-; CHECK:   [[COPY:%[0-9]+]]:gpr64 = COPY $x0
-; CHECK:   [[SUBSXrr:%[0-9]+]]:gpr64common = SUBSXrr [[COPY1]], [[COPY]], implicit-def dead $nzcv
-; CHECK:   [[SUBSXri:%[0-9]+]]:gpr64 = SUBSXri [[SUBSXrr]], 0, 0, implicit-def $nzcv
-; CHECK:   CCMPXi [[SUBSXrr]], 0, 4, 13, implicit-def $nzcv, implicit $nzcv
-; CHECK:   [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
-; CHECK:   $w0 = COPY [[CSINCWr]]
-; CHECK:   RET_ReallyLR implicit $w0
+  ; CHECK-LABEL: name: test_war_mask_ccmp
+  ; CHECK: bb.0.entry:
+  ; CHECK-NEXT:   liveins: $x0, $x1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr64 = COPY $x1
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr64 = COPY $x0
+  ; CHECK-NEXT:   [[SUBSXrr:%[0-9]+]]:gpr64common = SUBSXrr [[COPY]], [[COPY1]], implicit-def dead $nzcv
+  ; CHECK-NEXT:   [[ADDSXri:%[0-9]+]]:gpr64 = ADDSXri killed [[SUBSXrr]], 1, 0, implicit-def $nzcv
+  ; CHECK-NEXT:   [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv
+  ; CHECK-NEXT:   $w0 = COPY [[CSINCWr]]
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
 entry:
   %0 = call <1 x i1> @llvm.loop.dependence.war.mask.v1i1(ptr %a, ptr %b, i64 1)
   ret <1 x i1> %0
 }
 
 define <1 x i1> @test_raw_mask_ccmp(ptr %a, ptr %b) {
-; CHECK-LABEL: name: test_raw_mask_ccmp
-; CHECK: bb.0.entry:
-; CHECK:   [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
-; CHECK:   [[COPY:%[0-9]+]]:gpr64 = COPY $x0
-; CHECK:   [[SUBSXrr:%[0-9]+]]:gpr64common = SUBSXrr [[COPY1]], [[COPY]], implicit-def dead $nzcv
-; CHECK:   [[SUBSXri:%[0-9]+]]:gpr64 = SUBSXri [[SUBSXrr]], 0, 0, implicit-def $nzcv
-; CHECK:   CCMPXi [[SUBSXrr]], 0, 4, 13, implicit-def $nzcv, implicit $nzcv
-; CHECK:   [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
-; CHECK:   $w0 = COPY [[CSINCWr]]
-; CHECK:   RET_ReallyLR implicit $w0
+  ; CHECK-LABEL: name: test_raw_mask_ccmp
+  ; CHECK: bb.0.entry:
+  ; CHECK-NEXT:   liveins: $x0, $x1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr64 = COPY $x1
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr64 = COPY $x0
+  ; CHECK-NEXT:   [[SUBSXrr:%[0-9]+]]:gpr64common = SUBSXrr [[COPY]], [[COPY1]], implicit-def dead $nzcv
+  ; CHECK-NEXT:   [[ADDSXri:%[0-9]+]]:gpr64 = ADDSXri killed [[SUBSXrr]], 1, 0, implicit-def $nzcv
+  ; CHECK-NEXT:   [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv
+  ; CHECK-NEXT:   $w0 = COPY [[CSINCWr]]
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
 entry:
   %0 = call <1 x i1> @llvm.loop.dependence.raw.mask.v1i1(ptr %a, ptr %b, i64 1)
   ret <1 x i1> %0



More information about the llvm-commits mailing list