[llvm] [AArch64] Fix active.lane.mask(0, cttz.elts(x)) -> 'brkb' transform (PR #180177)
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 6 04:04:58 PST 2026
https://github.com/sdesmalen-arm created https://github.com/llvm/llvm-project/pull/180177
If the result type and the mask's type don't match, the transform is not valid.
>From dc75c215ebb0c84c2c2c704d468513492f852240 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Fri, 6 Feb 2026 10:22:44 +0000
Subject: [PATCH 1/2] Pre-commit test
---
.../CodeGen/AArch64/sve-mask-partition.ll | 52 +++++++++++++++++++
1 file changed, 52 insertions(+)
diff --git a/llvm/test/CodeGen/AArch64/sve-mask-partition.ll b/llvm/test/CodeGen/AArch64/sve-mask-partition.ll
index 8b712bd7e42a7..e2e7d80545dc8 100644
--- a/llvm/test/CodeGen/AArch64/sve-mask-partition.ll
+++ b/llvm/test/CodeGen/AArch64/sve-mask-partition.ll
@@ -558,3 +558,55 @@ define <vscale x 16 x i1> @mask_exclude_active_nxv16_nonzero_lower_bound(<vscale
%mask.out = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 1, i64 %tz.elts)
ret <vscale x 16 x i1> %mask.out
}
+
+define <vscale x 4 x i1> @mask_exclude_active_narrower_result_type(<vscale x 8 x i1> %mask.in) {
+; CHECK-LABEL: mask_exclude_active_narrower_result_type:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p1.h
+; CHECK-NEXT: brkb p0.b, p1/z, p0.b
+; CHECK-NEXT: ret
+ %tz.elts = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 8 x i1> %mask.in, i1 false)
+ %mask.out = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 %tz.elts)
+ ret <vscale x 4 x i1> %mask.out
+}
+
+define <vscale x 16 x i1> @mask_exclude_active_wider_result_type(<vscale x 8 x i1> %mask.in) {
+; CHECK-LABEL: mask_exclude_active_wider_result_type:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p1.h
+; CHECK-NEXT: brkb p0.b, p1/z, p0.b
+; CHECK-NEXT: ret
+ %tz.elts = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 8 x i1> %mask.in, i1 false)
+ %mask.out = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 %tz.elts)
+ ret <vscale x 16 x i1> %mask.out
+}
+
+define <4 x i1> @mask_exclude_active_narrower_result_type_fixed(<8 x i1> %mask.in) {
+; CHECK-LABEL: mask_exclude_active_narrower_result_type_fixed:
+; CHECK: // %bb.0:
+; CHECK-NEXT: shl v0.8b, v0.8b, #7
+; CHECK-NEXT: ptrue p0.b, vl8
+; CHECK-NEXT: cmpne p1.b, p0/z, z0.b, #0
+; CHECK-NEXT: brkb p0.b, p0/z, p1.b
+; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: ret
+ %tz.elts = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> %mask.in, i1 false)
+ %mask.out = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 0, i64 %tz.elts)
+ ret <4 x i1> %mask.out
+}
+
+define <16 x i1> @mask_exclude_active_wider_result_type_fixed(<8 x i1> %mask.in) {
+; CHECK-LABEL: mask_exclude_active_wider_result_type_fixed:
+; CHECK: // %bb.0:
+; CHECK-NEXT: shl v0.8b, v0.8b, #7
+; CHECK-NEXT: ptrue p0.b, vl8
+; CHECK-NEXT: cmpne p1.b, p0/z, z0.b, #0
+; CHECK-NEXT: brkb p0.b, p0/z, p1.b
+; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: ret
+ %tz.elts = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> %mask.in, i1 false)
+ %mask.out = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 0, i64 %tz.elts)
+ ret <16 x i1> %mask.out
+}
>From c9bbe9dfb12a9aa41b3ee4d879f5bcb7824625b7 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Fri, 6 Feb 2026 10:58:01 +0000
Subject: [PATCH 2/2] Don't optimize to 'brkb' if the result type and mask
don't match.
---
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 4 +++-
llvm/test/CodeGen/AArch64/sve-mask-partition.ll | 6 ++++++
2 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 94216f6572f0a..1b826d8b0e1fa 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6049,11 +6049,13 @@ static SDValue optimizeBrk(SDNode *N, SelectionDAG &DAG) {
// We're looking for an upper bound based on CTTZ_ELTS; this would be selected
// as a cntp(brk(Pg, Mask)), but if we're just going to make a whilelo based
// on that then we just need the brk.
- if (Upper.getOpcode() != AArch64ISD::CTTZ_ELTS || !VT.isScalableVector())
+ if (Upper.getOpcode() != AArch64ISD::CTTZ_ELTS || !VT.isScalableVector() ||
+ Upper.getOperand(0).getValueType() != VT)
return SDValue();
SDValue Pg = Upper->getOperand(0);
SDValue Mask = Upper->getOperand(1);
+ assert(Pg.getValueType() == Mask.getValueType() && "Predicate type mismatch");
// brk{a,b} only support .b forms, so cast to make sure all our p regs match.
Pg = getSVEPredicateBitCast(MVT::nxv16i1, Pg, DAG);
diff --git a/llvm/test/CodeGen/AArch64/sve-mask-partition.ll b/llvm/test/CodeGen/AArch64/sve-mask-partition.ll
index e2e7d80545dc8..30c5220decda7 100644
--- a/llvm/test/CodeGen/AArch64/sve-mask-partition.ll
+++ b/llvm/test/CodeGen/AArch64/sve-mask-partition.ll
@@ -564,6 +564,8 @@ define <vscale x 4 x i1> @mask_exclude_active_narrower_result_type(<vscale x 8 x
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: brkb p0.b, p1/z, p0.b
+; CHECK-NEXT: cntp x8, p0, p0.h
+; CHECK-NEXT: whilelo p0.s, xzr, x8
; CHECK-NEXT: ret
%tz.elts = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 8 x i1> %mask.in, i1 false)
%mask.out = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 %tz.elts)
@@ -575,6 +577,8 @@ define <vscale x 16 x i1> @mask_exclude_active_wider_result_type(<vscale x 8 x i
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: brkb p0.b, p1/z, p0.b
+; CHECK-NEXT: cntp x8, p0, p0.h
+; CHECK-NEXT: whilelo p0.b, xzr, x8
; CHECK-NEXT: ret
%tz.elts = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 8 x i1> %mask.in, i1 false)
%mask.out = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 %tz.elts)
@@ -588,6 +592,8 @@ define <4 x i1> @mask_exclude_active_narrower_result_type_fixed(<8 x i1> %mask.i
; CHECK-NEXT: ptrue p0.b, vl8
; CHECK-NEXT: cmpne p1.b, p0/z, z0.b, #0
; CHECK-NEXT: brkb p0.b, p0/z, p1.b
+; CHECK-NEXT: cntp x8, p0, p0.b
+; CHECK-NEXT: whilelo p0.h, xzr, x8
; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
More information about the llvm-commits
mailing list