[llvm] [AArch64] Consider StreamingSVE in shouldExpandGetActiveLaneMask (PR #144722)
Kerry McLaughlin via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 18 08:17:46 PDT 2025
https://github.com/kmclaughlin-arm created https://github.com/llvm/llvm-project/pull/144722
If streaming SVE is available, we may be able to lower the
llvm.get.active.lane.mask intrinsic to the GET_ACTIVE_LANE_MASK node
instead of expanding it.
Also add GET_ACTIVE_LANE_MASK to addTypeForFixedLengthSVE to ensure we
lower to the SVE instruction when useSVEForFixedLengthVectors is true.
From 058d55faf4fd2c8349d47d14cdcde6590db3992a Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Wed, 18 Jun 2025 14:42:04 +0000
Subject: [PATCH] [AArch64] Consider streaming SVE in
shouldExpandGetActiveLaneMask
If streaming SVE is available, we may be able to lower the
llvm.get.active.lane.mask intrinsic to the GET_ACTIVE_LANE_MASK node
instead of expanding it.
Also add GET_ACTIVE_LANE_MASK to addTypeForFixedLengthSVE to ensure we
lower to the SVE instruction when useSVEForFixedLengthVectors is true.
---
.../Target/AArch64/AArch64ISelLowering.cpp | 7 +-
llvm/test/CodeGen/AArch64/active_lane_mask.ll | 139 ++++++++++++------
2 files changed, 103 insertions(+), 43 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 1169efce3123f..9af73851ef48b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2113,7 +2113,8 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) {
bool AArch64TargetLowering::shouldExpandGetActiveLaneMask(EVT ResVT,
EVT OpVT) const {
// Only SVE has a 1:1 mapping from intrinsic -> instruction (whilelo).
- if (!Subtarget->hasSVE() || ResVT.getVectorElementType() != MVT::i1)
+ if (!Subtarget->isSVEorStreamingSVEAvailable() ||
+ ResVT.getVectorElementType() != MVT::i1)
return true;
// Only support illegal types if the result is scalable and min elements > 1.
@@ -2283,6 +2284,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
setOperationAction(ISD::FSQRT, VT, Default);
setOperationAction(ISD::FSUB, VT, Default);
setOperationAction(ISD::FTRUNC, VT, Default);
+ setOperationAction(ISD::GET_ACTIVE_LANE_MASK, VT, Default);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Default);
setOperationAction(ISD::LOAD, VT, PreferNEON ? Legal : Default);
setOperationAction(ISD::MGATHER, VT, PreferSVE ? Default : Expand);
@@ -18099,7 +18101,8 @@ performActiveLaneMaskCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
/*IsEqual=*/false))
return While;
- if (!ST->hasSVE2p1() && !(ST->hasSME2() && ST->isStreaming()))
+ if (!N->getValueType(0).isScalableVT() ||
+ (!ST->hasSVE2p1() && !(ST->hasSME2() && ST->isStreaming())))
return SDValue();
if (!N->hasNUsesOfValue(2, 0))
diff --git a/llvm/test/CodeGen/AArch64/active_lane_mask.ll b/llvm/test/CodeGen/AArch64/active_lane_mask.ll
index fd34a1498bcd2..cc05836d3d156 100644
--- a/llvm/test/CodeGen/AArch64/active_lane_mask.ll
+++ b/llvm/test/CodeGen/AArch64/active_lane_mask.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SVE
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,CHECK-STREAMING
; == Scalable ==
@@ -292,61 +293,117 @@ define <2 x i1> @lane_mask_v2i1_i64(i64 %index, i64 %TC) {
}
define <16 x i1> @lane_mask_v16i1_i8(i8 %index, i8 %TC) {
-; CHECK-LABEL: lane_mask_v16i1_i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: index z0.b, #0, #1
-; CHECK-NEXT: dup v1.16b, w0
-; CHECK-NEXT: uqadd v0.16b, v1.16b, v0.16b
-; CHECK-NEXT: dup v1.16b, w1
-; CHECK-NEXT: cmhi v0.16b, v1.16b, v0.16b
-; CHECK-NEXT: ret
+; CHECK-SVE-LABEL: lane_mask_v16i1_i8:
+; CHECK-SVE: // %bb.0:
+; CHECK-SVE-NEXT: index z0.b, #0, #1
+; CHECK-SVE-NEXT: dup v1.16b, w0
+; CHECK-SVE-NEXT: uqadd v0.16b, v1.16b, v0.16b
+; CHECK-SVE-NEXT: dup v1.16b, w1
+; CHECK-SVE-NEXT: cmhi v0.16b, v1.16b, v0.16b
+; CHECK-SVE-NEXT: ret
+;
+; CHECK-STREAMING-LABEL: lane_mask_v16i1_i8:
+; CHECK-STREAMING: // %bb.0:
+; CHECK-STREAMING-NEXT: index z0.b, w0, #1
+; CHECK-STREAMING-NEXT: mov z1.b, w0
+; CHECK-STREAMING-NEXT: ptrue p0.b, vl16
+; CHECK-STREAMING-NEXT: cmphi p1.b, p0/z, z1.b, z0.b
+; CHECK-STREAMING-NEXT: mov z1.b, p1/z, #-1 // =0xffffffffffffffff
+; CHECK-STREAMING-NEXT: orr z0.d, z0.d, z1.d
+; CHECK-STREAMING-NEXT: mov z1.b, w1
+; CHECK-STREAMING-NEXT: cmphi p0.b, p0/z, z1.b, z0.b
+; CHECK-STREAMING-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-STREAMING-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-STREAMING-NEXT: ret
%active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i8(i8 %index, i8 %TC)
ret <16 x i1> %active.lane.mask
}
define <8 x i1> @lane_mask_v8i1_i8(i8 %index, i8 %TC) {
-; CHECK-LABEL: lane_mask_v8i1_i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: index z0.b, #0, #1
-; CHECK-NEXT: dup v1.8b, w0
-; CHECK-NEXT: uqadd v0.8b, v1.8b, v0.8b
-; CHECK-NEXT: dup v1.8b, w1
-; CHECK-NEXT: cmhi v0.8b, v1.8b, v0.8b
-; CHECK-NEXT: ret
+; CHECK-SVE-LABEL: lane_mask_v8i1_i8:
+; CHECK-SVE: // %bb.0:
+; CHECK-SVE-NEXT: index z0.b, #0, #1
+; CHECK-SVE-NEXT: dup v1.8b, w0
+; CHECK-SVE-NEXT: uqadd v0.8b, v1.8b, v0.8b
+; CHECK-SVE-NEXT: dup v1.8b, w1
+; CHECK-SVE-NEXT: cmhi v0.8b, v1.8b, v0.8b
+; CHECK-SVE-NEXT: ret
+;
+; CHECK-STREAMING-LABEL: lane_mask_v8i1_i8:
+; CHECK-STREAMING: // %bb.0:
+; CHECK-STREAMING-NEXT: index z0.b, w0, #1
+; CHECK-STREAMING-NEXT: mov z1.b, w0
+; CHECK-STREAMING-NEXT: ptrue p0.b, vl8
+; CHECK-STREAMING-NEXT: cmphi p1.b, p0/z, z1.b, z0.b
+; CHECK-STREAMING-NEXT: mov z1.b, p1/z, #-1 // =0xffffffffffffffff
+; CHECK-STREAMING-NEXT: orr z0.d, z0.d, z1.d
+; CHECK-STREAMING-NEXT: mov z1.b, w1
+; CHECK-STREAMING-NEXT: cmphi p0.b, p0/z, z1.b, z0.b
+; CHECK-STREAMING-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-STREAMING-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-STREAMING-NEXT: ret
%active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i8(i8 %index, i8 %TC)
ret <8 x i1> %active.lane.mask
}
define <4 x i1> @lane_mask_v4i1_i8(i8 %index, i8 %TC) {
-; CHECK-LABEL: lane_mask_v4i1_i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: dup v0.4h, w0
-; CHECK-NEXT: index z1.h, #0, #1
-; CHECK-NEXT: movi d2, #0xff00ff00ff00ff
-; CHECK-NEXT: dup v3.4h, w1
-; CHECK-NEXT: bic v0.4h, #255, lsl #8
-; CHECK-NEXT: bic v3.4h, #255, lsl #8
-; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
-; CHECK-NEXT: umin v0.4h, v0.4h, v2.4h
-; CHECK-NEXT: cmhi v0.4h, v3.4h, v0.4h
-; CHECK-NEXT: ret
+; CHECK-SVE-LABEL: lane_mask_v4i1_i8:
+; CHECK-SVE: // %bb.0:
+; CHECK-SVE-NEXT: dup v0.4h, w0
+; CHECK-SVE-NEXT: index z1.h, #0, #1
+; CHECK-SVE-NEXT: movi d2, #0xff00ff00ff00ff
+; CHECK-SVE-NEXT: dup v3.4h, w1
+; CHECK-SVE-NEXT: bic v0.4h, #255, lsl #8
+; CHECK-SVE-NEXT: bic v3.4h, #255, lsl #8
+; CHECK-SVE-NEXT: add v0.4h, v0.4h, v1.4h
+; CHECK-SVE-NEXT: umin v0.4h, v0.4h, v2.4h
+; CHECK-SVE-NEXT: cmhi v0.4h, v3.4h, v0.4h
+; CHECK-SVE-NEXT: ret
+;
+; CHECK-STREAMING-LABEL: lane_mask_v4i1_i8:
+; CHECK-STREAMING: // %bb.0:
+; CHECK-STREAMING-NEXT: mov z1.h, w0
+; CHECK-STREAMING-NEXT: index z0.h, #0, #1
+; CHECK-STREAMING-NEXT: ptrue p0.h, vl4
+; CHECK-STREAMING-NEXT: and z1.h, z1.h, #0xff
+; CHECK-STREAMING-NEXT: add z0.h, z1.h, z0.h
+; CHECK-STREAMING-NEXT: mov z1.h, w1
+; CHECK-STREAMING-NEXT: umin z0.h, z0.h, #255
+; CHECK-STREAMING-NEXT: and z1.h, z1.h, #0xff
+; CHECK-STREAMING-NEXT: cmphi p0.h, p0/z, z1.h, z0.h
+; CHECK-STREAMING-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-STREAMING-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-STREAMING-NEXT: ret
%active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i8(i8 %index, i8 %TC)
ret <4 x i1> %active.lane.mask
}
define <2 x i1> @lane_mask_v2i1_i8(i8 %index, i8 %TC) {
-; CHECK-LABEL: lane_mask_v2i1_i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi d0, #0x0000ff000000ff
-; CHECK-NEXT: dup v1.2s, w0
-; CHECK-NEXT: index z2.s, #0, #1
-; CHECK-NEXT: dup v3.2s, w1
-; CHECK-NEXT: and v1.8b, v1.8b, v0.8b
-; CHECK-NEXT: add v1.2s, v1.2s, v2.2s
-; CHECK-NEXT: and v2.8b, v3.8b, v0.8b
-; CHECK-NEXT: umin v0.2s, v1.2s, v0.2s
-; CHECK-NEXT: cmhi v0.2s, v2.2s, v0.2s
-; CHECK-NEXT: ret
+; CHECK-SVE-LABEL: lane_mask_v2i1_i8:
+; CHECK-SVE: // %bb.0:
+; CHECK-SVE-NEXT: movi d0, #0x0000ff000000ff
+; CHECK-SVE-NEXT: dup v1.2s, w0
+; CHECK-SVE-NEXT: index z2.s, #0, #1
+; CHECK-SVE-NEXT: dup v3.2s, w1
+; CHECK-SVE-NEXT: and v1.8b, v1.8b, v0.8b
+; CHECK-SVE-NEXT: add v1.2s, v1.2s, v2.2s
+; CHECK-SVE-NEXT: and v2.8b, v3.8b, v0.8b
+; CHECK-SVE-NEXT: umin v0.2s, v1.2s, v0.2s
+; CHECK-SVE-NEXT: cmhi v0.2s, v2.2s, v0.2s
+; CHECK-SVE-NEXT: ret
+;
+; CHECK-STREAMING-LABEL: lane_mask_v2i1_i8:
+; CHECK-STREAMING: // %bb.0:
+; CHECK-STREAMING-NEXT: and w8, w0, #0xff
+; CHECK-STREAMING-NEXT: ptrue p0.s, vl2
+; CHECK-STREAMING-NEXT: index z0.s, w8, #1
+; CHECK-STREAMING-NEXT: and w8, w1, #0xff
+; CHECK-STREAMING-NEXT: mov z1.s, w8
+; CHECK-STREAMING-NEXT: umin z0.s, z0.s, #255
+; CHECK-STREAMING-NEXT: cmphi p0.s, p0/z, z1.s, z0.s
+; CHECK-STREAMING-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-STREAMING-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-STREAMING-NEXT: ret
%active.lane.mask = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i8(i8 %index, i8 %TC)
ret <2 x i1> %active.lane.mask
}
More information about the llvm-commits
mailing list