[llvm] [AArch64] Consider StreamingSVE in shouldExpandGetActiveLaneMask (PR #144722)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 18 08:18:20 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-aarch64

Author: Kerry McLaughlin (kmclaughlin-arm)

<details>
<summary>Changes</summary>

If StreamingSVE is available, we may be able to lower the
llvm.get.active.lane.mask intrinsic to the GET_ACTIVE_LANE_MASK node
instead of expanding it.
This patch also adds the node to addTypeForFixedLengthSVE to ensure we lower
to the SVE instruction when useSVEForFixedLengthVectors is true.

---
Full diff: https://github.com/llvm/llvm-project/pull/144722.diff


2 Files Affected:

- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+5-2) 
- (modified) llvm/test/CodeGen/AArch64/active_lane_mask.ll (+98-41) 


``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 1169efce3123f..9af73851ef48b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2113,7 +2113,8 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) {
 bool AArch64TargetLowering::shouldExpandGetActiveLaneMask(EVT ResVT,
                                                           EVT OpVT) const {
   // Only SVE has a 1:1 mapping from intrinsic -> instruction (whilelo).
-  if (!Subtarget->hasSVE() || ResVT.getVectorElementType() != MVT::i1)
+  if (!Subtarget->isSVEorStreamingSVEAvailable() ||
+      ResVT.getVectorElementType() != MVT::i1)
     return true;
 
   // Only support illegal types if the result is scalable and min elements > 1.
@@ -2283,6 +2284,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
   setOperationAction(ISD::FSQRT, VT, Default);
   setOperationAction(ISD::FSUB, VT, Default);
   setOperationAction(ISD::FTRUNC, VT, Default);
+  setOperationAction(ISD::GET_ACTIVE_LANE_MASK, VT, Default);
   setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Default);
   setOperationAction(ISD::LOAD, VT, PreferNEON ? Legal : Default);
   setOperationAction(ISD::MGATHER, VT, PreferSVE ? Default : Expand);
@@ -18099,7 +18101,8 @@ performActiveLaneMaskCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
                                                 /*IsEqual=*/false))
     return While;
 
-  if (!ST->hasSVE2p1() && !(ST->hasSME2() && ST->isStreaming()))
+  if (!N->getValueType(0).isScalableVT() ||
+      (!ST->hasSVE2p1() && !(ST->hasSME2() && ST->isStreaming())))
     return SDValue();
 
   if (!N->hasNUsesOfValue(2, 0))
diff --git a/llvm/test/CodeGen/AArch64/active_lane_mask.ll b/llvm/test/CodeGen/AArch64/active_lane_mask.ll
index fd34a1498bcd2..cc05836d3d156 100644
--- a/llvm/test/CodeGen/AArch64/active_lane_mask.ll
+++ b/llvm/test/CodeGen/AArch64/active_lane_mask.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SVE
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,CHECK-STREAMING
 
 ; == Scalable ==
 
@@ -292,61 +293,117 @@ define <2 x i1> @lane_mask_v2i1_i64(i64 %index, i64 %TC) {
 }
 
 define <16 x i1> @lane_mask_v16i1_i8(i8 %index, i8 %TC) {
-; CHECK-LABEL: lane_mask_v16i1_i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    index z0.b, #0, #1
-; CHECK-NEXT:    dup v1.16b, w0
-; CHECK-NEXT:    uqadd v0.16b, v1.16b, v0.16b
-; CHECK-NEXT:    dup v1.16b, w1
-; CHECK-NEXT:    cmhi v0.16b, v1.16b, v0.16b
-; CHECK-NEXT:    ret
+; CHECK-SVE-LABEL: lane_mask_v16i1_i8:
+; CHECK-SVE:       // %bb.0:
+; CHECK-SVE-NEXT:    index z0.b, #0, #1
+; CHECK-SVE-NEXT:    dup v1.16b, w0
+; CHECK-SVE-NEXT:    uqadd v0.16b, v1.16b, v0.16b
+; CHECK-SVE-NEXT:    dup v1.16b, w1
+; CHECK-SVE-NEXT:    cmhi v0.16b, v1.16b, v0.16b
+; CHECK-SVE-NEXT:    ret
+;
+; CHECK-STREAMING-LABEL: lane_mask_v16i1_i8:
+; CHECK-STREAMING:       // %bb.0:
+; CHECK-STREAMING-NEXT:    index z0.b, w0, #1
+; CHECK-STREAMING-NEXT:    mov z1.b, w0
+; CHECK-STREAMING-NEXT:    ptrue p0.b, vl16
+; CHECK-STREAMING-NEXT:    cmphi p1.b, p0/z, z1.b, z0.b
+; CHECK-STREAMING-NEXT:    mov z1.b, p1/z, #-1 // =0xffffffffffffffff
+; CHECK-STREAMING-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-STREAMING-NEXT:    mov z1.b, w1
+; CHECK-STREAMING-NEXT:    cmphi p0.b, p0/z, z1.b, z0.b
+; CHECK-STREAMING-NEXT:    mov z0.b, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-STREAMING-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-STREAMING-NEXT:    ret
   %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i8(i8 %index, i8 %TC)
   ret <16 x i1> %active.lane.mask
 }
 
 define <8 x i1> @lane_mask_v8i1_i8(i8 %index, i8 %TC) {
-; CHECK-LABEL: lane_mask_v8i1_i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    index z0.b, #0, #1
-; CHECK-NEXT:    dup v1.8b, w0
-; CHECK-NEXT:    uqadd v0.8b, v1.8b, v0.8b
-; CHECK-NEXT:    dup v1.8b, w1
-; CHECK-NEXT:    cmhi v0.8b, v1.8b, v0.8b
-; CHECK-NEXT:    ret
+; CHECK-SVE-LABEL: lane_mask_v8i1_i8:
+; CHECK-SVE:       // %bb.0:
+; CHECK-SVE-NEXT:    index z0.b, #0, #1
+; CHECK-SVE-NEXT:    dup v1.8b, w0
+; CHECK-SVE-NEXT:    uqadd v0.8b, v1.8b, v0.8b
+; CHECK-SVE-NEXT:    dup v1.8b, w1
+; CHECK-SVE-NEXT:    cmhi v0.8b, v1.8b, v0.8b
+; CHECK-SVE-NEXT:    ret
+;
+; CHECK-STREAMING-LABEL: lane_mask_v8i1_i8:
+; CHECK-STREAMING:       // %bb.0:
+; CHECK-STREAMING-NEXT:    index z0.b, w0, #1
+; CHECK-STREAMING-NEXT:    mov z1.b, w0
+; CHECK-STREAMING-NEXT:    ptrue p0.b, vl8
+; CHECK-STREAMING-NEXT:    cmphi p1.b, p0/z, z1.b, z0.b
+; CHECK-STREAMING-NEXT:    mov z1.b, p1/z, #-1 // =0xffffffffffffffff
+; CHECK-STREAMING-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-STREAMING-NEXT:    mov z1.b, w1
+; CHECK-STREAMING-NEXT:    cmphi p0.b, p0/z, z1.b, z0.b
+; CHECK-STREAMING-NEXT:    mov z0.b, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-STREAMING-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; CHECK-STREAMING-NEXT:    ret
   %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i8(i8 %index, i8 %TC)
   ret <8 x i1> %active.lane.mask
 }
 
 define <4 x i1> @lane_mask_v4i1_i8(i8 %index, i8 %TC) {
-; CHECK-LABEL: lane_mask_v4i1_i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    dup v0.4h, w0
-; CHECK-NEXT:    index z1.h, #0, #1
-; CHECK-NEXT:    movi d2, #0xff00ff00ff00ff
-; CHECK-NEXT:    dup v3.4h, w1
-; CHECK-NEXT:    bic v0.4h, #255, lsl #8
-; CHECK-NEXT:    bic v3.4h, #255, lsl #8
-; CHECK-NEXT:    add v0.4h, v0.4h, v1.4h
-; CHECK-NEXT:    umin v0.4h, v0.4h, v2.4h
-; CHECK-NEXT:    cmhi v0.4h, v3.4h, v0.4h
-; CHECK-NEXT:    ret
+; CHECK-SVE-LABEL: lane_mask_v4i1_i8:
+; CHECK-SVE:       // %bb.0:
+; CHECK-SVE-NEXT:    dup v0.4h, w0
+; CHECK-SVE-NEXT:    index z1.h, #0, #1
+; CHECK-SVE-NEXT:    movi d2, #0xff00ff00ff00ff
+; CHECK-SVE-NEXT:    dup v3.4h, w1
+; CHECK-SVE-NEXT:    bic v0.4h, #255, lsl #8
+; CHECK-SVE-NEXT:    bic v3.4h, #255, lsl #8
+; CHECK-SVE-NEXT:    add v0.4h, v0.4h, v1.4h
+; CHECK-SVE-NEXT:    umin v0.4h, v0.4h, v2.4h
+; CHECK-SVE-NEXT:    cmhi v0.4h, v3.4h, v0.4h
+; CHECK-SVE-NEXT:    ret
+;
+; CHECK-STREAMING-LABEL: lane_mask_v4i1_i8:
+; CHECK-STREAMING:       // %bb.0:
+; CHECK-STREAMING-NEXT:    mov z1.h, w0
+; CHECK-STREAMING-NEXT:    index z0.h, #0, #1
+; CHECK-STREAMING-NEXT:    ptrue p0.h, vl4
+; CHECK-STREAMING-NEXT:    and z1.h, z1.h, #0xff
+; CHECK-STREAMING-NEXT:    add z0.h, z1.h, z0.h
+; CHECK-STREAMING-NEXT:    mov z1.h, w1
+; CHECK-STREAMING-NEXT:    umin z0.h, z0.h, #255
+; CHECK-STREAMING-NEXT:    and z1.h, z1.h, #0xff
+; CHECK-STREAMING-NEXT:    cmphi p0.h, p0/z, z1.h, z0.h
+; CHECK-STREAMING-NEXT:    mov z0.h, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-STREAMING-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; CHECK-STREAMING-NEXT:    ret
   %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i8(i8 %index, i8 %TC)
   ret <4 x i1> %active.lane.mask
 }
 
 define <2 x i1> @lane_mask_v2i1_i8(i8 %index, i8 %TC) {
-; CHECK-LABEL: lane_mask_v2i1_i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi d0, #0x0000ff000000ff
-; CHECK-NEXT:    dup v1.2s, w0
-; CHECK-NEXT:    index z2.s, #0, #1
-; CHECK-NEXT:    dup v3.2s, w1
-; CHECK-NEXT:    and v1.8b, v1.8b, v0.8b
-; CHECK-NEXT:    add v1.2s, v1.2s, v2.2s
-; CHECK-NEXT:    and v2.8b, v3.8b, v0.8b
-; CHECK-NEXT:    umin v0.2s, v1.2s, v0.2s
-; CHECK-NEXT:    cmhi v0.2s, v2.2s, v0.2s
-; CHECK-NEXT:    ret
+; CHECK-SVE-LABEL: lane_mask_v2i1_i8:
+; CHECK-SVE:       // %bb.0:
+; CHECK-SVE-NEXT:    movi d0, #0x0000ff000000ff
+; CHECK-SVE-NEXT:    dup v1.2s, w0
+; CHECK-SVE-NEXT:    index z2.s, #0, #1
+; CHECK-SVE-NEXT:    dup v3.2s, w1
+; CHECK-SVE-NEXT:    and v1.8b, v1.8b, v0.8b
+; CHECK-SVE-NEXT:    add v1.2s, v1.2s, v2.2s
+; CHECK-SVE-NEXT:    and v2.8b, v3.8b, v0.8b
+; CHECK-SVE-NEXT:    umin v0.2s, v1.2s, v0.2s
+; CHECK-SVE-NEXT:    cmhi v0.2s, v2.2s, v0.2s
+; CHECK-SVE-NEXT:    ret
+;
+; CHECK-STREAMING-LABEL: lane_mask_v2i1_i8:
+; CHECK-STREAMING:       // %bb.0:
+; CHECK-STREAMING-NEXT:    and w8, w0, #0xff
+; CHECK-STREAMING-NEXT:    ptrue p0.s, vl2
+; CHECK-STREAMING-NEXT:    index z0.s, w8, #1
+; CHECK-STREAMING-NEXT:    and w8, w1, #0xff
+; CHECK-STREAMING-NEXT:    mov z1.s, w8
+; CHECK-STREAMING-NEXT:    umin z0.s, z0.s, #255
+; CHECK-STREAMING-NEXT:    cmphi p0.s, p0/z, z1.s, z0.s
+; CHECK-STREAMING-NEXT:    mov z0.s, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-STREAMING-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; CHECK-STREAMING-NEXT:    ret
   %active.lane.mask = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i8(i8 %index, i8 %TC)
   ret <2 x i1> %active.lane.mask
 }

``````````

</details>


https://github.com/llvm/llvm-project/pull/144722


More information about the llvm-commits mailing list