[llvm-branch-commits] [llvm] release/22.x: [AArch64][SME2] Allow lowering to whilelo.x2 in non-streaming mode (#178399) (PR #178672)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Jan 29 07:17:26 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Kerry McLaughlin (kmclaughlin-arm)
<details>
<summary>Changes</summary>
Backport: https://github.com/llvm/llvm-project/commit/162267ee90019c6b8241dcf470a2d3fae2b306a7
---
Full diff: https://github.com/llvm/llvm-project/pull/178672.diff
2 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+6-5)
- (modified) llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll (+26-25)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 0da191d66ff32..5dfa65fca4f4a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1523,8 +1523,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::GET_ACTIVE_LANE_MASK, VT, Legal);
}
- if (Subtarget->hasSVE2p1() ||
- (Subtarget->hasSME2() && Subtarget->isStreaming()))
+ if (Subtarget->isSVEorStreamingSVEAvailable() &&
+ (Subtarget->hasSVE2p1() || Subtarget->hasSME2()))
setOperationAction(ISD::GET_ACTIVE_LANE_MASK, MVT::nxv32i1, Custom);
for (auto VT : {MVT::v16i8, MVT::v8i8, MVT::v4i16, MVT::v2i32})
@@ -19289,7 +19289,8 @@ performActiveLaneMaskCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
return While;
if (!N->getValueType(0).isScalableVector() ||
- (!ST->hasSVE2p1() && !(ST->hasSME2() && ST->isStreaming())))
+ !ST->isSVEorStreamingSVEAvailable() ||
+ !(ST->hasSVE2p1() || ST->hasSME2()))
return SDValue();
// Count the number of users which are extract_vectors.
@@ -29107,8 +29108,8 @@ void AArch64TargetLowering::ReplaceExtractSubVectorResults(
void AArch64TargetLowering::ReplaceGetActiveLaneMaskResults(
SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
- assert((Subtarget->hasSVE2p1() ||
- (Subtarget->hasSME2() && Subtarget->isStreaming())) &&
+ assert((Subtarget->isSVEorStreamingSVEAvailable() &&
+ (Subtarget->hasSVE2p1() || Subtarget->hasSME2())) &&
"Custom lower of get.active.lane.mask missing required feature.");
assert(N->getValueType(0) == MVT::nxv32i1 &&
diff --git a/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll b/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll
index e2c861b40e706..1c3208fd8f604 100644
--- a/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll
+++ b/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll
@@ -1,7 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mattr=+sve < %s | FileCheck %s -check-prefix CHECK-SVE
-; RUN: llc -mattr=+sve2p1 < %s | FileCheck %s -check-prefix CHECK-SVE2p1-SME2 -check-prefix CHECK-SVE2p1
-; RUN: llc -mattr=+sve -mattr=+sme2 -force-streaming < %s | FileCheck %s -check-prefix CHECK-SVE2p1-SME2 -check-prefix CHECK-SME2
+; RUN: llc -mattr=+sve2p1 < %s | FileCheck %s -check-prefix CHECK-SVE2p1-SME2 -check-prefix CHECK-NON-STREAMING-SVE2p1-SME2
+; RUN: llc -mattr=+sve -mattr=+sme2 < %s | FileCheck %s -check-prefix CHECK-SVE2p1-SME2 -check-prefix CHECK-NON-STREAMING-SVE2p1-SME2
+; RUN: llc -mattr=+sve -mattr=+sme2 -force-streaming < %s | FileCheck %s -check-prefix CHECK-SVE2p1-SME2 -check-prefix CHECK-STREAMING-SME2
target triple = "aarch64-linux"
; Test combining of getActiveLaneMask with a pair of extract_vector operations.
@@ -185,30 +186,30 @@ define void @test_fixed_extract(i64 %i, i64 %n) #0 {
; CHECK-SVE-NEXT: // kill: def $d1 killed $d1 killed $z1
; CHECK-SVE-NEXT: b use
;
-; CHECK-SVE2p1-LABEL: test_fixed_extract:
-; CHECK-SVE2p1: // %bb.0:
-; CHECK-SVE2p1-NEXT: whilelo p0.s, x0, x1
-; CHECK-SVE2p1-NEXT: cset w8, mi
-; CHECK-SVE2p1-NEXT: mov z1.s, p0/z, #1 // =0x1
-; CHECK-SVE2p1-NEXT: fmov s0, w8
-; CHECK-SVE2p1-NEXT: mov v0.s[1], v1.s[1]
-; CHECK-SVE2p1-NEXT: ext z1.b, z1.b, z1.b, #8
-; CHECK-SVE2p1-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-SVE2p1-NEXT: // kill: def $d1 killed $d1 killed $z1
-; CHECK-SVE2p1-NEXT: b use
+; CHECK-NON-STREAMING-SVE2p1-SME2-LABEL: test_fixed_extract:
+; CHECK-NON-STREAMING-SVE2p1-SME2: // %bb.0:
+; CHECK-NON-STREAMING-SVE2p1-SME2-NEXT: whilelo p0.s, x0, x1
+; CHECK-NON-STREAMING-SVE2p1-SME2-NEXT: cset w8, mi
+; CHECK-NON-STREAMING-SVE2p1-SME2-NEXT: mov z1.s, p0/z, #1 // =0x1
+; CHECK-NON-STREAMING-SVE2p1-SME2-NEXT: fmov s0, w8
+; CHECK-NON-STREAMING-SVE2p1-SME2-NEXT: mov v0.s[1], v1.s[1]
+; CHECK-NON-STREAMING-SVE2p1-SME2-NEXT: ext z1.b, z1.b, z1.b, #8
+; CHECK-NON-STREAMING-SVE2p1-SME2-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NON-STREAMING-SVE2p1-SME2-NEXT: // kill: def $d1 killed $d1 killed $z1
+; CHECK-NON-STREAMING-SVE2p1-SME2-NEXT: b use
;
-; CHECK-SME2-LABEL: test_fixed_extract:
-; CHECK-SME2: // %bb.0:
-; CHECK-SME2-NEXT: whilelo p0.s, x0, x1
-; CHECK-SME2-NEXT: cset w8, mi
-; CHECK-SME2-NEXT: mov z1.s, p0/z, #1 // =0x1
-; CHECK-SME2-NEXT: fmov s2, w8
-; CHECK-SME2-NEXT: mov z0.s, z1.s[1]
-; CHECK-SME2-NEXT: ext z1.b, z1.b, z1.b, #8
-; CHECK-SME2-NEXT: // kill: def $d1 killed $d1 killed $z1
-; CHECK-SME2-NEXT: zip1 z0.s, z2.s, z0.s
-; CHECK-SME2-NEXT: // kill: def $d0 killed $d0 killed $z0
-; CHECK-SME2-NEXT: b use
+; CHECK-STREAMING-SME2-LABEL: test_fixed_extract:
+; CHECK-STREAMING-SME2: // %bb.0:
+; CHECK-STREAMING-SME2-NEXT: whilelo p0.s, x0, x1
+; CHECK-STREAMING-SME2-NEXT: cset w8, mi
+; CHECK-STREAMING-SME2-NEXT: mov z1.s, p0/z, #1 // =0x1
+; CHECK-STREAMING-SME2-NEXT: fmov s2, w8
+; CHECK-STREAMING-SME2-NEXT: mov z0.s, z1.s[1]
+; CHECK-STREAMING-SME2-NEXT: ext z1.b, z1.b, z1.b, #8
+; CHECK-STREAMING-SME2-NEXT: // kill: def $d1 killed $d1 killed $z1
+; CHECK-STREAMING-SME2-NEXT: zip1 z0.s, z2.s, z0.s
+; CHECK-STREAMING-SME2-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-STREAMING-SME2-NEXT: b use
%r = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 %i, i64 %n)
%v0 = call <2 x i1> @llvm.vector.extract.v2i1.nxv4i1.i64(<vscale x 4 x i1> %r, i64 0)
%v1 = call <2 x i1> @llvm.vector.extract.v2i1.nxv4i1.i64(<vscale x 4 x i1> %r, i64 2)
``````````
</details>
https://github.com/llvm/llvm-project/pull/178672
More information about the llvm-branch-commits mailing list