[llvm] [AArch64] Consider runtime mode when deciding to use SVE for fixed-length vectors. (PR #96081)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 19 08:10:04 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Sander de Smalen (sdesmalen-arm)
<details>
<summary>Changes</summary>
This also fixes the case where an SVE div is incorrectly assumed to be available in non-streaming mode with SME.
---
Patch is 65.75 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/96081.diff
56 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+18-8)
- (modified) llvm/lib/Target/AArch64/AArch64Subtarget.h (+9-3)
- (modified) llvm/lib/Target/AArch64/AArch64TargetMachine.cpp (+7-1)
- (modified) llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll (+216-100)
- (modified) llvm/test/CodeGen/AArch64/sme-intrinsics-mova-extract.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sme-intrinsics-mova-insert.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-vector-elt.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-convert.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll (+2-3)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mul.ll (+1-2)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-log-reduce.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll (+48-43)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-subvector.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-test-register-mov.ll (+1-1)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c790209cc221f..a4fa25ffdd6ff 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1418,7 +1418,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
}
}
- if (Subtarget->hasSVEorSME()) {
+ if (Subtarget->isSVEorStreamingSVEAvailable()) {
for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {
setOperationAction(ISD::BITREVERSE, VT, Custom);
setOperationAction(ISD::BSWAP, VT, Custom);
@@ -1528,14 +1528,24 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
}
}
- // NEON doesn't support masked loads/stores/gathers/scatters, but SVE does
- for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v1f64,
- MVT::v2f64, MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
- MVT::v2i32, MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
+ // NEON doesn't support masked loads/stores, but SME and SVE do.
+ for (auto VT :
+ {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v1f64,
+ MVT::v2f64, MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
+ MVT::v2i32, MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MSTORE, VT, Custom);
- setOperationAction(ISD::MGATHER, VT, Custom);
- setOperationAction(ISD::MSCATTER, VT, Custom);
+ }
+
+ // NEON doesn't support masked gathers/scatters, but SVE does.
+ if (Subtarget->isSVEAvailable()) {
+ for (auto VT :
+ {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v1f64,
+ MVT::v2f64, MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
+ MVT::v2i32, MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
+ setOperationAction(ISD::MGATHER, VT, Custom);
+ setOperationAction(ISD::MSCATTER, VT, Custom);
+ }
}
// Firstly, exclude all scalable vector extending loads/truncating stores,
@@ -6986,7 +6996,7 @@ bool AArch64TargetLowering::useSVEForFixedLengthVectorVT(
// NEON-sized vectors can be emulated using SVE instructions.
if (OverrideNEON && (VT.is128BitVector() || VT.is64BitVector()))
- return Subtarget->hasSVEorSME();
+ return Subtarget->isSVEorStreamingSVEAvailable();
// Ensure NEON MVTs only belong to a single register class.
if (VT.getFixedSizeInBits() <= 128)
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 7ef7a89b5749f..5e1a370778914 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -185,6 +185,12 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
(hasSMEFA64() || (!isStreaming() && !isStreamingCompatible()));
}
+ /// Returns true if the target has access to either the full range of SVE instructions,
+ /// or the streaming-compatible subset of SVE instructions.
+ bool isSVEorStreamingSVEAvailable() const {
+ return hasSVE() || hasSMEFA64() || (hasSME() && isStreaming());
+ }
+
unsigned getMinVectorRegisterBitWidth() const {
// Don't assume any minimum vector size when PSTATE.SM may not be 0, because
// we don't yet support streaming-compatible codegen support that we trust
@@ -374,11 +380,11 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
}
bool useSVEForFixedLengthVectors() const {
- if (!isNeonAvailable())
- return hasSVEorSME();
+ if (!isSVEorStreamingSVEAvailable())
+ return false;
// Prefer NEON unless larger SVE registers are available.
- return hasSVEorSME() && getMinSVEVectorSizeInBits() >= 256;
+ return !isNeonAvailable() || getMinSVEVectorSizeInBits() >= 256;
}
bool useSVEForFixedLengthVectors(EVT VT) const {
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 7de9071476e7f..f94fa037a42c4 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -187,6 +187,11 @@ static cl::opt<unsigned> SVEVectorBitsMinOpt(
"with zero meaning no minimum size is assumed."),
cl::init(0), cl::Hidden);
+static cl::opt<bool> ForceStreaming(
+ "force-streaming",
+ cl::desc("Force the use of streaming code for all functions"),
+ cl::init(false), cl::Hidden);
+
static cl::opt<bool> ForceStreamingCompatible(
"force-streaming-compatible",
cl::desc("Force the use of streaming-compatible code for all functions"),
@@ -412,7 +417,8 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
StringRef FS = FSAttr.isValid() ? FSAttr.getValueAsString() : TargetFS;
bool HasMinSize = F.hasMinSize();
- bool IsStreaming = F.hasFnAttribute("aarch64_pstate_sm_enabled") ||
+ bool IsStreaming = ForceStreaming ||
+ F.hasFnAttribute("aarch64_pstate_sm_enabled") ||
F.hasFnAttribute("aarch64_pstate_sm_body");
bool IsStreamingCompatible =
F.hasFnAttribute("aarch64_pstate_sm_compatible") ||
diff --git a/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll b/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
index 9c72afd84fa7c..cdf2a962f9322 100644
--- a/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
+++ b/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,NONSTREAMING
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,STREAMING
; WITH VSCALE RANGE
@@ -362,145 +362,261 @@ define i32 @add_i32_ctz_nxv16i1_poison(<vscale x 16 x i1> %a, i32 %b) {
; FIXED-WIDTH VECTOR TYPES
define i32 @ctz_v16i1(<16 x i1> %a) {
-; CHECK-LABEL: ctz_v16i1:
-; CHECK: // %bb.0:
-; CHECK-NEXT: shl v0.16b, v0.16b, #7
-; CHECK-NEXT: ptrue p0.b, vl16
-; CHECK-NEXT: ptrue p1.b
-; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
-; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: brkb p0.b, p1/z, p0.b
-; CHECK-NEXT: cntp x0, p0, p0.b
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT: ret
+; NONSTREAMING-LABEL: ctz_v16i1:
+; NONSTREAMING: // %bb.0:
+; NONSTREAMING-NEXT: shl v0.16b, v0.16b, #7
+; NONSTREAMING-NEXT: ptrue p0.b, vl16
+; NONSTREAMING-NEXT: ptrue p1.b
+; NONSTREAMING-NEXT: cmlt v0.16b, v0.16b, #0
+; NONSTREAMING-NEXT: cmpne p0.b, p0/z, z0.b, #0
+; NONSTREAMING-NEXT: brkb p0.b, p1/z, p0.b
+; NONSTREAMING-NEXT: cntp x0, p0, p0.b
+; NONSTREAMING-NEXT: // kill: def $w0 killed $w0 killed $x0
+; NONSTREAMING-NEXT: ret
+;
+; STREAMING-LABEL: ctz_v16i1:
+; STREAMING: // %bb.0:
+; STREAMING-NEXT: // kill: def $q0 killed $q0 def $z0
+; STREAMING-NEXT: ptrue p0.b, vl16
+; STREAMING-NEXT: lsl z0.b, z0.b, #7
+; STREAMING-NEXT: ptrue p1.b
+; STREAMING-NEXT: asr z0.b, z0.b, #7
+; STREAMING-NEXT: cmpne p0.b, p0/z, z0.b, #0
+; STREAMING-NEXT: brkb p0.b, p1/z, p0.b
+; STREAMING-NEXT: cntp x0, p0, p0.b
+; STREAMING-NEXT: // kill: def $w0 killed $w0 killed $x0
+; STREAMING-NEXT: ret
%res = call i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1> %a, i1 0)
ret i32 %res
}
define i32 @ctz_v16i1_poison(<16 x i1> %a) {
-; CHECK-LABEL: ctz_v16i1_poison:
-; CHECK: // %bb.0:
-; CHECK-NEXT: shl v0.16b, v0.16b, #7
-; CHECK-NEXT: ptrue p0.b, vl16
-; CHECK-NEXT: ptrue p1.b
-; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
-; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: brkb p0.b, p1/z, p0.b
-; CHECK-NEXT: cntp x0, p0, p0.b
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT: ret
+; NONSTREAMING-LABEL: ctz_v16i1_poison:
+; NONSTREAMING: // %bb.0:
+; NONSTREAMING-NEXT: shl v0.16b, v0.16b, #7
+; NONSTREAMING-NEXT: ptrue p0.b, vl16
+; NONSTREAMING-NEXT: ptrue p1.b
+; NONSTREAMING-NEXT: cmlt v0.16b, v0.16b, #0
+; NONSTREAMING-NEXT: cmpne p0.b, p0/z, z0.b, #0
+; NONSTREAMING-NEXT: brkb p0.b, p1/z, p0.b
+; NONSTREAMING-NEXT: cntp x0, p0, p0.b
+; NONSTREAMING-NEXT: // kill: def $w0 killed $w0 killed $x0
+; NONSTREAMING-NEXT: ret
+;
+; STREAMING-LABEL: ctz_v16i1_poison:
+; STREAMING: // %bb.0:
+; STREAMING-NEXT: // kill: def $q0 killed $q0 def $z0
+; STREAMING-NEXT: ptrue p0.b, vl16
+; STREAMING-NEXT: lsl z0.b, z0.b, #7
+; STREAMING-NEXT: ptrue p1.b
+; STREAMING-NEXT: asr z0.b, z0.b, #7
+; STREAMING-NEXT: cmpne p0.b, p0/z, z0.b, #0
+; STREAMING-NEXT: brkb p0.b, p1/z, p0.b
+; STREAMING-NEXT: cntp x0, p0, p0.b
+; STREAMING-NEXT: // kill: def $w0 killed $w0 killed $x0
+; STREAMING-NEXT: ret
%res = call i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1> %a, i1 1)
ret i32 %res
}
define i64 @add_i64_ctz_v16i1_poison(<16 x i1> %a, i64 %b) {
-; CHECK-LABEL: add_i64_ctz_v16i1_poison:
-; CHECK: // %bb.0:
-; CHECK-NEXT: shl v0.16b, v0.16b, #7
-; CHECK-NEXT: ptrue p0.b, vl16
-; CHECK-NEXT: ptrue p1.b
-; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
-; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: brkb p0.b, p1/z, p0.b
-; CHECK-NEXT: incp x0, p0.b
-; CHECK-NEXT: ret
+; NONSTREAMING-LABEL: add_i64_ctz_v16i1_poison:
+; NONSTREAMING: // %bb.0:
+; NONSTREAMING-NEXT: shl v0.16b, v0.16b, #7
+; NONSTREAMING-NEXT: ptrue p0.b, vl16
+; NONSTREAMING-NEXT: ptrue p1.b
+; NONSTREAMING-NEXT: cmlt v0.16b, v0.16b, #0
+; NONSTREAMING-NEXT: cmpne p0.b, p0/z, z0.b, #0
+; NONSTREAMING-NEXT: brkb p0.b, p1/z, p0.b
+; NONSTREAMING-NEXT: incp x0, p0.b
+; NONSTREAMING-NEXT: ret
+;
+; STREAMING-LABEL: add_i64_ctz_v16i1_poison:
+; STREAMING: // %bb.0:
+; STREAMING-NEXT: // kill: def $q0 killed $q0 def $z0
+; STREAMING-NEXT: ptrue p0.b, vl16
+; STREAMING-NEXT: lsl z0.b, z0.b, #7
+; STREAMING-NEXT: ptrue p1.b
+; STREAMING-NEXT: asr z0.b, z0.b, #7
+; STREAMING-NEXT: cmpne p0.b, p0/z, z0.b, #0
+; STREAMING-NEXT: brkb p0.b, p1/z, p0.b
+; STREAMING-NEXT: incp x0, p0.b
+; STREAMING-NEXT: ret
%res = call i64 @llvm.experimental.cttz.elts.i64.v16i1(<16 x i1> %a, i1 1)
%add = add i64 %res, %b
ret i64 %add
}
define i32 @ctz_v8i1(<8 x i1> %a) {
-; CHECK-LABEL: ctz_v8i1:
-; CHECK: // %bb.0:
-; CHECK-NEXT: shl v0.8b, v0.8b, #7
-; CHECK-NEXT: ptrue p0.b, vl8
-; CHECK-NEXT: ptrue p1.b
-; CHECK-NEXT: cmlt v0.8b, v0.8b, #0
-; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: brkb p0.b, p1/z, p0.b
-; CHECK-NEXT: cntp x0, p0, p0.b
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT: ret
+; NONSTREAMING-LABEL: ctz_v8i1:
+; NONSTREAMING: // %bb.0:
+; NONSTREAMING-NEXT: shl v0.8b, v0.8b, #7
+; NONSTREAMING-NEXT: ptrue p0.b, vl8
+; NONSTREAMING-NEXT: ptrue p1.b
+; NONSTREAMING-NEXT: cmlt v0.8b, v0.8b, #0
+; NONSTREAMING-NEXT: cmpne p0.b, p0/z, z0.b, #0
+; NONSTREAMING-NEXT: brkb p0.b, p1/z, p0.b
+; NONSTREAMING-NEXT: cntp x0, p0, p0.b
+; NONSTREAMING-NEXT: // kill: def $w0 killed $w0 killed $x0
+; NONSTREAMING-NEXT: ret
+;
+; STREAMING-LABEL: ctz_v8i1:
+; STREAMING: // %bb.0:
+; STREAMING-NEXT: // kill: def $d0 killed $d0 def $z0
+; STREAMING-NEXT: ptrue p0.b, vl8
+; STREAMING-NEXT: lsl z0.b, z0.b, #7
+; STREAMING-NEXT: ptrue p1.b
+; STREAMING-NEXT: asr z0.b, z0.b, #7
+; STREAMING-NEXT: cmpne p0.b, p0/z, z0.b, #0
+; STREAMING-NEXT: brkb p0.b, p1/z, p0.b
+; STREAMING-NEXT: cntp x0, p0, p0.b
+; STREAMING-NEXT: // kill: def $w0 killed $w0 killed $x0
+; STREAMING-NEXT: ret
%res = call i32 @llvm.experimental.cttz.elts.i32.v8i1(<8 x i1> %a, i1 0)
ret i32 %res
}
define i32 @ctz_v8i1_poison(<8 x i1> %a) {
-; CHECK-LABEL: ctz_v8i1_poison:
-; CHECK: // %bb.0:
-; CHECK-NEXT: shl v0.8b, v0.8b, #7
-; CHECK-NEXT: ptrue p0.b, vl8
-; CHECK-NEXT: ptrue p1.b
-; CHECK-NEXT: cmlt v0.8b, v0.8b, #0
-; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: brkb p0.b, p1/z, p0.b
-; CHECK-NEXT: cntp x0, p0, p0.b
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT: ret
+; NONSTREAMING-LABEL: ctz_v8i1_poison:
+; NONSTREAMING: // %bb.0:
+; NONSTREAMING-NEXT: shl v0.8b, v0.8b, #7
+; NONSTREAMING-NEXT: ptrue p0.b, vl8
+; NONSTREAMING-NEXT: ptrue p1.b
+; NONSTREAMING-NEXT: cmlt v0.8b, v0.8b, #0
+; NONSTREAMING-NEXT: cmpne p0.b, p0/z, z0.b, #0
+; NONSTREAMING-NEXT: brkb p0.b, p1/z, p0.b
+; NONSTREAMING-NEXT: cntp x0, p0, p0.b
+; NONSTREAMING-NEXT: // kill: def $w0 killed $w0 killed $x0
+; NONSTREAMING-NEXT: ret
+;
+; STREAMING-LABEL: ctz_v8i1_poison:
+; STREAMING: // %bb.0:
+; STREAMING-NEXT: // kill: def $d0 killed $d0 def $z0
+; STREAMING-NEXT: ptrue p0.b, vl8
+; STREAMING-NEXT: lsl z0.b, z0.b, #7
+; STREAMING-NEXT: ptrue p1.b
+; STREAMING-NEXT: asr z0.b, z0.b, #7
+; STREAMING-NEXT: cmpne p0.b, p0/z, z0.b, #0
+; STREAMING-NEXT: brkb p0.b, p1/z, p0.b
+; STREAMING-NEXT: cntp x0, p0, p0.b
+; STREAMING-NEXT: // kill: def $w0 killed $w0 killed $x0
+; STREAMING-NEXT: ret
%res = call i32 @llvm.experimental.cttz.elts.i32.v8i1(<8 x i1> %a, i1 1)
ret i32 %res
}
define i32 @ctz_v4i1(<4 x i1> %a) {
-; CHECK-LABEL: ctz_v4i1:
-; CHECK: // %bb.0:
-; CHECK-NEXT: shl v0.4h, v0.4h, #15
-; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: ptrue p1.h
-; CHECK-NEXT: cmlt v0.4h, v0.4h, #0
-; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: brkb p0.b, p1/z, p0.b
-; CHECK-NEXT: cntp x0, p0, p0.h
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT: ret
+; NONSTREAMING-LABEL: ctz_v4i1:
+; NONSTREAMING: // %bb.0:
+; NONSTREAMING-NEXT: shl v0.4h, v0.4h, #15
+; NONSTREAMING-NEXT: ptrue p0.h, vl4
+; NONSTREAMING-NEXT: ptrue p1.h
+; NONSTREAMING-NEXT: cmlt v0.4h, v0.4h, #0
+; NONSTREAMING-NEXT: cmpne p0.h, p0/z, z0.h, #0
+; NONSTREAMING-NEXT: brkb p0.b, p1/z, p0.b
+; NONSTREAMING-NEXT: cntp x0, p0, p0.h
+; NONSTREAMING-NEXT: // kill: def $w0 killed $w0 killed $x0
+; NONSTREAMING-NEXT: ret
+;
+; STREAMING-LABEL: ctz_v4i1:
+; STREAMING: // %bb.0:
+; STREAMING-NEXT: // kill: def $d0 killed $d0 def $z0
+; STREAMING-NEXT: ptrue p0.h, vl4
+; STREAMING-NEXT: lsl z0.h, z0.h, #15
+; STREAMING-NEXT: ptrue p1.h
+; STREAMING-NEXT: asr z0.h, z0.h, #15
+; STREAMING-NEXT: cmpne p0.h, p0/z, z0.h, #0
+; STREAMING-NEXT: brkb p0.b, p1/z, p0.b
+; STREAMING-NEXT: cntp x0, p0, p0.h
+; STREAMING-NEXT: // kill: def $w0 killed $w0 killed $x0
+; STREAMING-NEXT: ret
%res = call i32 @llvm.experimental.cttz.elts.i32.v4i1(<4 x i1> %a, i1 0)
ret i32 %res
}
define i32 @ctz_v4i1_poison(<4 x i1> %a) {
-; CHECK-LABEL: ctz_v4i1_poison:
-; CHECK: // %bb.0:
-; CHECK-NEXT: shl v0.4h, v0.4h, #15
-; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: ptrue p1.h
-; CHECK-NEXT: cmlt v0.4h, v0.4h, #0
-; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: brkb p0.b, p1/z, p0.b
-; CHECK-NEXT: cntp x0, p0, p0.h
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT: ret
+; NONSTREAMING-LABEL: ctz_v4i1_poison:
+; NONSTREAMING: // %bb.0:
+; NONSTREAMING-NEXT: shl v0.4h, v0.4h, #15
+; NONSTREAMING-NEXT: ptrue p0.h, vl4
+; NONSTREAMING-NEXT: ptrue p1.h
+; NONSTREAMING-NEXT: cmlt v0.4h, v0.4h, #0
+; NONSTREAMING-NEXT: cmpne p0.h, p0/z, z0.h, #0
+; NONSTREAMING-NEXT: brkb p0.b, p1/z, p0.b
+; NONSTREAMING-NEXT: cntp x0, p0, p0.h
+; NONSTREAMING-NEXT: // kill: def $w0 killed $w0 killed $x0
+; NONSTREAMING-NEXT: ret
+;
+; STREAMING-LABEL: ctz_v4i1_poison:
+; STREAMING: // %bb.0:
+; STREAMING-NEXT: // kill: def $d0 killed $d0 def $z0
+; STREAMING-NEXT: ptrue p0.h, vl4
+; STREAMING-NEXT: lsl z0.h, z0.h, #15
+; STREAMING-NEXT: ptrue p1.h
+; STREAMING-NEXT: asr z0.h, z0.h, #15
+; STREAMING-NEXT: cmpne p0.h, p0/z, z0.h, #0
+; STREAMING-NEXT: brkb p0.b, p1/z, p0.b
+; STREAMING-NEXT: cntp x0, p0, p0.h
+; STREAMING-NEXT: // kill: def $w0 killed $w0 killed $x0
+; STREAMING-NEXT: ret
%res = call i32 @llvm.experimental.cttz.elts.i32.v4i1(<4 x i1> %a, i1 1)
ret i32 %res
}
define i32 @ctz_v2i1(<2 x i1> %a) {
-; CHECK-LABEL: ctz_v2i1:
-; CHECK: // %bb.0:
-; CHECK-NEXT: shl v0.2s, v0.2s, #31
-; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: cmlt v0.2s, v0.2s, #0
-; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
-; CHECK-NEXT: brkb p0.b, p1/z, p0.b
-; CHECK-NEXT: cntp x0, p0, p0.s
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT: ret
+; NONSTREAMING-LABEL: ctz_v2i1:
+; NONSTREAMING: // %bb.0:
+; NONSTREAMING-NEXT: shl v0.2s, v0.2s, #31
+; NONSTREAMING-NEXT: ptrue p0.s, vl2
+; NONSTREAMING-NEXT: ptrue p1.s
+; NONSTREAMING-NEXT: cmlt v0.2s, v0.2s, #0
+; NONSTREAMING-NEXT: cmpne p0.s, p0/z, z0.s, #0
+; NONSTREAMING-NEXT: brkb p0.b, p1/z, p0.b
+; NONSTREAMING-NEXT: cntp x0, p0, p0.s
+; NONSTREAMING-NEXT: // kill: def $w0 killed $w0 killed $x0
+; NONSTREAMING-NEXT: ret
+;
+; STREAMING-LABEL: ctz_v2i1:
+; STREAMING: // %bb.0:
+; STREAMING-NEXT: // kill: def $d0 killed $d0 def $z0
+; STREAMING-NEXT: ptrue p0.s, vl2
+; STREAMING-NEXT: lsl z0.s, z0.s, #31
+; STREAMING-NEXT: ptrue p1.s
+; STREAMING-NEXT: asr z0.s, z0.s, #31
+; STREAMING-NEXT: cmpne p0.s, p0/z, z0.s, #0
+; STREAMING-NEXT: brkb p0.b, p1/z, p0.b
+; STREAMING-NEXT: cntp x0, p0, p0.s
+; STREAMING-NEXT: // kill: def $w0 killed $w0 killed $x0
+; STREAMING-NEXT: ret
%res = call i32 @llvm.experimental.cttz.elts.i32.v2i1(<2 x i1> %a, i1 0)
ret i32 %res
}
define i32 @ctz_v2i1_poison(<2 x i1> %a) {
-; CHECK-LABEL: ctz_v2i1_poison:
-; CHECK: // %bb.0:
-; CHECK-NEXT: shl v0.2s, v0.2s, #31
-; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: cmlt v0.2s, v0.2s, #0
-; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
-; CHECK-NEXT: brkb p0.b, p1/z, p0.b
-; CHECK-NEXT: cntp x0, p0, p0.s
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT: ret
+; NONSTREAMING-LABEL: ctz_v2i1_poison:
+; NONSTREAMING: // %bb.0:
+; NONSTREAMING-NEXT: shl v0.2s, v0.2s, #31
+; NONSTREAMING-NEXT: ptrue p0.s, vl2
+; NONSTREAMING-NEXT: ptrue p1.s
+; NONSTREAMING-NEXT: cmlt v0.2s, v0.2s, #0
+; NONSTREAMING-NEXT: cmpne p0.s, p0/z, z0.s, #0
+; NONSTREAMING-NEXT: brkb p0.b, p1/z, p0.b
+; NONSTREAMING-NEXT: cntp x0, p0, p0.s
+; NONSTREAMING-NEXT: // kill: def $w0 killed $w0 killed $x0
+; NONSTREAMING-NEXT: ret
+;
+; STREAMING-LABEL: ctz_v2i1_poison:
+; STREAMING: // %bb.0:
+; STREAMING-NEXT: // kill: def $d0 killed $d0 def $z0
+; STREAMING-NEXT: ptrue p0.s, vl2
+; STREAMING-NEXT: lsl z0.s, z0.s, #31
+; STREAMING-NEXT: ptrue p1.s
+; STREAMING-NEXT: asr z0.s, z0.s, #31
+; STREAMING-NEXT: cmpne p0.s, p0/z, z0.s, #0
+; STREAMING-NEXT: brkb p0.b, p1/z, p0.b
+; STREAMING-NEXT: cntp x0, p0, p0.s
+; STREAMING-NEXT: // kill: def $w0 killed $w0 killed $x0
+; STREAMING-NEXT: ret
%res = call i32 @llvm.experimental.cttz.elts.i32.v2i1(<2 x i1> %a, i1 1)
ret i32 %res
}
diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-extract.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-extract.ll
index 25f3540766618..48fbd14bd8540 100644
--- a/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-extract.ll
+++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-extract.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming -verify-machineinstrs < %s | FileCheck %s
define <vscale x 16 x i8> @extract_row_b(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 %tileslice) {
; CHECK-LABEL: extract_row_b:
diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/96081
More information about the llvm-commits
mailing list