[llvm] clastb representation in existing IR, and AArch64 codegen (PR #112738)
Graham Hunter via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 27 08:19:16 PST 2024
https://github.com/huntergr-arm updated https://github.com/llvm/llvm-project/pull/112738
>From 5df59e400eea6d57edbf7c4b8cbf536deed0db71 Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Tue, 15 Oct 2024 12:17:39 +0000
Subject: [PATCH] [AArch64] Improve codegen for extract.last.active
When SVE support is present, we can use the 'clastb' instruction to
perform the work of extract.last.active.
---
llvm/include/llvm/CodeGen/TargetLowering.h | 5 +
.../SelectionDAG/SelectionDAGBuilder.cpp | 8 +-
.../Target/AArch64/AArch64ISelLowering.cpp | 45 ++++
llvm/lib/Target/AArch64/AArch64ISelLowering.h | 3 +
.../AArch64/vector-extract-last-active.ll | 202 ++++--------------
5 files changed, 100 insertions(+), 163 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 6a41094ff933b0..530ca40d45bc96 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -490,6 +490,11 @@ class TargetLoweringBase {
return true;
}
+ virtual bool
+ shouldExpandVectorExtractLastActive(const IntrinsicInst *I) const {
+ return true;
+ }
+
// Return true if op(vecreduce(x), vecreduce(y)) should be reassociated to
// vecreduce(op(x, y)) for the reduction opcode RedOpc.
virtual bool shouldReassociateReduction(unsigned RedOpc, EVT VT) const {
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index a38a3e9b91052d..baf10d5e391a88 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6417,6 +6417,13 @@ void SelectionDAGBuilder::visitVectorHistogram(const CallInst &I,
void SelectionDAGBuilder::visitVectorExtractLastActive(const CallInst &I,
unsigned Intrinsic) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ if (!TLI.shouldExpandVectorExtractLastActive(cast<IntrinsicInst>(&I))) {
+ visitTargetIntrinsic(I, Intrinsic);
+ return;
+ }
+
assert(Intrinsic == Intrinsic::experimental_vector_extract_last_active &&
"Tried lowering invalid vector extract last");
SDLoc sdl = getCurSDLoc();
@@ -6432,7 +6439,6 @@ void SelectionDAGBuilder::visitVectorExtractLastActive(const CallInst &I,
ConstantRange VScaleRange(1, /*isFullSet=*/true); // Dummy value.
if (DataVT.isScalableVector())
VScaleRange = getVScaleRange(I.getCaller(), 64);
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned EltWidth = TLI.getBitWidthForCttzElements(
I.getType(), DataVT.getVectorElementCount(), /*ZeroIsPoison=*/true,
&VScaleRange);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index e1be825fcf7bf3..2c4fce6e0aa076 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2072,6 +2072,23 @@ bool AArch64TargetLowering::shouldExpandVectorMatch(EVT VT,
return true;
}
+bool AArch64TargetLowering::shouldExpandVectorExtractLastActive(
+ const IntrinsicInst *I) const {
+ // 'clastb' requires SVE support.
+ if (!Subtarget->hasSVE())
+ return true;
+
+ // Return false only for i8/i16/i32/i64/f32/f64 data with a 128-bit known-min size.
+ EVT VT = EVT::getEVT(I->getArgOperand(0)->getType());
+ EVT ScalarVT = VT.getScalarType();
+
+ if (ScalarVT != MVT::i8 && ScalarVT != MVT::i16 && ScalarVT != MVT::i32 &&
+ ScalarVT != MVT::i64 && ScalarVT != MVT::f32 && ScalarVT != MVT::f64)
+ return true;
+
+ return VT.getStoreSizeInBits().getKnownMinValue() != 128;
+}
+
void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
@@ -6405,6 +6422,22 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::experimental_vector_match: {
return LowerVectorMatch(Op, DAG);
}
+ case Intrinsic::experimental_vector_extract_last_active: {
+ SDValue Data = Op.getOperand(1);
+ SDValue Mask = Op.getOperand(2);
+ SDValue PassThru = Op.getOperand(3);
+ EVT VT = Op.getValueType();
+ EVT DataVT = Data.getValueType();
+
+ if (DataVT.isFixedLengthVector()) {
+ EVT ContainerVT = getContainerForFixedLengthVector(DAG, DataVT);
+ EVT MaskVT = ContainerVT.changeElementType(MVT::i1);
+ Data = convertToScalableVector(DAG, ContainerVT, Data);
+ Mask = convertToScalableVector(DAG, MaskVT, Mask);
+ }
+
+ return DAG.getNode(AArch64ISD::CLASTB_N, dl, VT, Mask, PassThru, Data);
+ }
}
}
@@ -27192,6 +27225,18 @@ void AArch64TargetLowering::ReplaceNodeResults(
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
return;
}
+ case Intrinsic::experimental_vector_extract_last_active: {
+ assert((VT == MVT::i8 || VT == MVT::i16) &&
+ "custom lowering for unexpected type");
+ SDLoc DL(N);
+ auto PassThru =
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(3));
+ auto Extract =
+ DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32, N->getOperand(0),
+ N->getOperand(1), N->getOperand(2), PassThru);
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
+ return;
+ }
}
}
case ISD::READ_REGISTER: {
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index cb0b9e965277aa..845bf0dabe6615 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -987,6 +987,9 @@ class AArch64TargetLowering : public TargetLowering {
bool shouldExpandVectorMatch(EVT VT, unsigned SearchSize) const override;
+ bool
+ shouldExpandVectorExtractLastActive(const IntrinsicInst *) const override;
+
/// If a change in streaming mode is required on entry to/return from a
/// function call it emits and returns the corresponding SMSTART or SMSTOP
/// node. \p Condition should be one of the enum values from
diff --git a/llvm/test/CodeGen/AArch64/vector-extract-last-active.ll b/llvm/test/CodeGen/AArch64/vector-extract-last-active.ll
index c0f1720e1cf8b3..b213927edeb097 100644
--- a/llvm/test/CodeGen/AArch64/vector-extract-last-active.ll
+++ b/llvm/test/CodeGen/AArch64/vector-extract-last-active.ll
@@ -27,23 +27,12 @@ define i8 @extract_last_i8(<16 x i8> %data, <16 x i8> %mask, i8 %passthru) {
;
; SVE-FIXED-LABEL: extract_last_i8:
; SVE-FIXED: // %bb.0:
-; SVE-FIXED-NEXT: sub sp, sp, #16
-; SVE-FIXED-NEXT: .cfi_def_cfa_offset 16
-; SVE-FIXED-NEXT: index z2.b, #0, #1
-; SVE-FIXED-NEXT: cmeq v3.16b, v1.16b, #0
; SVE-FIXED-NEXT: cmtst v1.16b, v1.16b, v1.16b
-; SVE-FIXED-NEXT: mov x9, sp
-; SVE-FIXED-NEXT: str q0, [sp]
-; SVE-FIXED-NEXT: bic v2.16b, v2.16b, v3.16b
-; SVE-FIXED-NEXT: umaxv b1, v1.16b
-; SVE-FIXED-NEXT: umaxv b2, v2.16b
-; SVE-FIXED-NEXT: fmov w8, s2
-; SVE-FIXED-NEXT: bfxil x9, x8, #0, #4
-; SVE-FIXED-NEXT: ldrb w8, [x9]
-; SVE-FIXED-NEXT: fmov w9, s1
-; SVE-FIXED-NEXT: tst w9, #0x1
-; SVE-FIXED-NEXT: csel w0, w8, w0, ne
-; SVE-FIXED-NEXT: add sp, sp, #16
+; SVE-FIXED-NEXT: ptrue p0.b
+; SVE-FIXED-NEXT: // kill: def $q0 killed $q0 def $z0
+; SVE-FIXED-NEXT: and z1.b, z1.b, #0x1
+; SVE-FIXED-NEXT: cmpne p0.b, p0/z, z1.b, #0
+; SVE-FIXED-NEXT: clastb w0, p0, w0, z0.b
; SVE-FIXED-NEXT: ret
%notzero = icmp ne <16 x i8> %mask, zeroinitializer
%res = call i8 @llvm.experimental.vector.extract.last.active.v16i8(<16 x i8> %data, <16 x i1> %notzero, i8 %passthru)
@@ -75,23 +64,12 @@ define i16 @extract_last_i16(<8 x i16> %data, <8 x i16> %mask, i16 %passthru) {
;
; SVE-FIXED-LABEL: extract_last_i16:
; SVE-FIXED: // %bb.0:
-; SVE-FIXED-NEXT: sub sp, sp, #16
-; SVE-FIXED-NEXT: .cfi_def_cfa_offset 16
; SVE-FIXED-NEXT: cmtst v1.8h, v1.8h, v1.8h
-; SVE-FIXED-NEXT: index z2.b, #0, #1
-; SVE-FIXED-NEXT: mov x9, sp
-; SVE-FIXED-NEXT: str q0, [sp]
-; SVE-FIXED-NEXT: xtn v1.8b, v1.8h
-; SVE-FIXED-NEXT: and v2.8b, v1.8b, v2.8b
-; SVE-FIXED-NEXT: umaxv b1, v1.8b
-; SVE-FIXED-NEXT: umaxv b2, v2.8b
-; SVE-FIXED-NEXT: fmov w8, s2
-; SVE-FIXED-NEXT: bfi x9, x8, #1, #3
-; SVE-FIXED-NEXT: ldrh w8, [x9]
-; SVE-FIXED-NEXT: fmov w9, s1
-; SVE-FIXED-NEXT: tst w9, #0x1
-; SVE-FIXED-NEXT: csel w0, w8, w0, ne
-; SVE-FIXED-NEXT: add sp, sp, #16
+; SVE-FIXED-NEXT: ptrue p0.h
+; SVE-FIXED-NEXT: // kill: def $q0 killed $q0 def $z0
+; SVE-FIXED-NEXT: and z1.h, z1.h, #0x1
+; SVE-FIXED-NEXT: cmpne p0.h, p0/z, z1.h, #0
+; SVE-FIXED-NEXT: clastb w0, p0, w0, z0.h
; SVE-FIXED-NEXT: ret
%notzero = icmp ne <8 x i16> %mask, zeroinitializer
%res = call i16 @llvm.experimental.vector.extract.last.active.v8i16(<8 x i16> %data, <8 x i1> %notzero, i16 %passthru)
@@ -123,23 +101,12 @@ define i32 @extract_last_i32(<4 x i32> %data, <4 x i32> %mask, i32 %passthru) {
;
; SVE-FIXED-LABEL: extract_last_i32:
; SVE-FIXED: // %bb.0:
-; SVE-FIXED-NEXT: sub sp, sp, #16
-; SVE-FIXED-NEXT: .cfi_def_cfa_offset 16
; SVE-FIXED-NEXT: cmtst v1.4s, v1.4s, v1.4s
-; SVE-FIXED-NEXT: index z2.h, #0, #1
-; SVE-FIXED-NEXT: mov x9, sp
-; SVE-FIXED-NEXT: str q0, [sp]
-; SVE-FIXED-NEXT: xtn v1.4h, v1.4s
-; SVE-FIXED-NEXT: and v2.8b, v1.8b, v2.8b
-; SVE-FIXED-NEXT: umaxv h1, v1.4h
-; SVE-FIXED-NEXT: umaxv h2, v2.4h
-; SVE-FIXED-NEXT: fmov w8, s2
-; SVE-FIXED-NEXT: bfi x9, x8, #2, #2
-; SVE-FIXED-NEXT: ldr w8, [x9]
-; SVE-FIXED-NEXT: fmov w9, s1
-; SVE-FIXED-NEXT: tst w9, #0x1
-; SVE-FIXED-NEXT: csel w0, w8, w0, ne
-; SVE-FIXED-NEXT: add sp, sp, #16
+; SVE-FIXED-NEXT: ptrue p0.s
+; SVE-FIXED-NEXT: // kill: def $q0 killed $q0 def $z0
+; SVE-FIXED-NEXT: and z1.s, z1.s, #0x1
+; SVE-FIXED-NEXT: cmpne p0.s, p0/z, z1.s, #0
+; SVE-FIXED-NEXT: clastb w0, p0, w0, z0.s
; SVE-FIXED-NEXT: ret
%notzero = icmp ne <4 x i32> %mask, zeroinitializer
%res = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> %data, <4 x i1> %notzero, i32 %passthru)
@@ -171,23 +138,12 @@ define i64 @extract_last_i64(<2 x i64> %data, <2 x i64> %mask, i64 %passthru) {
;
; SVE-FIXED-LABEL: extract_last_i64:
; SVE-FIXED: // %bb.0:
-; SVE-FIXED-NEXT: sub sp, sp, #16
-; SVE-FIXED-NEXT: .cfi_def_cfa_offset 16
; SVE-FIXED-NEXT: cmtst v1.2d, v1.2d, v1.2d
-; SVE-FIXED-NEXT: index z2.s, #0, #1
-; SVE-FIXED-NEXT: mov x9, sp
-; SVE-FIXED-NEXT: str q0, [sp]
-; SVE-FIXED-NEXT: xtn v1.2s, v1.2d
-; SVE-FIXED-NEXT: and v2.8b, v1.8b, v2.8b
-; SVE-FIXED-NEXT: umaxp v1.2s, v1.2s, v1.2s
-; SVE-FIXED-NEXT: umaxp v2.2s, v2.2s, v2.2s
-; SVE-FIXED-NEXT: fmov w8, s2
-; SVE-FIXED-NEXT: bfi x9, x8, #3, #1
-; SVE-FIXED-NEXT: ldr x8, [x9]
-; SVE-FIXED-NEXT: fmov w9, s1
-; SVE-FIXED-NEXT: tst w9, #0x1
-; SVE-FIXED-NEXT: csel x0, x8, x0, ne
-; SVE-FIXED-NEXT: add sp, sp, #16
+; SVE-FIXED-NEXT: ptrue p0.d
+; SVE-FIXED-NEXT: // kill: def $q0 killed $q0 def $z0
+; SVE-FIXED-NEXT: and z1.d, z1.d, #0x1
+; SVE-FIXED-NEXT: cmpne p0.d, p0/z, z1.d, #0
+; SVE-FIXED-NEXT: clastb x0, p0, x0, z0.d
; SVE-FIXED-NEXT: ret
%notzero = icmp ne <2 x i64> %mask, zeroinitializer
%res = call i64 @llvm.experimental.vector.extract.last.active.v2i64(<2 x i64> %data, <2 x i1> %notzero, i64 %passthru)
@@ -219,23 +175,13 @@ define float @extract_last_float(<4 x float> %data, <4 x i32> %mask, float %pass
;
; SVE-FIXED-LABEL: extract_last_float:
; SVE-FIXED: // %bb.0:
-; SVE-FIXED-NEXT: sub sp, sp, #16
-; SVE-FIXED-NEXT: .cfi_def_cfa_offset 16
; SVE-FIXED-NEXT: cmtst v1.4s, v1.4s, v1.4s
-; SVE-FIXED-NEXT: index z3.h, #0, #1
-; SVE-FIXED-NEXT: mov x9, sp
-; SVE-FIXED-NEXT: str q0, [sp]
-; SVE-FIXED-NEXT: xtn v1.4h, v1.4s
-; SVE-FIXED-NEXT: and v3.8b, v1.8b, v3.8b
-; SVE-FIXED-NEXT: umaxv h1, v1.4h
-; SVE-FIXED-NEXT: umaxv h3, v3.4h
-; SVE-FIXED-NEXT: fmov w8, s3
-; SVE-FIXED-NEXT: bfi x9, x8, #2, #2
-; SVE-FIXED-NEXT: fmov w8, s1
-; SVE-FIXED-NEXT: ldr s0, [x9]
-; SVE-FIXED-NEXT: tst w8, #0x1
-; SVE-FIXED-NEXT: fcsel s0, s0, s2, ne
-; SVE-FIXED-NEXT: add sp, sp, #16
+; SVE-FIXED-NEXT: ptrue p0.s
+; SVE-FIXED-NEXT: // kill: def $q0 killed $q0 def $z0
+; SVE-FIXED-NEXT: and z1.s, z1.s, #0x1
+; SVE-FIXED-NEXT: cmpne p0.s, p0/z, z1.s, #0
+; SVE-FIXED-NEXT: clastb s2, p0, s2, z0.s
+; SVE-FIXED-NEXT: fmov s0, s2
; SVE-FIXED-NEXT: ret
%notzero = icmp ne <4 x i32> %mask, zeroinitializer
%res = call float @llvm.experimental.vector.extract.last.active.v4f32(<4 x float> %data, <4 x i1> %notzero, float %passthru)
@@ -267,23 +213,13 @@ define double @extract_last_double(<2 x double> %data, <2 x i64> %mask, double %
;
; SVE-FIXED-LABEL: extract_last_double:
; SVE-FIXED: // %bb.0:
-; SVE-FIXED-NEXT: sub sp, sp, #16
-; SVE-FIXED-NEXT: .cfi_def_cfa_offset 16
; SVE-FIXED-NEXT: cmtst v1.2d, v1.2d, v1.2d
-; SVE-FIXED-NEXT: index z3.s, #0, #1
-; SVE-FIXED-NEXT: mov x9, sp
-; SVE-FIXED-NEXT: str q0, [sp]
-; SVE-FIXED-NEXT: xtn v1.2s, v1.2d
-; SVE-FIXED-NEXT: and v3.8b, v1.8b, v3.8b
-; SVE-FIXED-NEXT: umaxp v1.2s, v1.2s, v1.2s
-; SVE-FIXED-NEXT: umaxp v3.2s, v3.2s, v3.2s
-; SVE-FIXED-NEXT: fmov w8, s3
-; SVE-FIXED-NEXT: bfi x9, x8, #3, #1
-; SVE-FIXED-NEXT: fmov w8, s1
-; SVE-FIXED-NEXT: ldr d0, [x9]
-; SVE-FIXED-NEXT: tst w8, #0x1
-; SVE-FIXED-NEXT: fcsel d0, d0, d2, ne
-; SVE-FIXED-NEXT: add sp, sp, #16
+; SVE-FIXED-NEXT: ptrue p0.d
+; SVE-FIXED-NEXT: // kill: def $q0 killed $q0 def $z0
+; SVE-FIXED-NEXT: and z1.d, z1.d, #0x1
+; SVE-FIXED-NEXT: cmpne p0.d, p0/z, z1.d, #0
+; SVE-FIXED-NEXT: clastb d2, p0, d2, z0.d
+; SVE-FIXED-NEXT: fmov d0, d2
; SVE-FIXED-NEXT: ret
%notzero = icmp ne <2 x i64> %mask, zeroinitializer
%res = call double @llvm.experimental.vector.extract.last.active.v2f64(<2 x double> %data, <2 x i1> %notzero, double %passthru)
@@ -293,17 +229,7 @@ define double @extract_last_double(<2 x double> %data, <2 x i64> %mask, double %
define i8 @extract_last_i8_scalable(<vscale x 16 x i8> %data, <vscale x 16 x i1> %mask, i8 %passthru) #0 {
; CHECK-LABEL: extract_last_i8_scalable:
; CHECK: // %bb.0:
-; CHECK-NEXT: index z1.b, #0, #1
-; CHECK-NEXT: mov z2.b, #0 // =0x0
-; CHECK-NEXT: ptrue p1.b
-; CHECK-NEXT: sel z1.b, p0, z1.b, z2.b
-; CHECK-NEXT: umaxv b1, p1, z1.b
-; CHECK-NEXT: fmov w8, s1
-; CHECK-NEXT: and x8, x8, #0xff
-; CHECK-NEXT: whilels p1.b, xzr, x8
-; CHECK-NEXT: ptest p0, p0.b
-; CHECK-NEXT: lastb w8, p1, z0.b
-; CHECK-NEXT: csel w0, w8, w0, ne
+; CHECK-NEXT: clastb w0, p0, w0, z0.b
; CHECK-NEXT: ret
%res = call i8 @llvm.experimental.vector.extract.last.active.nxv16i8(<vscale x 16 x i8> %data, <vscale x 16 x i1> %mask, i8 %passthru)
ret i8 %res
@@ -312,17 +238,7 @@ define i8 @extract_last_i8_scalable(<vscale x 16 x i8> %data, <vscale x 16 x i1>
define i16 @extract_last_i16_scalable(<vscale x 8 x i16> %data, <vscale x 8 x i1> %mask, i16 %passthru) #0 {
; CHECK-LABEL: extract_last_i16_scalable:
; CHECK: // %bb.0:
-; CHECK-NEXT: index z1.h, #0, #1
-; CHECK-NEXT: mov z2.h, #0 // =0x0
-; CHECK-NEXT: ptrue p1.h
-; CHECK-NEXT: sel z1.h, p0, z1.h, z2.h
-; CHECK-NEXT: umaxv h1, p1, z1.h
-; CHECK-NEXT: fmov w8, s1
-; CHECK-NEXT: and x8, x8, #0xff
-; CHECK-NEXT: whilels p2.h, xzr, x8
-; CHECK-NEXT: ptest p1, p0.b
-; CHECK-NEXT: lastb w8, p2, z0.h
-; CHECK-NEXT: csel w0, w8, w0, ne
+; CHECK-NEXT: clastb w0, p0, w0, z0.h
; CHECK-NEXT: ret
%res = call i16 @llvm.experimental.vector.extract.last.active.nxv8i16(<vscale x 8 x i16> %data, <vscale x 8 x i1> %mask, i16 %passthru)
ret i16 %res
@@ -331,17 +247,7 @@ define i16 @extract_last_i16_scalable(<vscale x 8 x i16> %data, <vscale x 8 x i1
define i32 @extract_last_i32_scalable(<vscale x 4 x i32> %data, <vscale x 4 x i1> %mask, i32 %passthru) #0 {
; CHECK-LABEL: extract_last_i32_scalable:
; CHECK: // %bb.0:
-; CHECK-NEXT: index z1.s, #0, #1
-; CHECK-NEXT: mov z2.s, #0 // =0x0
-; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: sel z1.s, p0, z1.s, z2.s
-; CHECK-NEXT: umaxv s1, p1, z1.s
-; CHECK-NEXT: fmov w8, s1
-; CHECK-NEXT: and x8, x8, #0xff
-; CHECK-NEXT: whilels p2.s, xzr, x8
-; CHECK-NEXT: ptest p1, p0.b
-; CHECK-NEXT: lastb w8, p2, z0.s
-; CHECK-NEXT: csel w0, w8, w0, ne
+; CHECK-NEXT: clastb w0, p0, w0, z0.s
; CHECK-NEXT: ret
%res = call i32 @llvm.experimental.vector.extract.last.active.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %mask, i32 %passthru)
ret i32 %res
@@ -350,17 +256,7 @@ define i32 @extract_last_i32_scalable(<vscale x 4 x i32> %data, <vscale x 4 x i1
define i64 @extract_last_i64_scalable(<vscale x 2 x i64> %data, <vscale x 2 x i1> %mask, i64 %passthru) #0 {
; CHECK-LABEL: extract_last_i64_scalable:
; CHECK: // %bb.0:
-; CHECK-NEXT: index z1.d, #0, #1
-; CHECK-NEXT: mov z2.d, #0 // =0x0
-; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: sel z1.d, p0, z1.d, z2.d
-; CHECK-NEXT: umaxv d1, p1, z1.d
-; CHECK-NEXT: fmov x8, d1
-; CHECK-NEXT: and x8, x8, #0xff
-; CHECK-NEXT: whilels p2.d, xzr, x8
-; CHECK-NEXT: ptest p1, p0.b
-; CHECK-NEXT: lastb x8, p2, z0.d
-; CHECK-NEXT: csel x0, x8, x0, ne
+; CHECK-NEXT: clastb x0, p0, x0, z0.d
; CHECK-NEXT: ret
%res = call i64 @llvm.experimental.vector.extract.last.active.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> %mask, i64 %passthru)
ret i64 %res
@@ -369,17 +265,8 @@ define i64 @extract_last_i64_scalable(<vscale x 2 x i64> %data, <vscale x 2 x i1
define float @extract_last_float_scalable(<vscale x 4 x float> %data, <vscale x 4 x i1> %mask, float %passthru) #0 {
; CHECK-LABEL: extract_last_float_scalable:
; CHECK: // %bb.0:
-; CHECK-NEXT: index z2.s, #0, #1
-; CHECK-NEXT: mov z3.s, #0 // =0x0
-; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: sel z2.s, p0, z2.s, z3.s
-; CHECK-NEXT: umaxv s2, p1, z2.s
-; CHECK-NEXT: fmov w8, s2
-; CHECK-NEXT: and x8, x8, #0xff
-; CHECK-NEXT: whilels p2.s, xzr, x8
-; CHECK-NEXT: ptest p1, p0.b
-; CHECK-NEXT: lastb s0, p2, z0.s
-; CHECK-NEXT: fcsel s0, s0, s1, ne
+; CHECK-NEXT: clastb s1, p0, s1, z0.s
+; CHECK-NEXT: fmov s0, s1
; CHECK-NEXT: ret
%res = call float @llvm.experimental.vector.extract.last.active.nxv4f32(<vscale x 4 x float> %data, <vscale x 4 x i1> %mask, float %passthru)
ret float %res
@@ -388,17 +275,8 @@ define float @extract_last_float_scalable(<vscale x 4 x float> %data, <vscale x
define double @extract_last_double_scalable(<vscale x 2 x double> %data, <vscale x 2 x i1> %mask, double %passthru) #0 {
; CHECK-LABEL: extract_last_double_scalable:
; CHECK: // %bb.0:
-; CHECK-NEXT: index z2.d, #0, #1
-; CHECK-NEXT: mov z3.d, #0 // =0x0
-; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: sel z2.d, p0, z2.d, z3.d
-; CHECK-NEXT: umaxv d2, p1, z2.d
-; CHECK-NEXT: fmov x8, d2
-; CHECK-NEXT: and x8, x8, #0xff
-; CHECK-NEXT: whilels p2.d, xzr, x8
-; CHECK-NEXT: ptest p1, p0.b
-; CHECK-NEXT: lastb d0, p2, z0.d
-; CHECK-NEXT: fcsel d0, d0, d1, ne
+; CHECK-NEXT: clastb d1, p0, d1, z0.d
+; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
%res = call double @llvm.experimental.vector.extract.last.active.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x i1> %mask, double %passthru)
ret double %res
More information about the llvm-commits
mailing list