[llvm] [AArch64] Prefer SVE2 for fixed-length i64 [S|U][MIN|MAX] reductions (PR #181161)
Benjamin Maxwell via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 17 03:31:15 PST 2026
https://github.com/MacDue updated https://github.com/llvm/llvm-project/pull/181161
>From 3dd2cfdaab67b73acc60ff9e746d94b0d98e0709 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Tue, 17 Feb 2026 11:23:35 +0000
Subject: [PATCH 1/2] Fixups
---
.../Target/AArch64/AArch64ISelLowering.cpp | 54 +-
llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll | 464 ++++++++++--------
...-streaming-mode-fixed-length-int-reduce.ll | 26 +-
3 files changed, 316 insertions(+), 228 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 028f2114d1d60..10bbfece8195a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1897,6 +1897,13 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MUL, MVT::v1i64, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
+ if (Subtarget->hasSVE2() || Subtarget->isStreamingSVEAvailable()) {
+ setOperationAction(ISD::VECREDUCE_SMAX, MVT::v2i64, Custom);
+ setOperationAction(ISD::VECREDUCE_SMIN, MVT::v2i64, Custom);
+ setOperationAction(ISD::VECREDUCE_UMAX, MVT::v2i64, Custom);
+ setOperationAction(ISD::VECREDUCE_UMIN, MVT::v2i64, Custom);
+ }
+
// NOTE: Currently this has to happen after computeRegisterProperties rather
// than the preferred option of combining it with the addRegisterClass call.
if (Subtarget->useSVEForFixedLengthVectors()) {
@@ -17304,8 +17311,24 @@ static SDValue getVectorBitwiseReduce(unsigned Opcode, SDValue Vec, EVT VT,
return DAG.getAnyExtOrTrunc(Result, DL, VT);
}
+static std::optional<Intrinsic::ID> getPairwiseOpForReduction(unsigned Op) {
+ switch (Op) {
+ case ISD::VECREDUCE_SMIN:
+ return Intrinsic::aarch64_sve_sminp;
+ case ISD::VECREDUCE_SMAX:
+ return Intrinsic::aarch64_sve_smaxp;
+ case ISD::VECREDUCE_UMIN:
+ return Intrinsic::aarch64_sve_uminp;
+ case ISD::VECREDUCE_UMAX:
+ return Intrinsic::aarch64_sve_umaxp;
+ default:
+ return std::nullopt;
+ }
+}
+
SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
SelectionDAG &DAG) const {
+ SDLoc DL(Op);
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
@@ -17313,7 +17336,6 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
// widening by inserting zeroes.
if (Subtarget->hasFullFP16() && Op.getOpcode() == ISD::VECREDUCE_FADD &&
SrcVT == MVT::v2f16) {
- SDLoc DL(Op);
return DAG.getNode(ISD::FADD, DL, MVT::f16,
DAG.getExtractVectorElt(DL, MVT::f16, Src, 0),
DAG.getExtractVectorElt(DL, MVT::f16, Src, 1));
@@ -17327,9 +17349,32 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
Op.getOpcode() == ISD::VECREDUCE_FADD ||
(Op.getOpcode() != ISD::VECREDUCE_ADD &&
SrcVT.getVectorElementType() == MVT::i64);
- if (SrcVT.isScalableVector() ||
- useSVEForFixedLengthVectorVT(
- SrcVT, OverrideNEON && Subtarget->useSVEForFixedLengthVectors())) {
+
+ bool UsesSVEForFixedLengthVT = useSVEForFixedLengthVectorVT(
+ SrcVT, OverrideNEON && Subtarget->useSVEForFixedLengthVectors());
+
+ // Always try to lower v2i64 reductions to pairwise operations (as NEON does
+ // not natively support reductions on this type). Try lowering any v2<ty>
+ // vector to pairwise operations when using SVE for fixed-length VTs, as the
+ // pairwise operations are likely to be cheaper than a full reduction.
+ bool TryPairwiseOps = SrcVT == MVT::v2i64 || (UsesSVEForFixedLengthVT &&
+ SrcVT.getVectorElementCount() == ElementCount::getFixed(2));
+
+ // Attempt to lower v2<ty> reductions to SVE2 pairwise operations.
+ auto PairwiseIID = getPairwiseOpForReduction(Op->getOpcode());
+ if (TryPairwiseOps && PairwiseIID &&
+ (Subtarget->hasSVE2() || Subtarget->isStreamingSVEAvailable())) {
+ SDValue Pg = getPredicateForVector(DAG, DL, SrcVT);
+ EVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT);
+ SDValue VecOp = convertToScalableVector(DAG, ContainerVT, Src);
+ SDValue Rdx = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VecOp.getValueType(),
+ DAG.getConstant(*PairwiseIID, DL, MVT::i32), Pg,
+ VecOp, VecOp);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(), Rdx,
+ DAG.getConstant(0, DL, MVT::i64));
+ }
+
+ if (SrcVT.isScalableVector() || UsesSVEForFixedLengthVT) {
if (SrcVT.getVectorElementType() == MVT::i1)
return LowerPredReductionToSVE(Op, DAG);
@@ -17367,7 +17412,6 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
}
// Lower NEON reductions.
- SDLoc DL(Op);
switch (Op.getOpcode()) {
case ISD::VECREDUCE_AND:
case ISD::VECREDUCE_OR:
diff --git a/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll b/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
index 407d7a4d89ed2..2f0ec2b75bfc6 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
@@ -1,93 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-NOSVE
+; RUN: llc -mtriple=aarch64 -mattr=+sve2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-SVE2
; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
-declare i8 @llvm.vector.reduce.smin.v2i8(<2 x i8>)
-declare i8 @llvm.vector.reduce.smin.v3i8(<3 x i8>)
-declare i8 @llvm.vector.reduce.smin.v4i8(<4 x i8>)
-declare i8 @llvm.vector.reduce.smin.v8i8(<8 x i8>)
-declare i8 @llvm.vector.reduce.smin.v16i8(<16 x i8>)
-declare i8 @llvm.vector.reduce.smin.v32i8(<32 x i8>)
-declare i16 @llvm.vector.reduce.smin.v2i16(<2 x i16>)
-declare i16 @llvm.vector.reduce.smin.v3i16(<3 x i16>)
-declare i16 @llvm.vector.reduce.smin.v4i16(<4 x i16>)
-declare i16 @llvm.vector.reduce.smin.v8i16(<8 x i16>)
-declare i16 @llvm.vector.reduce.smin.v16i16(<16 x i16>)
-declare i32 @llvm.vector.reduce.smin.v2i32(<2 x i32>)
-declare i32 @llvm.vector.reduce.smin.v3i32(<3 x i32>)
-declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>)
-declare i32 @llvm.vector.reduce.smin.v8i32(<8 x i32>)
-declare i32 @llvm.vector.reduce.smin.v16i32(<16 x i32>)
-declare i64 @llvm.vector.reduce.smin.v2i64(<2 x i64>)
-declare i64 @llvm.vector.reduce.smin.v3i64(<3 x i64>)
-declare i64 @llvm.vector.reduce.smin.v4i64(<4 x i64>)
-declare i128 @llvm.vector.reduce.smin.v2i128(<2 x i128>)
-declare i8 @llvm.vector.reduce.smax.v2i8(<2 x i8>)
-declare i8 @llvm.vector.reduce.smax.v3i8(<3 x i8>)
-declare i8 @llvm.vector.reduce.smax.v4i8(<4 x i8>)
-declare i8 @llvm.vector.reduce.smax.v8i8(<8 x i8>)
-declare i8 @llvm.vector.reduce.smax.v16i8(<16 x i8>)
-declare i8 @llvm.vector.reduce.smax.v32i8(<32 x i8>)
-declare i16 @llvm.vector.reduce.smax.v2i16(<2 x i16>)
-declare i16 @llvm.vector.reduce.smax.v3i16(<3 x i16>)
-declare i16 @llvm.vector.reduce.smax.v4i16(<4 x i16>)
-declare i16 @llvm.vector.reduce.smax.v8i16(<8 x i16>)
-declare i16 @llvm.vector.reduce.smax.v16i16(<16 x i16>)
-declare i32 @llvm.vector.reduce.smax.v2i32(<2 x i32>)
-declare i32 @llvm.vector.reduce.smax.v3i32(<3 x i32>)
-declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>)
-declare i32 @llvm.vector.reduce.smax.v8i32(<8 x i32>)
-declare i32 @llvm.vector.reduce.smax.v16i32(<16 x i32>)
-declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>)
-declare i64 @llvm.vector.reduce.smax.v3i64(<3 x i64>)
-declare i64 @llvm.vector.reduce.smax.v4i64(<4 x i64>)
-declare i128 @llvm.vector.reduce.smax.v2i128(<2 x i128>)
-declare i8 @llvm.vector.reduce.umin.v2i8(<2 x i8>)
-declare i8 @llvm.vector.reduce.umin.v3i8(<3 x i8>)
-declare i8 @llvm.vector.reduce.umin.v4i8(<4 x i8>)
-declare i8 @llvm.vector.reduce.umin.v8i8(<8 x i8>)
-declare i8 @llvm.vector.reduce.umin.v16i8(<16 x i8>)
-declare i8 @llvm.vector.reduce.umin.v32i8(<32 x i8>)
-declare i16 @llvm.vector.reduce.umin.v2i16(<2 x i16>)
-declare i16 @llvm.vector.reduce.umin.v3i16(<3 x i16>)
-declare i16 @llvm.vector.reduce.umin.v4i16(<4 x i16>)
-declare i16 @llvm.vector.reduce.umin.v8i16(<8 x i16>)
-declare i16 @llvm.vector.reduce.umin.v16i16(<16 x i16>)
-declare i32 @llvm.vector.reduce.umin.v2i32(<2 x i32>)
-declare i32 @llvm.vector.reduce.umin.v3i32(<3 x i32>)
-declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>)
-declare i32 @llvm.vector.reduce.umin.v8i32(<8 x i32>)
-declare i32 @llvm.vector.reduce.umin.v16i32(<16 x i32>)
-declare i64 @llvm.vector.reduce.umin.v2i64(<2 x i64>)
-declare i64 @llvm.vector.reduce.umin.v3i64(<3 x i64>)
-declare i64 @llvm.vector.reduce.umin.v4i64(<4 x i64>)
-declare i128 @llvm.vector.reduce.umin.v2i128(<2 x i128>)
-declare i8 @llvm.vector.reduce.umax.v2i8(<2 x i8>)
-declare i8 @llvm.vector.reduce.umax.v3i8(<3 x i8>)
-declare i8 @llvm.vector.reduce.umax.v4i8(<4 x i8>)
-declare i8 @llvm.vector.reduce.umax.v8i8(<8 x i8>)
-declare i8 @llvm.vector.reduce.umax.v16i8(<16 x i8>)
-declare i8 @llvm.vector.reduce.umax.v32i8(<32 x i8>)
-declare i16 @llvm.vector.reduce.umax.v2i16(<2 x i16>)
-declare i16 @llvm.vector.reduce.umax.v3i16(<3 x i16>)
-declare i16 @llvm.vector.reduce.umax.v4i16(<4 x i16>)
-declare i16 @llvm.vector.reduce.umax.v8i16(<8 x i16>)
-declare i16 @llvm.vector.reduce.umax.v16i16(<16 x i16>)
-declare i32 @llvm.vector.reduce.umax.v2i32(<2 x i32>)
-declare i32 @llvm.vector.reduce.umax.v3i32(<3 x i32>)
-declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>)
-declare i32 @llvm.vector.reduce.umax.v8i32(<8 x i32>)
-declare i32 @llvm.vector.reduce.umax.v16i32(<16 x i32>)
-declare i64 @llvm.vector.reduce.umax.v2i64(<2 x i64>)
-declare i64 @llvm.vector.reduce.umax.v3i64(<3 x i64>)
-declare i64 @llvm.vector.reduce.umax.v4i64(<4 x i64>)
-declare i128 @llvm.vector.reduce.umax.v2i128(<2 x i128>)
-
-declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
-declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
-
define i8 @smax_B(ptr nocapture readonly %arr) {
; CHECK-LABEL: smax_B:
; CHECK: // %bb.0:
@@ -685,13 +602,21 @@ entry:
}
define i64 @sminv_v2i64(<2 x i64> %a) {
-; CHECK-SD-LABEL: sminv_v2i64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: cmgt d2, d1, d0
-; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: ret
+; CHECK-SD-NOSVE-LABEL: sminv_v2i64:
+; CHECK-SD-NOSVE: // %bb.0: // %entry
+; CHECK-SD-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NOSVE-NEXT: cmgt d2, d1, d0
+; CHECK-SD-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-SD-NOSVE-NEXT: fmov x0, d0
+; CHECK-SD-NOSVE-NEXT: ret
+;
+; CHECK-SD-SVE2-LABEL: sminv_v2i64:
+; CHECK-SD-SVE2: // %bb.0: // %entry
+; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl2
+; CHECK-SD-SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SD-SVE2-NEXT: sminp z0.d, p0/m, z0.d, z0.d
+; CHECK-SD-SVE2-NEXT: fmov x0, d0
+; CHECK-SD-SVE2-NEXT: ret
;
; CHECK-GI-LABEL: sminv_v2i64:
; CHECK-GI: // %bb.0: // %entry
@@ -708,21 +633,35 @@ entry:
}
define i64 @sminv_v3i64(<3 x i64> %a) {
-; CHECK-SD-LABEL: sminv_v3i64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: mov x8, #9223372036854775807 // =0x7fffffffffffffff
-; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-SD-NEXT: mov v2.d[1], x8
-; CHECK-SD-NEXT: cmgt v1.2d, v2.2d, v0.2d
-; CHECK-SD-NEXT: bif v0.16b, v2.16b, v1.16b
-; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: cmgt d2, d1, d0
-; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: ret
+; CHECK-SD-NOSVE-LABEL: sminv_v3i64:
+; CHECK-SD-NOSVE: // %bb.0: // %entry
+; CHECK-SD-NOSVE-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NOSVE-NEXT: mov x8, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-SD-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NOSVE-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NOSVE-NEXT: mov v2.d[1], x8
+; CHECK-SD-NOSVE-NEXT: cmgt v1.2d, v2.2d, v0.2d
+; CHECK-SD-NOSVE-NEXT: bif v0.16b, v2.16b, v1.16b
+; CHECK-SD-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NOSVE-NEXT: cmgt d2, d1, d0
+; CHECK-SD-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-SD-NOSVE-NEXT: fmov x0, d0
+; CHECK-SD-NOSVE-NEXT: ret
+;
+; CHECK-SD-SVE2-LABEL: sminv_v3i64:
+; CHECK-SD-SVE2: // %bb.0: // %entry
+; CHECK-SD-SVE2-NEXT: // kill: def $d2 killed $d2 def $z2
+; CHECK-SD-SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-SD-SVE2-NEXT: mov x8, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-SD-SVE2-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl2
+; CHECK-SD-SVE2-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-SVE2-NEXT: mov v2.d[1], x8
+; CHECK-SD-SVE2-NEXT: smin z0.d, p0/m, z0.d, z2.d
+; CHECK-SD-SVE2-NEXT: sminp z0.d, p0/m, z0.d, z0.d
+; CHECK-SD-SVE2-NEXT: fmov x0, d0
+; CHECK-SD-SVE2-NEXT: ret
;
; CHECK-GI-LABEL: sminv_v3i64:
; CHECK-GI: // %bb.0: // %entry
@@ -747,15 +686,25 @@ entry:
}
define i64 @sminv_v4i64(<4 x i64> %a) {
-; CHECK-SD-LABEL: sminv_v4i64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmgt v2.2d, v1.2d, v0.2d
-; CHECK-SD-NEXT: bif v0.16b, v1.16b, v2.16b
-; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: cmgt d2, d1, d0
-; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: ret
+; CHECK-SD-NOSVE-LABEL: sminv_v4i64:
+; CHECK-SD-NOSVE: // %bb.0: // %entry
+; CHECK-SD-NOSVE-NEXT: cmgt v2.2d, v1.2d, v0.2d
+; CHECK-SD-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b
+; CHECK-SD-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NOSVE-NEXT: cmgt d2, d1, d0
+; CHECK-SD-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-SD-NOSVE-NEXT: fmov x0, d0
+; CHECK-SD-NOSVE-NEXT: ret
+;
+; CHECK-SD-SVE2-LABEL: sminv_v4i64:
+; CHECK-SD-SVE2: // %bb.0: // %entry
+; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl2
+; CHECK-SD-SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SD-SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-SD-SVE2-NEXT: smin z0.d, p0/m, z0.d, z1.d
+; CHECK-SD-SVE2-NEXT: sminp z0.d, p0/m, z0.d, z0.d
+; CHECK-SD-SVE2-NEXT: fmov x0, d0
+; CHECK-SD-SVE2-NEXT: ret
;
; CHECK-GI-LABEL: sminv_v4i64:
; CHECK-GI: // %bb.0: // %entry
@@ -1045,13 +994,21 @@ entry:
}
define i64 @smaxv_v2i64(<2 x i64> %a) {
-; CHECK-SD-LABEL: smaxv_v2i64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: cmgt d2, d0, d1
-; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: ret
+; CHECK-SD-NOSVE-LABEL: smaxv_v2i64:
+; CHECK-SD-NOSVE: // %bb.0: // %entry
+; CHECK-SD-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NOSVE-NEXT: cmgt d2, d0, d1
+; CHECK-SD-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-SD-NOSVE-NEXT: fmov x0, d0
+; CHECK-SD-NOSVE-NEXT: ret
+;
+; CHECK-SD-SVE2-LABEL: smaxv_v2i64:
+; CHECK-SD-SVE2: // %bb.0: // %entry
+; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl2
+; CHECK-SD-SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SD-SVE2-NEXT: smaxp z0.d, p0/m, z0.d, z0.d
+; CHECK-SD-SVE2-NEXT: fmov x0, d0
+; CHECK-SD-SVE2-NEXT: ret
;
; CHECK-GI-LABEL: smaxv_v2i64:
; CHECK-GI: // %bb.0: // %entry
@@ -1068,21 +1025,35 @@ entry:
}
define i64 @smaxv_v3i64(<3 x i64> %a) {
-; CHECK-SD-LABEL: smaxv_v3i64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
-; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-SD-NEXT: mov v2.d[1], x8
-; CHECK-SD-NEXT: cmgt v1.2d, v0.2d, v2.2d
-; CHECK-SD-NEXT: bif v0.16b, v2.16b, v1.16b
-; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: cmgt d2, d0, d1
-; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: ret
+; CHECK-SD-NOSVE-LABEL: smaxv_v3i64:
+; CHECK-SD-NOSVE: // %bb.0: // %entry
+; CHECK-SD-NOSVE-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NOSVE-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-SD-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NOSVE-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NOSVE-NEXT: mov v2.d[1], x8
+; CHECK-SD-NOSVE-NEXT: cmgt v1.2d, v0.2d, v2.2d
+; CHECK-SD-NOSVE-NEXT: bif v0.16b, v2.16b, v1.16b
+; CHECK-SD-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NOSVE-NEXT: cmgt d2, d0, d1
+; CHECK-SD-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-SD-NOSVE-NEXT: fmov x0, d0
+; CHECK-SD-NOSVE-NEXT: ret
+;
+; CHECK-SD-SVE2-LABEL: smaxv_v3i64:
+; CHECK-SD-SVE2: // %bb.0: // %entry
+; CHECK-SD-SVE2-NEXT: // kill: def $d2 killed $d2 def $z2
+; CHECK-SD-SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-SD-SVE2-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-SD-SVE2-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl2
+; CHECK-SD-SVE2-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-SVE2-NEXT: mov v2.d[1], x8
+; CHECK-SD-SVE2-NEXT: smax z0.d, p0/m, z0.d, z2.d
+; CHECK-SD-SVE2-NEXT: smaxp z0.d, p0/m, z0.d, z0.d
+; CHECK-SD-SVE2-NEXT: fmov x0, d0
+; CHECK-SD-SVE2-NEXT: ret
;
; CHECK-GI-LABEL: smaxv_v3i64:
; CHECK-GI: // %bb.0: // %entry
@@ -1107,15 +1078,25 @@ entry:
}
define i64 @smaxv_v4i64(<4 x i64> %a) {
-; CHECK-SD-LABEL: smaxv_v4i64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmgt v2.2d, v0.2d, v1.2d
-; CHECK-SD-NEXT: bif v0.16b, v1.16b, v2.16b
-; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: cmgt d2, d0, d1
-; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: ret
+; CHECK-SD-NOSVE-LABEL: smaxv_v4i64:
+; CHECK-SD-NOSVE: // %bb.0: // %entry
+; CHECK-SD-NOSVE-NEXT: cmgt v2.2d, v0.2d, v1.2d
+; CHECK-SD-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b
+; CHECK-SD-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NOSVE-NEXT: cmgt d2, d0, d1
+; CHECK-SD-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-SD-NOSVE-NEXT: fmov x0, d0
+; CHECK-SD-NOSVE-NEXT: ret
+;
+; CHECK-SD-SVE2-LABEL: smaxv_v4i64:
+; CHECK-SD-SVE2: // %bb.0: // %entry
+; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl2
+; CHECK-SD-SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SD-SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-SD-SVE2-NEXT: smax z0.d, p0/m, z0.d, z1.d
+; CHECK-SD-SVE2-NEXT: smaxp z0.d, p0/m, z0.d, z0.d
+; CHECK-SD-SVE2-NEXT: fmov x0, d0
+; CHECK-SD-SVE2-NEXT: ret
;
; CHECK-GI-LABEL: smaxv_v4i64:
; CHECK-GI: // %bb.0: // %entry
@@ -1403,13 +1384,21 @@ entry:
}
define i64 @uminv_v2i64(<2 x i64> %a) {
-; CHECK-SD-LABEL: uminv_v2i64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: cmhi d2, d1, d0
-; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: ret
+; CHECK-SD-NOSVE-LABEL: uminv_v2i64:
+; CHECK-SD-NOSVE: // %bb.0: // %entry
+; CHECK-SD-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NOSVE-NEXT: cmhi d2, d1, d0
+; CHECK-SD-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-SD-NOSVE-NEXT: fmov x0, d0
+; CHECK-SD-NOSVE-NEXT: ret
+;
+; CHECK-SD-SVE2-LABEL: uminv_v2i64:
+; CHECK-SD-SVE2: // %bb.0: // %entry
+; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl2
+; CHECK-SD-SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SD-SVE2-NEXT: uminp z0.d, p0/m, z0.d, z0.d
+; CHECK-SD-SVE2-NEXT: fmov x0, d0
+; CHECK-SD-SVE2-NEXT: ret
;
; CHECK-GI-LABEL: uminv_v2i64:
; CHECK-GI: // %bb.0: // %entry
@@ -1426,21 +1415,35 @@ entry:
}
define i64 @uminv_v3i64(<3 x i64> %a) {
-; CHECK-SD-LABEL: uminv_v3i64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: mov x8, #-1 // =0xffffffffffffffff
-; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-SD-NEXT: mov v2.d[1], x8
-; CHECK-SD-NEXT: cmhi v1.2d, v2.2d, v0.2d
-; CHECK-SD-NEXT: bif v0.16b, v2.16b, v1.16b
-; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: cmhi d2, d1, d0
-; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: ret
+; CHECK-SD-NOSVE-LABEL: uminv_v3i64:
+; CHECK-SD-NOSVE: // %bb.0: // %entry
+; CHECK-SD-NOSVE-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NOSVE-NEXT: mov x8, #-1 // =0xffffffffffffffff
+; CHECK-SD-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NOSVE-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NOSVE-NEXT: mov v2.d[1], x8
+; CHECK-SD-NOSVE-NEXT: cmhi v1.2d, v2.2d, v0.2d
+; CHECK-SD-NOSVE-NEXT: bif v0.16b, v2.16b, v1.16b
+; CHECK-SD-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NOSVE-NEXT: cmhi d2, d1, d0
+; CHECK-SD-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-SD-NOSVE-NEXT: fmov x0, d0
+; CHECK-SD-NOSVE-NEXT: ret
+;
+; CHECK-SD-SVE2-LABEL: uminv_v3i64:
+; CHECK-SD-SVE2: // %bb.0: // %entry
+; CHECK-SD-SVE2-NEXT: // kill: def $d2 killed $d2 def $z2
+; CHECK-SD-SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-SD-SVE2-NEXT: mov x8, #-1 // =0xffffffffffffffff
+; CHECK-SD-SVE2-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl2
+; CHECK-SD-SVE2-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-SVE2-NEXT: mov v2.d[1], x8
+; CHECK-SD-SVE2-NEXT: umin z0.d, p0/m, z0.d, z2.d
+; CHECK-SD-SVE2-NEXT: uminp z0.d, p0/m, z0.d, z0.d
+; CHECK-SD-SVE2-NEXT: fmov x0, d0
+; CHECK-SD-SVE2-NEXT: ret
;
; CHECK-GI-LABEL: uminv_v3i64:
; CHECK-GI: // %bb.0: // %entry
@@ -1465,15 +1468,25 @@ entry:
}
define i64 @uminv_v4i64(<4 x i64> %a) {
-; CHECK-SD-LABEL: uminv_v4i64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmhi v2.2d, v1.2d, v0.2d
-; CHECK-SD-NEXT: bif v0.16b, v1.16b, v2.16b
-; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: cmhi d2, d1, d0
-; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: ret
+; CHECK-SD-NOSVE-LABEL: uminv_v4i64:
+; CHECK-SD-NOSVE: // %bb.0: // %entry
+; CHECK-SD-NOSVE-NEXT: cmhi v2.2d, v1.2d, v0.2d
+; CHECK-SD-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b
+; CHECK-SD-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NOSVE-NEXT: cmhi d2, d1, d0
+; CHECK-SD-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-SD-NOSVE-NEXT: fmov x0, d0
+; CHECK-SD-NOSVE-NEXT: ret
+;
+; CHECK-SD-SVE2-LABEL: uminv_v4i64:
+; CHECK-SD-SVE2: // %bb.0: // %entry
+; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl2
+; CHECK-SD-SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SD-SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-SD-SVE2-NEXT: umin z0.d, p0/m, z0.d, z1.d
+; CHECK-SD-SVE2-NEXT: uminp z0.d, p0/m, z0.d, z0.d
+; CHECK-SD-SVE2-NEXT: fmov x0, d0
+; CHECK-SD-SVE2-NEXT: ret
;
; CHECK-GI-LABEL: uminv_v4i64:
; CHECK-GI: // %bb.0: // %entry
@@ -1750,13 +1763,21 @@ entry:
}
define i64 @umaxv_v2i64(<2 x i64> %a) {
-; CHECK-SD-LABEL: umaxv_v2i64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: cmhi d2, d0, d1
-; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: ret
+; CHECK-SD-NOSVE-LABEL: umaxv_v2i64:
+; CHECK-SD-NOSVE: // %bb.0: // %entry
+; CHECK-SD-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NOSVE-NEXT: cmhi d2, d0, d1
+; CHECK-SD-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-SD-NOSVE-NEXT: fmov x0, d0
+; CHECK-SD-NOSVE-NEXT: ret
+;
+; CHECK-SD-SVE2-LABEL: umaxv_v2i64:
+; CHECK-SD-SVE2: // %bb.0: // %entry
+; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl2
+; CHECK-SD-SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SD-SVE2-NEXT: umaxp z0.d, p0/m, z0.d, z0.d
+; CHECK-SD-SVE2-NEXT: fmov x0, d0
+; CHECK-SD-SVE2-NEXT: ret
;
; CHECK-GI-LABEL: umaxv_v2i64:
; CHECK-GI: // %bb.0: // %entry
@@ -1773,22 +1794,35 @@ entry:
}
define i64 @umaxv_v3i64(<3 x i64> %a) {
-; CHECK-SD-LABEL: umaxv_v3i64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT: mov v3.16b, v2.16b
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-SD-NEXT: mov v3.d[1], xzr
-; CHECK-SD-NEXT: cmhi v3.2d, v0.2d, v3.2d
-; CHECK-SD-NEXT: ext v4.16b, v3.16b, v3.16b, #8
-; CHECK-SD-NEXT: bif v0.16b, v2.16b, v3.16b
-; CHECK-SD-NEXT: and v1.8b, v1.8b, v4.8b
-; CHECK-SD-NEXT: cmhi d2, d0, d1
-; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: ret
+; CHECK-SD-NOSVE-LABEL: umaxv_v3i64:
+; CHECK-SD-NOSVE: // %bb.0: // %entry
+; CHECK-SD-NOSVE-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NOSVE-NEXT: mov v3.16b, v2.16b
+; CHECK-SD-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NOSVE-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NOSVE-NEXT: mov v3.d[1], xzr
+; CHECK-SD-NOSVE-NEXT: cmhi v3.2d, v0.2d, v3.2d
+; CHECK-SD-NOSVE-NEXT: ext v4.16b, v3.16b, v3.16b, #8
+; CHECK-SD-NOSVE-NEXT: bif v0.16b, v2.16b, v3.16b
+; CHECK-SD-NOSVE-NEXT: and v1.8b, v1.8b, v4.8b
+; CHECK-SD-NOSVE-NEXT: cmhi d2, d0, d1
+; CHECK-SD-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-SD-NOSVE-NEXT: fmov x0, d0
+; CHECK-SD-NOSVE-NEXT: ret
+;
+; CHECK-SD-SVE2-LABEL: umaxv_v3i64:
+; CHECK-SD-SVE2: // %bb.0: // %entry
+; CHECK-SD-SVE2-NEXT: // kill: def $d2 killed $d2 def $z2
+; CHECK-SD-SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-SD-SVE2-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl2
+; CHECK-SD-SVE2-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-SVE2-NEXT: mov v2.d[1], xzr
+; CHECK-SD-SVE2-NEXT: umax z0.d, p0/m, z0.d, z2.d
+; CHECK-SD-SVE2-NEXT: umaxp z0.d, p0/m, z0.d, z0.d
+; CHECK-SD-SVE2-NEXT: fmov x0, d0
+; CHECK-SD-SVE2-NEXT: ret
;
; CHECK-GI-LABEL: umaxv_v3i64:
; CHECK-GI: // %bb.0: // %entry
@@ -1812,15 +1846,25 @@ entry:
}
define i64 @umaxv_v4i64(<4 x i64> %a) {
-; CHECK-SD-LABEL: umaxv_v4i64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmhi v2.2d, v0.2d, v1.2d
-; CHECK-SD-NEXT: bif v0.16b, v1.16b, v2.16b
-; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: cmhi d2, d0, d1
-; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: ret
+; CHECK-SD-NOSVE-LABEL: umaxv_v4i64:
+; CHECK-SD-NOSVE: // %bb.0: // %entry
+; CHECK-SD-NOSVE-NEXT: cmhi v2.2d, v0.2d, v1.2d
+; CHECK-SD-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b
+; CHECK-SD-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NOSVE-NEXT: cmhi d2, d0, d1
+; CHECK-SD-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-SD-NOSVE-NEXT: fmov x0, d0
+; CHECK-SD-NOSVE-NEXT: ret
+;
+; CHECK-SD-SVE2-LABEL: umaxv_v4i64:
+; CHECK-SD-SVE2: // %bb.0: // %entry
+; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl2
+; CHECK-SD-SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SD-SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-SD-SVE2-NEXT: umax z0.d, p0/m, z0.d, z1.d
+; CHECK-SD-SVE2-NEXT: umaxp z0.d, p0/m, z0.d, z0.d
+; CHECK-SD-SVE2-NEXT: fmov x0, d0
+; CHECK-SD-SVE2-NEXT: ret
;
; CHECK-GI-LABEL: umaxv_v4i64:
; CHECK-GI: // %bb.0: // %entry
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll
index 65cbec9cc8d09..682dc0927d810 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -enable-subreg-liveness -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
@@ -776,7 +776,7 @@ define i32 @smaxv_v2i32(<2 x i32> %a) {
; CHECK-LABEL: smaxv_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: smaxv s0, p0, z0.s
+; CHECK-NEXT: smaxp z0.s, p0/m, z0.s, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
;
@@ -865,7 +865,7 @@ define i64 @smaxv_v2i64(<2 x i64> %a) {
; CHECK-LABEL: smaxv_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: smaxv d0, p0, z0.d
+; CHECK-NEXT: smaxp z0.d, p0/m, z0.d, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
;
@@ -887,7 +887,7 @@ define i64 @smaxv_v4i64(ptr %a) {
; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: smaxv d0, p0, z0.d
+; CHECK-NEXT: smaxp z0.d, p0/m, z0.d, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
;
@@ -1277,7 +1277,7 @@ define i32 @sminv_v2i32(<2 x i32> %a) {
; CHECK-LABEL: sminv_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: sminv s0, p0, z0.s
+; CHECK-NEXT: sminp z0.s, p0/m, z0.s, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
;
@@ -1366,7 +1366,7 @@ define i64 @sminv_v2i64(<2 x i64> %a) {
; CHECK-LABEL: sminv_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: sminv d0, p0, z0.d
+; CHECK-NEXT: sminp z0.d, p0/m, z0.d, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
;
@@ -1388,7 +1388,7 @@ define i64 @sminv_v4i64(ptr %a) {
; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: smin z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: sminv d0, p0, z0.d
+; CHECK-NEXT: sminp z0.d, p0/m, z0.d, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
;
@@ -1778,7 +1778,7 @@ define i32 @umaxv_v2i32(<2 x i32> %a) {
; CHECK-LABEL: umaxv_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: umaxv s0, p0, z0.s
+; CHECK-NEXT: umaxp z0.s, p0/m, z0.s, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
;
@@ -1867,7 +1867,7 @@ define i64 @umaxv_v2i64(<2 x i64> %a) {
; CHECK-LABEL: umaxv_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: umaxv d0, p0, z0.d
+; CHECK-NEXT: umaxp z0.d, p0/m, z0.d, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
;
@@ -1889,7 +1889,7 @@ define i64 @umaxv_v4i64(ptr %a) {
; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: umaxv d0, p0, z0.d
+; CHECK-NEXT: umaxp z0.d, p0/m, z0.d, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
;
@@ -2279,7 +2279,7 @@ define i32 @uminv_v2i32(<2 x i32> %a) {
; CHECK-LABEL: uminv_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: uminv s0, p0, z0.s
+; CHECK-NEXT: uminp z0.s, p0/m, z0.s, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
;
@@ -2368,7 +2368,7 @@ define i64 @uminv_v2i64(<2 x i64> %a) {
; CHECK-LABEL: uminv_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: uminv d0, p0, z0.d
+; CHECK-NEXT: uminp z0.d, p0/m, z0.d, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
;
@@ -2390,7 +2390,7 @@ define i64 @uminv_v4i64(ptr %a) {
; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: uminv d0, p0, z0.d
+; CHECK-NEXT: uminp z0.d, p0/m, z0.d, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
;
>From f8a8f476887e01a1607521cee29d4b1cae47fcd8 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Tue, 17 Feb 2026 11:30:39 +0000
Subject: [PATCH 2/2] Add comment
---
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 1 +
1 file changed, 1 insertion(+)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 10bbfece8195a..c9f6cefed8a76 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1897,6 +1897,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MUL, MVT::v1i64, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
+ // With SVE2 we can try lowering these to pairwise operations (e.g. smaxp).
if (Subtarget->hasSVE2() || Subtarget->isStreamingSVEAvailable()) {
setOperationAction(ISD::VECREDUCE_SMAX, MVT::v2i64, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, MVT::v2i64, Custom);
More information about the llvm-commits
mailing list