[llvm] [AArch64] Prefer SVE2 for fixed-length i64 [S|U][MIN|MAX] reductions (PR #181161)
Benjamin Maxwell via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 16 10:00:20 PST 2026
https://github.com/MacDue updated https://github.com/llvm/llvm-project/pull/181161
>From 617db4564030ae4105b7eab3dc74a8809bd44bcf Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Thu, 12 Feb 2026 13:37:16 +0000
Subject: [PATCH 1/4] [AArch64] Prefer SVE for fixed-length [S|U][MIN|MAX]
reductions
For v2i64, NEON has no across-vector min/max reduction instructions, but SVE
does (SMINV/SMAXV/UMINV/UMAXV). The throughput is about the same, but the SVE
code is smaller than the NEON expansion.
---
.../Target/AArch64/AArch64ISelLowering.cpp | 38 +-
llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll | 464 ++++++++++--------
2 files changed, 276 insertions(+), 226 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 028f2114d1d60..e55a75127235c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1897,6 +1897,11 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MUL, MVT::v1i64, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
+ setOperationAction(ISD::VECREDUCE_SMAX, MVT::v2i64, Custom);
+ setOperationAction(ISD::VECREDUCE_SMIN, MVT::v2i64, Custom);
+ setOperationAction(ISD::VECREDUCE_UMAX, MVT::v2i64, Custom);
+ setOperationAction(ISD::VECREDUCE_UMIN, MVT::v2i64, Custom);
+
// NOTE: Currently this has to happen after computeRegisterProperties rather
// than the preferred option of combining it with the addRegisterClass call.
if (Subtarget->useSVEForFixedLengthVectors()) {
@@ -1935,10 +1940,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UMAX, MVT::v2i64, Custom);
setOperationAction(ISD::UMIN, MVT::v1i64, Custom);
setOperationAction(ISD::UMIN, MVT::v2i64, Custom);
- setOperationAction(ISD::VECREDUCE_SMAX, MVT::v2i64, Custom);
- setOperationAction(ISD::VECREDUCE_SMIN, MVT::v2i64, Custom);
- setOperationAction(ISD::VECREDUCE_UMAX, MVT::v2i64, Custom);
- setOperationAction(ISD::VECREDUCE_UMIN, MVT::v2i64, Custom);
// Int operations with no NEON support.
for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
@@ -17319,17 +17320,24 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
DAG.getExtractVectorElt(DL, MVT::f16, Src, 1));
}
+ bool IsMinMax = Op.getOpcode() == ISD::VECREDUCE_SMIN ||
+ Op.getOpcode() == ISD::VECREDUCE_UMIN ||
+ Op.getOpcode() == ISD::VECREDUCE_SMAX ||
+ Op.getOpcode() == ISD::VECREDUCE_UMAX;
+
// Try to lower fixed length reductions to SVE.
- bool OverrideNEON = !Subtarget->isNeonAvailable() ||
- Op.getOpcode() == ISD::VECREDUCE_AND ||
- Op.getOpcode() == ISD::VECREDUCE_OR ||
- Op.getOpcode() == ISD::VECREDUCE_XOR ||
- Op.getOpcode() == ISD::VECREDUCE_FADD ||
- (Op.getOpcode() != ISD::VECREDUCE_ADD &&
- SrcVT.getVectorElementType() == MVT::i64);
+ bool ForceSVE =
+ Subtarget->useSVEForFixedLengthVectors() &&
+ (!Subtarget->isNeonAvailable() || Op.getOpcode() == ISD::VECREDUCE_AND ||
+ Op.getOpcode() == ISD::VECREDUCE_OR ||
+ Op.getOpcode() == ISD::VECREDUCE_XOR ||
+ Op.getOpcode() == ISD::VECREDUCE_FADD ||
+ (Op.getOpcode() != ISD::VECREDUCE_ADD &&
+ SrcVT.getVectorElementType() == MVT::i64));
+ bool PreferSVE = IsMinMax && SrcVT == MVT::v2i64;
+
if (SrcVT.isScalableVector() ||
- useSVEForFixedLengthVectorVT(
- SrcVT, OverrideNEON && Subtarget->useSVEForFixedLengthVectors())) {
+ useSVEForFixedLengthVectorVT(SrcVT, ForceSVE || PreferSVE)) {
if (SrcVT.getVectorElementType() == MVT::i1)
return LowerPredReductionToSVE(Op, DAG);
@@ -31487,9 +31495,7 @@ SDValue AArch64TargetLowering::LowerReductionToSVE(unsigned Opcode,
SDValue VecOp = ScalarOp.getOperand(0);
EVT SrcVT = VecOp.getValueType();
- if (useSVEForFixedLengthVectorVT(
- SrcVT,
- /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors())) {
+ if (!SrcVT.isScalableVector()) {
EVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT);
VecOp = convertToScalableVector(DAG, ContainerVT, VecOp);
}
diff --git a/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll b/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
index 407d7a4d89ed2..c6dda3f0f3cf7 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
@@ -1,93 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-NOSVE
+; RUN: llc -mtriple=aarch64 -mattr=+sve -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-SVE
; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
-declare i8 @llvm.vector.reduce.smin.v2i8(<2 x i8>)
-declare i8 @llvm.vector.reduce.smin.v3i8(<3 x i8>)
-declare i8 @llvm.vector.reduce.smin.v4i8(<4 x i8>)
-declare i8 @llvm.vector.reduce.smin.v8i8(<8 x i8>)
-declare i8 @llvm.vector.reduce.smin.v16i8(<16 x i8>)
-declare i8 @llvm.vector.reduce.smin.v32i8(<32 x i8>)
-declare i16 @llvm.vector.reduce.smin.v2i16(<2 x i16>)
-declare i16 @llvm.vector.reduce.smin.v3i16(<3 x i16>)
-declare i16 @llvm.vector.reduce.smin.v4i16(<4 x i16>)
-declare i16 @llvm.vector.reduce.smin.v8i16(<8 x i16>)
-declare i16 @llvm.vector.reduce.smin.v16i16(<16 x i16>)
-declare i32 @llvm.vector.reduce.smin.v2i32(<2 x i32>)
-declare i32 @llvm.vector.reduce.smin.v3i32(<3 x i32>)
-declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>)
-declare i32 @llvm.vector.reduce.smin.v8i32(<8 x i32>)
-declare i32 @llvm.vector.reduce.smin.v16i32(<16 x i32>)
-declare i64 @llvm.vector.reduce.smin.v2i64(<2 x i64>)
-declare i64 @llvm.vector.reduce.smin.v3i64(<3 x i64>)
-declare i64 @llvm.vector.reduce.smin.v4i64(<4 x i64>)
-declare i128 @llvm.vector.reduce.smin.v2i128(<2 x i128>)
-declare i8 @llvm.vector.reduce.smax.v2i8(<2 x i8>)
-declare i8 @llvm.vector.reduce.smax.v3i8(<3 x i8>)
-declare i8 @llvm.vector.reduce.smax.v4i8(<4 x i8>)
-declare i8 @llvm.vector.reduce.smax.v8i8(<8 x i8>)
-declare i8 @llvm.vector.reduce.smax.v16i8(<16 x i8>)
-declare i8 @llvm.vector.reduce.smax.v32i8(<32 x i8>)
-declare i16 @llvm.vector.reduce.smax.v2i16(<2 x i16>)
-declare i16 @llvm.vector.reduce.smax.v3i16(<3 x i16>)
-declare i16 @llvm.vector.reduce.smax.v4i16(<4 x i16>)
-declare i16 @llvm.vector.reduce.smax.v8i16(<8 x i16>)
-declare i16 @llvm.vector.reduce.smax.v16i16(<16 x i16>)
-declare i32 @llvm.vector.reduce.smax.v2i32(<2 x i32>)
-declare i32 @llvm.vector.reduce.smax.v3i32(<3 x i32>)
-declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>)
-declare i32 @llvm.vector.reduce.smax.v8i32(<8 x i32>)
-declare i32 @llvm.vector.reduce.smax.v16i32(<16 x i32>)
-declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>)
-declare i64 @llvm.vector.reduce.smax.v3i64(<3 x i64>)
-declare i64 @llvm.vector.reduce.smax.v4i64(<4 x i64>)
-declare i128 @llvm.vector.reduce.smax.v2i128(<2 x i128>)
-declare i8 @llvm.vector.reduce.umin.v2i8(<2 x i8>)
-declare i8 @llvm.vector.reduce.umin.v3i8(<3 x i8>)
-declare i8 @llvm.vector.reduce.umin.v4i8(<4 x i8>)
-declare i8 @llvm.vector.reduce.umin.v8i8(<8 x i8>)
-declare i8 @llvm.vector.reduce.umin.v16i8(<16 x i8>)
-declare i8 @llvm.vector.reduce.umin.v32i8(<32 x i8>)
-declare i16 @llvm.vector.reduce.umin.v2i16(<2 x i16>)
-declare i16 @llvm.vector.reduce.umin.v3i16(<3 x i16>)
-declare i16 @llvm.vector.reduce.umin.v4i16(<4 x i16>)
-declare i16 @llvm.vector.reduce.umin.v8i16(<8 x i16>)
-declare i16 @llvm.vector.reduce.umin.v16i16(<16 x i16>)
-declare i32 @llvm.vector.reduce.umin.v2i32(<2 x i32>)
-declare i32 @llvm.vector.reduce.umin.v3i32(<3 x i32>)
-declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>)
-declare i32 @llvm.vector.reduce.umin.v8i32(<8 x i32>)
-declare i32 @llvm.vector.reduce.umin.v16i32(<16 x i32>)
-declare i64 @llvm.vector.reduce.umin.v2i64(<2 x i64>)
-declare i64 @llvm.vector.reduce.umin.v3i64(<3 x i64>)
-declare i64 @llvm.vector.reduce.umin.v4i64(<4 x i64>)
-declare i128 @llvm.vector.reduce.umin.v2i128(<2 x i128>)
-declare i8 @llvm.vector.reduce.umax.v2i8(<2 x i8>)
-declare i8 @llvm.vector.reduce.umax.v3i8(<3 x i8>)
-declare i8 @llvm.vector.reduce.umax.v4i8(<4 x i8>)
-declare i8 @llvm.vector.reduce.umax.v8i8(<8 x i8>)
-declare i8 @llvm.vector.reduce.umax.v16i8(<16 x i8>)
-declare i8 @llvm.vector.reduce.umax.v32i8(<32 x i8>)
-declare i16 @llvm.vector.reduce.umax.v2i16(<2 x i16>)
-declare i16 @llvm.vector.reduce.umax.v3i16(<3 x i16>)
-declare i16 @llvm.vector.reduce.umax.v4i16(<4 x i16>)
-declare i16 @llvm.vector.reduce.umax.v8i16(<8 x i16>)
-declare i16 @llvm.vector.reduce.umax.v16i16(<16 x i16>)
-declare i32 @llvm.vector.reduce.umax.v2i32(<2 x i32>)
-declare i32 @llvm.vector.reduce.umax.v3i32(<3 x i32>)
-declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>)
-declare i32 @llvm.vector.reduce.umax.v8i32(<8 x i32>)
-declare i32 @llvm.vector.reduce.umax.v16i32(<16 x i32>)
-declare i64 @llvm.vector.reduce.umax.v2i64(<2 x i64>)
-declare i64 @llvm.vector.reduce.umax.v3i64(<3 x i64>)
-declare i64 @llvm.vector.reduce.umax.v4i64(<4 x i64>)
-declare i128 @llvm.vector.reduce.umax.v2i128(<2 x i128>)
-
-declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
-declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
-
define i8 @smax_B(ptr nocapture readonly %arr) {
; CHECK-LABEL: smax_B:
; CHECK: // %bb.0:
@@ -685,13 +602,21 @@ entry:
}
define i64 @sminv_v2i64(<2 x i64> %a) {
-; CHECK-SD-LABEL: sminv_v2i64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: cmgt d2, d1, d0
-; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: ret
+; CHECK-SD-NOSVE-LABEL: sminv_v2i64:
+; CHECK-SD-NOSVE: // %bb.0: // %entry
+; CHECK-SD-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NOSVE-NEXT: cmgt d2, d1, d0
+; CHECK-SD-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-SD-NOSVE-NEXT: fmov x0, d0
+; CHECK-SD-NOSVE-NEXT: ret
+;
+; CHECK-SD-SVE-LABEL: sminv_v2i64:
+; CHECK-SD-SVE: // %bb.0: // %entry
+; CHECK-SD-SVE-NEXT: ptrue p0.d, vl2
+; CHECK-SD-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SD-SVE-NEXT: sminv d0, p0, z0.d
+; CHECK-SD-SVE-NEXT: fmov x0, d0
+; CHECK-SD-SVE-NEXT: ret
;
; CHECK-GI-LABEL: sminv_v2i64:
; CHECK-GI: // %bb.0: // %entry
@@ -708,21 +633,35 @@ entry:
}
define i64 @sminv_v3i64(<3 x i64> %a) {
-; CHECK-SD-LABEL: sminv_v3i64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: mov x8, #9223372036854775807 // =0x7fffffffffffffff
-; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-SD-NEXT: mov v2.d[1], x8
-; CHECK-SD-NEXT: cmgt v1.2d, v2.2d, v0.2d
-; CHECK-SD-NEXT: bif v0.16b, v2.16b, v1.16b
-; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: cmgt d2, d1, d0
-; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: ret
+; CHECK-SD-NOSVE-LABEL: sminv_v3i64:
+; CHECK-SD-NOSVE: // %bb.0: // %entry
+; CHECK-SD-NOSVE-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NOSVE-NEXT: mov x8, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-SD-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NOSVE-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NOSVE-NEXT: mov v2.d[1], x8
+; CHECK-SD-NOSVE-NEXT: cmgt v1.2d, v2.2d, v0.2d
+; CHECK-SD-NOSVE-NEXT: bif v0.16b, v2.16b, v1.16b
+; CHECK-SD-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NOSVE-NEXT: cmgt d2, d1, d0
+; CHECK-SD-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-SD-NOSVE-NEXT: fmov x0, d0
+; CHECK-SD-NOSVE-NEXT: ret
+;
+; CHECK-SD-SVE-LABEL: sminv_v3i64:
+; CHECK-SD-SVE: // %bb.0: // %entry
+; CHECK-SD-SVE-NEXT: // kill: def $d2 killed $d2 def $z2
+; CHECK-SD-SVE-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-SD-SVE-NEXT: mov x8, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-SD-SVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-SVE-NEXT: ptrue p0.d, vl2
+; CHECK-SD-SVE-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-SVE-NEXT: mov v2.d[1], x8
+; CHECK-SD-SVE-NEXT: smin z0.d, p0/m, z0.d, z2.d
+; CHECK-SD-SVE-NEXT: sminv d0, p0, z0.d
+; CHECK-SD-SVE-NEXT: fmov x0, d0
+; CHECK-SD-SVE-NEXT: ret
;
; CHECK-GI-LABEL: sminv_v3i64:
; CHECK-GI: // %bb.0: // %entry
@@ -747,15 +686,25 @@ entry:
}
define i64 @sminv_v4i64(<4 x i64> %a) {
-; CHECK-SD-LABEL: sminv_v4i64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmgt v2.2d, v1.2d, v0.2d
-; CHECK-SD-NEXT: bif v0.16b, v1.16b, v2.16b
-; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: cmgt d2, d1, d0
-; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: ret
+; CHECK-SD-NOSVE-LABEL: sminv_v4i64:
+; CHECK-SD-NOSVE: // %bb.0: // %entry
+; CHECK-SD-NOSVE-NEXT: cmgt v2.2d, v1.2d, v0.2d
+; CHECK-SD-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b
+; CHECK-SD-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NOSVE-NEXT: cmgt d2, d1, d0
+; CHECK-SD-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-SD-NOSVE-NEXT: fmov x0, d0
+; CHECK-SD-NOSVE-NEXT: ret
+;
+; CHECK-SD-SVE-LABEL: sminv_v4i64:
+; CHECK-SD-SVE: // %bb.0: // %entry
+; CHECK-SD-SVE-NEXT: ptrue p0.d, vl2
+; CHECK-SD-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SD-SVE-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-SD-SVE-NEXT: smin z0.d, p0/m, z0.d, z1.d
+; CHECK-SD-SVE-NEXT: sminv d0, p0, z0.d
+; CHECK-SD-SVE-NEXT: fmov x0, d0
+; CHECK-SD-SVE-NEXT: ret
;
; CHECK-GI-LABEL: sminv_v4i64:
; CHECK-GI: // %bb.0: // %entry
@@ -1045,13 +994,21 @@ entry:
}
define i64 @smaxv_v2i64(<2 x i64> %a) {
-; CHECK-SD-LABEL: smaxv_v2i64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: cmgt d2, d0, d1
-; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: ret
+; CHECK-SD-NOSVE-LABEL: smaxv_v2i64:
+; CHECK-SD-NOSVE: // %bb.0: // %entry
+; CHECK-SD-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NOSVE-NEXT: cmgt d2, d0, d1
+; CHECK-SD-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-SD-NOSVE-NEXT: fmov x0, d0
+; CHECK-SD-NOSVE-NEXT: ret
+;
+; CHECK-SD-SVE-LABEL: smaxv_v2i64:
+; CHECK-SD-SVE: // %bb.0: // %entry
+; CHECK-SD-SVE-NEXT: ptrue p0.d, vl2
+; CHECK-SD-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SD-SVE-NEXT: smaxv d0, p0, z0.d
+; CHECK-SD-SVE-NEXT: fmov x0, d0
+; CHECK-SD-SVE-NEXT: ret
;
; CHECK-GI-LABEL: smaxv_v2i64:
; CHECK-GI: // %bb.0: // %entry
@@ -1068,21 +1025,35 @@ entry:
}
define i64 @smaxv_v3i64(<3 x i64> %a) {
-; CHECK-SD-LABEL: smaxv_v3i64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
-; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-SD-NEXT: mov v2.d[1], x8
-; CHECK-SD-NEXT: cmgt v1.2d, v0.2d, v2.2d
-; CHECK-SD-NEXT: bif v0.16b, v2.16b, v1.16b
-; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: cmgt d2, d0, d1
-; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: ret
+; CHECK-SD-NOSVE-LABEL: smaxv_v3i64:
+; CHECK-SD-NOSVE: // %bb.0: // %entry
+; CHECK-SD-NOSVE-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NOSVE-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-SD-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NOSVE-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NOSVE-NEXT: mov v2.d[1], x8
+; CHECK-SD-NOSVE-NEXT: cmgt v1.2d, v0.2d, v2.2d
+; CHECK-SD-NOSVE-NEXT: bif v0.16b, v2.16b, v1.16b
+; CHECK-SD-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NOSVE-NEXT: cmgt d2, d0, d1
+; CHECK-SD-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-SD-NOSVE-NEXT: fmov x0, d0
+; CHECK-SD-NOSVE-NEXT: ret
+;
+; CHECK-SD-SVE-LABEL: smaxv_v3i64:
+; CHECK-SD-SVE: // %bb.0: // %entry
+; CHECK-SD-SVE-NEXT: // kill: def $d2 killed $d2 def $z2
+; CHECK-SD-SVE-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-SD-SVE-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-SD-SVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-SVE-NEXT: ptrue p0.d, vl2
+; CHECK-SD-SVE-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-SVE-NEXT: mov v2.d[1], x8
+; CHECK-SD-SVE-NEXT: smax z0.d, p0/m, z0.d, z2.d
+; CHECK-SD-SVE-NEXT: smaxv d0, p0, z0.d
+; CHECK-SD-SVE-NEXT: fmov x0, d0
+; CHECK-SD-SVE-NEXT: ret
;
; CHECK-GI-LABEL: smaxv_v3i64:
; CHECK-GI: // %bb.0: // %entry
@@ -1107,15 +1078,25 @@ entry:
}
define i64 @smaxv_v4i64(<4 x i64> %a) {
-; CHECK-SD-LABEL: smaxv_v4i64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmgt v2.2d, v0.2d, v1.2d
-; CHECK-SD-NEXT: bif v0.16b, v1.16b, v2.16b
-; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: cmgt d2, d0, d1
-; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: ret
+; CHECK-SD-NOSVE-LABEL: smaxv_v4i64:
+; CHECK-SD-NOSVE: // %bb.0: // %entry
+; CHECK-SD-NOSVE-NEXT: cmgt v2.2d, v0.2d, v1.2d
+; CHECK-SD-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b
+; CHECK-SD-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NOSVE-NEXT: cmgt d2, d0, d1
+; CHECK-SD-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-SD-NOSVE-NEXT: fmov x0, d0
+; CHECK-SD-NOSVE-NEXT: ret
+;
+; CHECK-SD-SVE-LABEL: smaxv_v4i64:
+; CHECK-SD-SVE: // %bb.0: // %entry
+; CHECK-SD-SVE-NEXT: ptrue p0.d, vl2
+; CHECK-SD-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SD-SVE-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-SD-SVE-NEXT: smax z0.d, p0/m, z0.d, z1.d
+; CHECK-SD-SVE-NEXT: smaxv d0, p0, z0.d
+; CHECK-SD-SVE-NEXT: fmov x0, d0
+; CHECK-SD-SVE-NEXT: ret
;
; CHECK-GI-LABEL: smaxv_v4i64:
; CHECK-GI: // %bb.0: // %entry
@@ -1403,13 +1384,21 @@ entry:
}
define i64 @uminv_v2i64(<2 x i64> %a) {
-; CHECK-SD-LABEL: uminv_v2i64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: cmhi d2, d1, d0
-; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: ret
+; CHECK-SD-NOSVE-LABEL: uminv_v2i64:
+; CHECK-SD-NOSVE: // %bb.0: // %entry
+; CHECK-SD-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NOSVE-NEXT: cmhi d2, d1, d0
+; CHECK-SD-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-SD-NOSVE-NEXT: fmov x0, d0
+; CHECK-SD-NOSVE-NEXT: ret
+;
+; CHECK-SD-SVE-LABEL: uminv_v2i64:
+; CHECK-SD-SVE: // %bb.0: // %entry
+; CHECK-SD-SVE-NEXT: ptrue p0.d, vl2
+; CHECK-SD-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SD-SVE-NEXT: uminv d0, p0, z0.d
+; CHECK-SD-SVE-NEXT: fmov x0, d0
+; CHECK-SD-SVE-NEXT: ret
;
; CHECK-GI-LABEL: uminv_v2i64:
; CHECK-GI: // %bb.0: // %entry
@@ -1426,21 +1415,35 @@ entry:
}
define i64 @uminv_v3i64(<3 x i64> %a) {
-; CHECK-SD-LABEL: uminv_v3i64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: mov x8, #-1 // =0xffffffffffffffff
-; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-SD-NEXT: mov v2.d[1], x8
-; CHECK-SD-NEXT: cmhi v1.2d, v2.2d, v0.2d
-; CHECK-SD-NEXT: bif v0.16b, v2.16b, v1.16b
-; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: cmhi d2, d1, d0
-; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: ret
+; CHECK-SD-NOSVE-LABEL: uminv_v3i64:
+; CHECK-SD-NOSVE: // %bb.0: // %entry
+; CHECK-SD-NOSVE-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NOSVE-NEXT: mov x8, #-1 // =0xffffffffffffffff
+; CHECK-SD-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NOSVE-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NOSVE-NEXT: mov v2.d[1], x8
+; CHECK-SD-NOSVE-NEXT: cmhi v1.2d, v2.2d, v0.2d
+; CHECK-SD-NOSVE-NEXT: bif v0.16b, v2.16b, v1.16b
+; CHECK-SD-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NOSVE-NEXT: cmhi d2, d1, d0
+; CHECK-SD-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-SD-NOSVE-NEXT: fmov x0, d0
+; CHECK-SD-NOSVE-NEXT: ret
+;
+; CHECK-SD-SVE-LABEL: uminv_v3i64:
+; CHECK-SD-SVE: // %bb.0: // %entry
+; CHECK-SD-SVE-NEXT: // kill: def $d2 killed $d2 def $z2
+; CHECK-SD-SVE-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-SD-SVE-NEXT: mov x8, #-1 // =0xffffffffffffffff
+; CHECK-SD-SVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-SVE-NEXT: ptrue p0.d, vl2
+; CHECK-SD-SVE-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-SVE-NEXT: mov v2.d[1], x8
+; CHECK-SD-SVE-NEXT: umin z0.d, p0/m, z0.d, z2.d
+; CHECK-SD-SVE-NEXT: uminv d0, p0, z0.d
+; CHECK-SD-SVE-NEXT: fmov x0, d0
+; CHECK-SD-SVE-NEXT: ret
;
; CHECK-GI-LABEL: uminv_v3i64:
; CHECK-GI: // %bb.0: // %entry
@@ -1465,15 +1468,25 @@ entry:
}
define i64 @uminv_v4i64(<4 x i64> %a) {
-; CHECK-SD-LABEL: uminv_v4i64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmhi v2.2d, v1.2d, v0.2d
-; CHECK-SD-NEXT: bif v0.16b, v1.16b, v2.16b
-; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: cmhi d2, d1, d0
-; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: ret
+; CHECK-SD-NOSVE-LABEL: uminv_v4i64:
+; CHECK-SD-NOSVE: // %bb.0: // %entry
+; CHECK-SD-NOSVE-NEXT: cmhi v2.2d, v1.2d, v0.2d
+; CHECK-SD-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b
+; CHECK-SD-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NOSVE-NEXT: cmhi d2, d1, d0
+; CHECK-SD-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-SD-NOSVE-NEXT: fmov x0, d0
+; CHECK-SD-NOSVE-NEXT: ret
+;
+; CHECK-SD-SVE-LABEL: uminv_v4i64:
+; CHECK-SD-SVE: // %bb.0: // %entry
+; CHECK-SD-SVE-NEXT: ptrue p0.d, vl2
+; CHECK-SD-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SD-SVE-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-SD-SVE-NEXT: umin z0.d, p0/m, z0.d, z1.d
+; CHECK-SD-SVE-NEXT: uminv d0, p0, z0.d
+; CHECK-SD-SVE-NEXT: fmov x0, d0
+; CHECK-SD-SVE-NEXT: ret
;
; CHECK-GI-LABEL: uminv_v4i64:
; CHECK-GI: // %bb.0: // %entry
@@ -1750,13 +1763,21 @@ entry:
}
define i64 @umaxv_v2i64(<2 x i64> %a) {
-; CHECK-SD-LABEL: umaxv_v2i64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: cmhi d2, d0, d1
-; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: ret
+; CHECK-SD-NOSVE-LABEL: umaxv_v2i64:
+; CHECK-SD-NOSVE: // %bb.0: // %entry
+; CHECK-SD-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NOSVE-NEXT: cmhi d2, d0, d1
+; CHECK-SD-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-SD-NOSVE-NEXT: fmov x0, d0
+; CHECK-SD-NOSVE-NEXT: ret
+;
+; CHECK-SD-SVE-LABEL: umaxv_v2i64:
+; CHECK-SD-SVE: // %bb.0: // %entry
+; CHECK-SD-SVE-NEXT: ptrue p0.d, vl2
+; CHECK-SD-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SD-SVE-NEXT: umaxv d0, p0, z0.d
+; CHECK-SD-SVE-NEXT: fmov x0, d0
+; CHECK-SD-SVE-NEXT: ret
;
; CHECK-GI-LABEL: umaxv_v2i64:
; CHECK-GI: // %bb.0: // %entry
@@ -1773,22 +1794,35 @@ entry:
}
define i64 @umaxv_v3i64(<3 x i64> %a) {
-; CHECK-SD-LABEL: umaxv_v3i64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT: mov v3.16b, v2.16b
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-SD-NEXT: mov v3.d[1], xzr
-; CHECK-SD-NEXT: cmhi v3.2d, v0.2d, v3.2d
-; CHECK-SD-NEXT: ext v4.16b, v3.16b, v3.16b, #8
-; CHECK-SD-NEXT: bif v0.16b, v2.16b, v3.16b
-; CHECK-SD-NEXT: and v1.8b, v1.8b, v4.8b
-; CHECK-SD-NEXT: cmhi d2, d0, d1
-; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: ret
+; CHECK-SD-NOSVE-LABEL: umaxv_v3i64:
+; CHECK-SD-NOSVE: // %bb.0: // %entry
+; CHECK-SD-NOSVE-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NOSVE-NEXT: mov v3.16b, v2.16b
+; CHECK-SD-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NOSVE-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NOSVE-NEXT: mov v3.d[1], xzr
+; CHECK-SD-NOSVE-NEXT: cmhi v3.2d, v0.2d, v3.2d
+; CHECK-SD-NOSVE-NEXT: ext v4.16b, v3.16b, v3.16b, #8
+; CHECK-SD-NOSVE-NEXT: bif v0.16b, v2.16b, v3.16b
+; CHECK-SD-NOSVE-NEXT: and v1.8b, v1.8b, v4.8b
+; CHECK-SD-NOSVE-NEXT: cmhi d2, d0, d1
+; CHECK-SD-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-SD-NOSVE-NEXT: fmov x0, d0
+; CHECK-SD-NOSVE-NEXT: ret
+;
+; CHECK-SD-SVE-LABEL: umaxv_v3i64:
+; CHECK-SD-SVE: // %bb.0: // %entry
+; CHECK-SD-SVE-NEXT: // kill: def $d2 killed $d2 def $z2
+; CHECK-SD-SVE-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-SD-SVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-SVE-NEXT: ptrue p0.d, vl2
+; CHECK-SD-SVE-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-SVE-NEXT: mov v2.d[1], xzr
+; CHECK-SD-SVE-NEXT: umax z0.d, p0/m, z0.d, z2.d
+; CHECK-SD-SVE-NEXT: umaxv d0, p0, z0.d
+; CHECK-SD-SVE-NEXT: fmov x0, d0
+; CHECK-SD-SVE-NEXT: ret
;
; CHECK-GI-LABEL: umaxv_v3i64:
; CHECK-GI: // %bb.0: // %entry
@@ -1812,15 +1846,25 @@ entry:
}
define i64 @umaxv_v4i64(<4 x i64> %a) {
-; CHECK-SD-LABEL: umaxv_v4i64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmhi v2.2d, v0.2d, v1.2d
-; CHECK-SD-NEXT: bif v0.16b, v1.16b, v2.16b
-; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: cmhi d2, d0, d1
-; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: ret
+; CHECK-SD-NOSVE-LABEL: umaxv_v4i64:
+; CHECK-SD-NOSVE: // %bb.0: // %entry
+; CHECK-SD-NOSVE-NEXT: cmhi v2.2d, v0.2d, v1.2d
+; CHECK-SD-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b
+; CHECK-SD-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NOSVE-NEXT: cmhi d2, d0, d1
+; CHECK-SD-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-SD-NOSVE-NEXT: fmov x0, d0
+; CHECK-SD-NOSVE-NEXT: ret
+;
+; CHECK-SD-SVE-LABEL: umaxv_v4i64:
+; CHECK-SD-SVE: // %bb.0: // %entry
+; CHECK-SD-SVE-NEXT: ptrue p0.d, vl2
+; CHECK-SD-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SD-SVE-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-SD-SVE-NEXT: umax z0.d, p0/m, z0.d, z1.d
+; CHECK-SD-SVE-NEXT: umaxv d0, p0, z0.d
+; CHECK-SD-SVE-NEXT: fmov x0, d0
+; CHECK-SD-SVE-NEXT: ret
;
; CHECK-GI-LABEL: umaxv_v4i64:
; CHECK-GI: // %bb.0: // %entry
>From 80cf3670d40024cb127c389e9a73ec55237c8540 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Mon, 16 Feb 2026 12:59:23 +0000
Subject: [PATCH 2/4] Fixups
---
.../Target/AArch64/AArch64ISelLowering.cpp | 52 +++-
llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll | 240 +++++++++---------
...-streaming-mode-fixed-length-int-reduce.ll | 46 ++--
3 files changed, 194 insertions(+), 144 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index e55a75127235c..ad2642d7f46be 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1897,10 +1897,13 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MUL, MVT::v1i64, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
- setOperationAction(ISD::VECREDUCE_SMAX, MVT::v2i64, Custom);
- setOperationAction(ISD::VECREDUCE_SMIN, MVT::v2i64, Custom);
- setOperationAction(ISD::VECREDUCE_UMAX, MVT::v2i64, Custom);
- setOperationAction(ISD::VECREDUCE_UMIN, MVT::v2i64, Custom);
+ if (Subtarget->hasSVE2() || Subtarget->hasSME() ||
+ Subtarget->useSVEForFixedLengthVectors()) {
+ setOperationAction(ISD::VECREDUCE_SMAX, MVT::v2i64, Custom);
+ setOperationAction(ISD::VECREDUCE_SMIN, MVT::v2i64, Custom);
+ setOperationAction(ISD::VECREDUCE_UMAX, MVT::v2i64, Custom);
+ setOperationAction(ISD::VECREDUCE_UMIN, MVT::v2i64, Custom);
+ }
// NOTE: Currently this has to happen after computeRegisterProperties rather
// than the preferred option of combining it with the addRegisterClass call.
@@ -17334,7 +17337,8 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
Op.getOpcode() == ISD::VECREDUCE_FADD ||
(Op.getOpcode() != ISD::VECREDUCE_ADD &&
SrcVT.getVectorElementType() == MVT::i64));
- bool PreferSVE = IsMinMax && SrcVT == MVT::v2i64;
+ bool PreferSVE = IsMinMax && (Subtarget->hasSVE2() || Subtarget->hasSME()) &&
+ SrcVT == MVT::v2i64;
if (SrcVT.isScalableVector() ||
useSVEForFixedLengthVectorVT(SrcVT, ForceSVE || PreferSVE)) {
@@ -31488,6 +31492,21 @@ SDValue AArch64TargetLowering::LowerPredReductionToSVE(SDValue ReduceOp,
return SDValue();
}
+static std::optional<Intrinsic::ID> getPairwiseOpForReduction(unsigned Op) {
+ switch (Op) {
+ case ISD::VECREDUCE_SMIN:
+ return Intrinsic::aarch64_sve_sminp;
+ case ISD::VECREDUCE_SMAX:
+ return Intrinsic::aarch64_sve_smaxp;
+ case ISD::VECREDUCE_UMIN:
+ return Intrinsic::aarch64_sve_uminp;
+ case ISD::VECREDUCE_UMAX:
+ return Intrinsic::aarch64_sve_umaxp;
+ default:
+ return std::nullopt;
+ }
+}
+
SDValue AArch64TargetLowering::LowerReductionToSVE(unsigned Opcode,
SDValue ScalarOp,
SelectionDAG &DAG) const {
@@ -31517,12 +31536,31 @@ SDValue AArch64TargetLowering::LowerReductionToSVE(unsigned Opcode,
// UADDV always returns an i64 result.
EVT ResVT = (Opcode == AArch64ISD::UADDV_PRED) ? MVT::i64 :
SrcVT.getVectorElementType();
+
+ SDValue Rdx;
+ // Lower v2<ty> VECREDUCE_[US](MIN|MAX) to pairwise operation with SVE2/SME.
+ if (SrcVT.getVectorElementCount() == ElementCount::getFixed(2) &&
+ (Subtarget->hasSVE2() || Subtarget->hasSME())) {
+ if (std::optional<Intrinsic::ID> PairwiseIID =
+ getPairwiseOpForReduction(ScalarOp->getOpcode())) {
+ EVT VT = VecOp.getValueType();
+ EVT PredVT = VT.changeVectorElementType(*DAG.getContext(), MVT::i1);
+ // VL1 as the pairwise operations use two input lanes per one output lane.
+ SDValue PTrueVL1 = getPTrue(DAG, DL, PredVT, AArch64SVEPredPattern::vl1);
+ Rdx = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(*PairwiseIID, DL, MVT::i32), PTrueVL1,
+ VecOp, VecOp);
+ }
+ }
+
EVT RdxVT = SrcVT;
if (SrcVT.isFixedLengthVector() || Opcode == AArch64ISD::UADDV_PRED)
RdxVT = getPackedSVEVectorVT(ResVT);
- SDValue Pg = getPredicateForVector(DAG, DL, SrcVT);
- SDValue Rdx = DAG.getNode(Opcode, DL, RdxVT, Pg, VecOp);
+ if (!Rdx) {
+ SDValue Pg = getPredicateForVector(DAG, DL, SrcVT);
+ Rdx = DAG.getNode(Opcode, DL, RdxVT, Pg, VecOp);
+ }
SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT,
Rdx, DAG.getConstant(0, DL, MVT::i64));
diff --git a/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll b/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
index c6dda3f0f3cf7..eedd532bd770a 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-NOSVE
-; RUN: llc -mtriple=aarch64 -mattr=+sve -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-SVE
+; RUN: llc -mtriple=aarch64 -mattr=+sve2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-SVE2
; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
@@ -610,13 +610,13 @@ define i64 @sminv_v2i64(<2 x i64> %a) {
; CHECK-SD-NOSVE-NEXT: fmov x0, d0
; CHECK-SD-NOSVE-NEXT: ret
;
-; CHECK-SD-SVE-LABEL: sminv_v2i64:
-; CHECK-SD-SVE: // %bb.0: // %entry
-; CHECK-SD-SVE-NEXT: ptrue p0.d, vl2
-; CHECK-SD-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-SD-SVE-NEXT: sminv d0, p0, z0.d
-; CHECK-SD-SVE-NEXT: fmov x0, d0
-; CHECK-SD-SVE-NEXT: ret
+; CHECK-SD-SVE2-LABEL: sminv_v2i64:
+; CHECK-SD-SVE2: // %bb.0: // %entry
+; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl1
+; CHECK-SD-SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SD-SVE2-NEXT: sminp z0.d, p0/m, z0.d, z0.d
+; CHECK-SD-SVE2-NEXT: fmov x0, d0
+; CHECK-SD-SVE2-NEXT: ret
;
; CHECK-GI-LABEL: sminv_v2i64:
; CHECK-GI: // %bb.0: // %entry
@@ -649,19 +649,20 @@ define i64 @sminv_v3i64(<3 x i64> %a) {
; CHECK-SD-NOSVE-NEXT: fmov x0, d0
; CHECK-SD-NOSVE-NEXT: ret
;
-; CHECK-SD-SVE-LABEL: sminv_v3i64:
-; CHECK-SD-SVE: // %bb.0: // %entry
-; CHECK-SD-SVE-NEXT: // kill: def $d2 killed $d2 def $z2
-; CHECK-SD-SVE-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-SD-SVE-NEXT: mov x8, #9223372036854775807 // =0x7fffffffffffffff
-; CHECK-SD-SVE-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-SVE-NEXT: ptrue p0.d, vl2
-; CHECK-SD-SVE-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-SD-SVE-NEXT: mov v2.d[1], x8
-; CHECK-SD-SVE-NEXT: smin z0.d, p0/m, z0.d, z2.d
-; CHECK-SD-SVE-NEXT: sminv d0, p0, z0.d
-; CHECK-SD-SVE-NEXT: fmov x0, d0
-; CHECK-SD-SVE-NEXT: ret
+; CHECK-SD-SVE2-LABEL: sminv_v3i64:
+; CHECK-SD-SVE2: // %bb.0: // %entry
+; CHECK-SD-SVE2-NEXT: // kill: def $d2 killed $d2 def $z2
+; CHECK-SD-SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-SD-SVE2-NEXT: mov x8, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-SD-SVE2-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl2
+; CHECK-SD-SVE2-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-SVE2-NEXT: mov v2.d[1], x8
+; CHECK-SD-SVE2-NEXT: smin z0.d, p0/m, z0.d, z2.d
+; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl1
+; CHECK-SD-SVE2-NEXT: sminp z0.d, p0/m, z0.d, z0.d
+; CHECK-SD-SVE2-NEXT: fmov x0, d0
+; CHECK-SD-SVE2-NEXT: ret
;
; CHECK-GI-LABEL: sminv_v3i64:
; CHECK-GI: // %bb.0: // %entry
@@ -696,15 +697,16 @@ define i64 @sminv_v4i64(<4 x i64> %a) {
; CHECK-SD-NOSVE-NEXT: fmov x0, d0
; CHECK-SD-NOSVE-NEXT: ret
;
-; CHECK-SD-SVE-LABEL: sminv_v4i64:
-; CHECK-SD-SVE: // %bb.0: // %entry
-; CHECK-SD-SVE-NEXT: ptrue p0.d, vl2
-; CHECK-SD-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-SD-SVE-NEXT: // kill: def $q1 killed $q1 def $z1
-; CHECK-SD-SVE-NEXT: smin z0.d, p0/m, z0.d, z1.d
-; CHECK-SD-SVE-NEXT: sminv d0, p0, z0.d
-; CHECK-SD-SVE-NEXT: fmov x0, d0
-; CHECK-SD-SVE-NEXT: ret
+; CHECK-SD-SVE2-LABEL: sminv_v4i64:
+; CHECK-SD-SVE2: // %bb.0: // %entry
+; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl2
+; CHECK-SD-SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SD-SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-SD-SVE2-NEXT: smin z0.d, p0/m, z0.d, z1.d
+; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl1
+; CHECK-SD-SVE2-NEXT: sminp z0.d, p0/m, z0.d, z0.d
+; CHECK-SD-SVE2-NEXT: fmov x0, d0
+; CHECK-SD-SVE2-NEXT: ret
;
; CHECK-GI-LABEL: sminv_v4i64:
; CHECK-GI: // %bb.0: // %entry
@@ -1002,13 +1004,13 @@ define i64 @smaxv_v2i64(<2 x i64> %a) {
; CHECK-SD-NOSVE-NEXT: fmov x0, d0
; CHECK-SD-NOSVE-NEXT: ret
;
-; CHECK-SD-SVE-LABEL: smaxv_v2i64:
-; CHECK-SD-SVE: // %bb.0: // %entry
-; CHECK-SD-SVE-NEXT: ptrue p0.d, vl2
-; CHECK-SD-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-SD-SVE-NEXT: smaxv d0, p0, z0.d
-; CHECK-SD-SVE-NEXT: fmov x0, d0
-; CHECK-SD-SVE-NEXT: ret
+; CHECK-SD-SVE2-LABEL: smaxv_v2i64:
+; CHECK-SD-SVE2: // %bb.0: // %entry
+; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl1
+; CHECK-SD-SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SD-SVE2-NEXT: smaxp z0.d, p0/m, z0.d, z0.d
+; CHECK-SD-SVE2-NEXT: fmov x0, d0
+; CHECK-SD-SVE2-NEXT: ret
;
; CHECK-GI-LABEL: smaxv_v2i64:
; CHECK-GI: // %bb.0: // %entry
@@ -1041,19 +1043,20 @@ define i64 @smaxv_v3i64(<3 x i64> %a) {
; CHECK-SD-NOSVE-NEXT: fmov x0, d0
; CHECK-SD-NOSVE-NEXT: ret
;
-; CHECK-SD-SVE-LABEL: smaxv_v3i64:
-; CHECK-SD-SVE: // %bb.0: // %entry
-; CHECK-SD-SVE-NEXT: // kill: def $d2 killed $d2 def $z2
-; CHECK-SD-SVE-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-SD-SVE-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
-; CHECK-SD-SVE-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-SVE-NEXT: ptrue p0.d, vl2
-; CHECK-SD-SVE-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-SD-SVE-NEXT: mov v2.d[1], x8
-; CHECK-SD-SVE-NEXT: smax z0.d, p0/m, z0.d, z2.d
-; CHECK-SD-SVE-NEXT: smaxv d0, p0, z0.d
-; CHECK-SD-SVE-NEXT: fmov x0, d0
-; CHECK-SD-SVE-NEXT: ret
+; CHECK-SD-SVE2-LABEL: smaxv_v3i64:
+; CHECK-SD-SVE2: // %bb.0: // %entry
+; CHECK-SD-SVE2-NEXT: // kill: def $d2 killed $d2 def $z2
+; CHECK-SD-SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-SD-SVE2-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-SD-SVE2-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl2
+; CHECK-SD-SVE2-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-SVE2-NEXT: mov v2.d[1], x8
+; CHECK-SD-SVE2-NEXT: smax z0.d, p0/m, z0.d, z2.d
+; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl1
+; CHECK-SD-SVE2-NEXT: smaxp z0.d, p0/m, z0.d, z0.d
+; CHECK-SD-SVE2-NEXT: fmov x0, d0
+; CHECK-SD-SVE2-NEXT: ret
;
; CHECK-GI-LABEL: smaxv_v3i64:
; CHECK-GI: // %bb.0: // %entry
@@ -1088,15 +1091,16 @@ define i64 @smaxv_v4i64(<4 x i64> %a) {
; CHECK-SD-NOSVE-NEXT: fmov x0, d0
; CHECK-SD-NOSVE-NEXT: ret
;
-; CHECK-SD-SVE-LABEL: smaxv_v4i64:
-; CHECK-SD-SVE: // %bb.0: // %entry
-; CHECK-SD-SVE-NEXT: ptrue p0.d, vl2
-; CHECK-SD-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-SD-SVE-NEXT: // kill: def $q1 killed $q1 def $z1
-; CHECK-SD-SVE-NEXT: smax z0.d, p0/m, z0.d, z1.d
-; CHECK-SD-SVE-NEXT: smaxv d0, p0, z0.d
-; CHECK-SD-SVE-NEXT: fmov x0, d0
-; CHECK-SD-SVE-NEXT: ret
+; CHECK-SD-SVE2-LABEL: smaxv_v4i64:
+; CHECK-SD-SVE2: // %bb.0: // %entry
+; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl2
+; CHECK-SD-SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SD-SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-SD-SVE2-NEXT: smax z0.d, p0/m, z0.d, z1.d
+; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl1
+; CHECK-SD-SVE2-NEXT: smaxp z0.d, p0/m, z0.d, z0.d
+; CHECK-SD-SVE2-NEXT: fmov x0, d0
+; CHECK-SD-SVE2-NEXT: ret
;
; CHECK-GI-LABEL: smaxv_v4i64:
; CHECK-GI: // %bb.0: // %entry
@@ -1392,13 +1396,13 @@ define i64 @uminv_v2i64(<2 x i64> %a) {
; CHECK-SD-NOSVE-NEXT: fmov x0, d0
; CHECK-SD-NOSVE-NEXT: ret
;
-; CHECK-SD-SVE-LABEL: uminv_v2i64:
-; CHECK-SD-SVE: // %bb.0: // %entry
-; CHECK-SD-SVE-NEXT: ptrue p0.d, vl2
-; CHECK-SD-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-SD-SVE-NEXT: uminv d0, p0, z0.d
-; CHECK-SD-SVE-NEXT: fmov x0, d0
-; CHECK-SD-SVE-NEXT: ret
+; CHECK-SD-SVE2-LABEL: uminv_v2i64:
+; CHECK-SD-SVE2: // %bb.0: // %entry
+; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl1
+; CHECK-SD-SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SD-SVE2-NEXT: uminp z0.d, p0/m, z0.d, z0.d
+; CHECK-SD-SVE2-NEXT: fmov x0, d0
+; CHECK-SD-SVE2-NEXT: ret
;
; CHECK-GI-LABEL: uminv_v2i64:
; CHECK-GI: // %bb.0: // %entry
@@ -1431,19 +1435,20 @@ define i64 @uminv_v3i64(<3 x i64> %a) {
; CHECK-SD-NOSVE-NEXT: fmov x0, d0
; CHECK-SD-NOSVE-NEXT: ret
;
-; CHECK-SD-SVE-LABEL: uminv_v3i64:
-; CHECK-SD-SVE: // %bb.0: // %entry
-; CHECK-SD-SVE-NEXT: // kill: def $d2 killed $d2 def $z2
-; CHECK-SD-SVE-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-SD-SVE-NEXT: mov x8, #-1 // =0xffffffffffffffff
-; CHECK-SD-SVE-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-SVE-NEXT: ptrue p0.d, vl2
-; CHECK-SD-SVE-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-SD-SVE-NEXT: mov v2.d[1], x8
-; CHECK-SD-SVE-NEXT: umin z0.d, p0/m, z0.d, z2.d
-; CHECK-SD-SVE-NEXT: uminv d0, p0, z0.d
-; CHECK-SD-SVE-NEXT: fmov x0, d0
-; CHECK-SD-SVE-NEXT: ret
+; CHECK-SD-SVE2-LABEL: uminv_v3i64:
+; CHECK-SD-SVE2: // %bb.0: // %entry
+; CHECK-SD-SVE2-NEXT: // kill: def $d2 killed $d2 def $z2
+; CHECK-SD-SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-SD-SVE2-NEXT: mov x8, #-1 // =0xffffffffffffffff
+; CHECK-SD-SVE2-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl2
+; CHECK-SD-SVE2-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-SVE2-NEXT: mov v2.d[1], x8
+; CHECK-SD-SVE2-NEXT: umin z0.d, p0/m, z0.d, z2.d
+; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl1
+; CHECK-SD-SVE2-NEXT: uminp z0.d, p0/m, z0.d, z0.d
+; CHECK-SD-SVE2-NEXT: fmov x0, d0
+; CHECK-SD-SVE2-NEXT: ret
;
; CHECK-GI-LABEL: uminv_v3i64:
; CHECK-GI: // %bb.0: // %entry
@@ -1478,15 +1483,16 @@ define i64 @uminv_v4i64(<4 x i64> %a) {
; CHECK-SD-NOSVE-NEXT: fmov x0, d0
; CHECK-SD-NOSVE-NEXT: ret
;
-; CHECK-SD-SVE-LABEL: uminv_v4i64:
-; CHECK-SD-SVE: // %bb.0: // %entry
-; CHECK-SD-SVE-NEXT: ptrue p0.d, vl2
-; CHECK-SD-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-SD-SVE-NEXT: // kill: def $q1 killed $q1 def $z1
-; CHECK-SD-SVE-NEXT: umin z0.d, p0/m, z0.d, z1.d
-; CHECK-SD-SVE-NEXT: uminv d0, p0, z0.d
-; CHECK-SD-SVE-NEXT: fmov x0, d0
-; CHECK-SD-SVE-NEXT: ret
+; CHECK-SD-SVE2-LABEL: uminv_v4i64:
+; CHECK-SD-SVE2: // %bb.0: // %entry
+; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl2
+; CHECK-SD-SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SD-SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-SD-SVE2-NEXT: umin z0.d, p0/m, z0.d, z1.d
+; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl1
+; CHECK-SD-SVE2-NEXT: uminp z0.d, p0/m, z0.d, z0.d
+; CHECK-SD-SVE2-NEXT: fmov x0, d0
+; CHECK-SD-SVE2-NEXT: ret
;
; CHECK-GI-LABEL: uminv_v4i64:
; CHECK-GI: // %bb.0: // %entry
@@ -1771,13 +1777,13 @@ define i64 @umaxv_v2i64(<2 x i64> %a) {
; CHECK-SD-NOSVE-NEXT: fmov x0, d0
; CHECK-SD-NOSVE-NEXT: ret
;
-; CHECK-SD-SVE-LABEL: umaxv_v2i64:
-; CHECK-SD-SVE: // %bb.0: // %entry
-; CHECK-SD-SVE-NEXT: ptrue p0.d, vl2
-; CHECK-SD-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-SD-SVE-NEXT: umaxv d0, p0, z0.d
-; CHECK-SD-SVE-NEXT: fmov x0, d0
-; CHECK-SD-SVE-NEXT: ret
+; CHECK-SD-SVE2-LABEL: umaxv_v2i64:
+; CHECK-SD-SVE2: // %bb.0: // %entry
+; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl1
+; CHECK-SD-SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SD-SVE2-NEXT: umaxp z0.d, p0/m, z0.d, z0.d
+; CHECK-SD-SVE2-NEXT: fmov x0, d0
+; CHECK-SD-SVE2-NEXT: ret
;
; CHECK-GI-LABEL: umaxv_v2i64:
; CHECK-GI: // %bb.0: // %entry
@@ -1811,18 +1817,19 @@ define i64 @umaxv_v3i64(<3 x i64> %a) {
; CHECK-SD-NOSVE-NEXT: fmov x0, d0
; CHECK-SD-NOSVE-NEXT: ret
;
-; CHECK-SD-SVE-LABEL: umaxv_v3i64:
-; CHECK-SD-SVE: // %bb.0: // %entry
-; CHECK-SD-SVE-NEXT: // kill: def $d2 killed $d2 def $z2
-; CHECK-SD-SVE-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-SD-SVE-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-SVE-NEXT: ptrue p0.d, vl2
-; CHECK-SD-SVE-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-SD-SVE-NEXT: mov v2.d[1], xzr
-; CHECK-SD-SVE-NEXT: umax z0.d, p0/m, z0.d, z2.d
-; CHECK-SD-SVE-NEXT: umaxv d0, p0, z0.d
-; CHECK-SD-SVE-NEXT: fmov x0, d0
-; CHECK-SD-SVE-NEXT: ret
+; CHECK-SD-SVE2-LABEL: umaxv_v3i64:
+; CHECK-SD-SVE2: // %bb.0: // %entry
+; CHECK-SD-SVE2-NEXT: // kill: def $d2 killed $d2 def $z2
+; CHECK-SD-SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-SD-SVE2-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl2
+; CHECK-SD-SVE2-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-SVE2-NEXT: mov v2.d[1], xzr
+; CHECK-SD-SVE2-NEXT: umax z0.d, p0/m, z0.d, z2.d
+; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl1
+; CHECK-SD-SVE2-NEXT: umaxp z0.d, p0/m, z0.d, z0.d
+; CHECK-SD-SVE2-NEXT: fmov x0, d0
+; CHECK-SD-SVE2-NEXT: ret
;
; CHECK-GI-LABEL: umaxv_v3i64:
; CHECK-GI: // %bb.0: // %entry
@@ -1856,15 +1863,16 @@ define i64 @umaxv_v4i64(<4 x i64> %a) {
; CHECK-SD-NOSVE-NEXT: fmov x0, d0
; CHECK-SD-NOSVE-NEXT: ret
;
-; CHECK-SD-SVE-LABEL: umaxv_v4i64:
-; CHECK-SD-SVE: // %bb.0: // %entry
-; CHECK-SD-SVE-NEXT: ptrue p0.d, vl2
-; CHECK-SD-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-SD-SVE-NEXT: // kill: def $q1 killed $q1 def $z1
-; CHECK-SD-SVE-NEXT: umax z0.d, p0/m, z0.d, z1.d
-; CHECK-SD-SVE-NEXT: umaxv d0, p0, z0.d
-; CHECK-SD-SVE-NEXT: fmov x0, d0
-; CHECK-SD-SVE-NEXT: ret
+; CHECK-SD-SVE2-LABEL: umaxv_v4i64:
+; CHECK-SD-SVE2: // %bb.0: // %entry
+; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl2
+; CHECK-SD-SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SD-SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-SD-SVE2-NEXT: umax z0.d, p0/m, z0.d, z1.d
+; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl1
+; CHECK-SD-SVE2-NEXT: umaxp z0.d, p0/m, z0.d, z0.d
+; CHECK-SD-SVE2-NEXT: fmov x0, d0
+; CHECK-SD-SVE2-NEXT: ret
;
; CHECK-GI-LABEL: umaxv_v4i64:
; CHECK-GI: // %bb.0: // %entry
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll
index 65cbec9cc8d09..0f33e1e9f1679 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -enable-subreg-liveness -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
@@ -775,8 +775,8 @@ define i16 @smaxv_v16i16(ptr %a) {
define i32 @smaxv_v2i32(<2 x i32> %a) {
; CHECK-LABEL: smaxv_v2i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: smaxv s0, p0, z0.s
+; CHECK-NEXT: ptrue p0.s, vl1
+; CHECK-NEXT: smaxp z0.s, p0/m, z0.s, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
;
@@ -864,8 +864,8 @@ define i32 @smaxv_v8i32(ptr %a) {
define i64 @smaxv_v2i64(<2 x i64> %a) {
; CHECK-LABEL: smaxv_v2i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: smaxv d0, p0, z0.d
+; CHECK-NEXT: ptrue p0.d, vl1
+; CHECK-NEXT: smaxp z0.d, p0/m, z0.d, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
;
@@ -887,7 +887,8 @@ define i64 @smaxv_v4i64(ptr %a) {
; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: smaxv d0, p0, z0.d
+; CHECK-NEXT: ptrue p0.d, vl1
+; CHECK-NEXT: smaxp z0.d, p0/m, z0.d, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
;
@@ -1276,8 +1277,8 @@ define i16 @sminv_v16i16(ptr %a) {
define i32 @sminv_v2i32(<2 x i32> %a) {
; CHECK-LABEL: sminv_v2i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: sminv s0, p0, z0.s
+; CHECK-NEXT: ptrue p0.s, vl1
+; CHECK-NEXT: sminp z0.s, p0/m, z0.s, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
;
@@ -1365,8 +1366,8 @@ define i32 @sminv_v8i32(ptr %a) {
define i64 @sminv_v2i64(<2 x i64> %a) {
; CHECK-LABEL: sminv_v2i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: sminv d0, p0, z0.d
+; CHECK-NEXT: ptrue p0.d, vl1
+; CHECK-NEXT: sminp z0.d, p0/m, z0.d, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
;
@@ -1388,7 +1389,8 @@ define i64 @sminv_v4i64(ptr %a) {
; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: smin z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: sminv d0, p0, z0.d
+; CHECK-NEXT: ptrue p0.d, vl1
+; CHECK-NEXT: sminp z0.d, p0/m, z0.d, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
;
@@ -1777,8 +1779,8 @@ define i16 @umaxv_v16i16(ptr %a) {
define i32 @umaxv_v2i32(<2 x i32> %a) {
; CHECK-LABEL: umaxv_v2i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: umaxv s0, p0, z0.s
+; CHECK-NEXT: ptrue p0.s, vl1
+; CHECK-NEXT: umaxp z0.s, p0/m, z0.s, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
;
@@ -1866,8 +1868,8 @@ define i32 @umaxv_v8i32(ptr %a) {
define i64 @umaxv_v2i64(<2 x i64> %a) {
; CHECK-LABEL: umaxv_v2i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: umaxv d0, p0, z0.d
+; CHECK-NEXT: ptrue p0.d, vl1
+; CHECK-NEXT: umaxp z0.d, p0/m, z0.d, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
;
@@ -1889,7 +1891,8 @@ define i64 @umaxv_v4i64(ptr %a) {
; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: umaxv d0, p0, z0.d
+; CHECK-NEXT: ptrue p0.d, vl1
+; CHECK-NEXT: umaxp z0.d, p0/m, z0.d, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
;
@@ -2278,8 +2281,8 @@ define i16 @uminv_v16i16(ptr %a) {
define i32 @uminv_v2i32(<2 x i32> %a) {
; CHECK-LABEL: uminv_v2i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: uminv s0, p0, z0.s
+; CHECK-NEXT: ptrue p0.s, vl1
+; CHECK-NEXT: uminp z0.s, p0/m, z0.s, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
;
@@ -2367,8 +2370,8 @@ define i32 @uminv_v8i32(ptr %a) {
define i64 @uminv_v2i64(<2 x i64> %a) {
; CHECK-LABEL: uminv_v2i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: uminv d0, p0, z0.d
+; CHECK-NEXT: ptrue p0.d, vl1
+; CHECK-NEXT: uminp z0.d, p0/m, z0.d, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
;
@@ -2390,7 +2393,8 @@ define i64 @uminv_v4i64(ptr %a) {
; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: uminv d0, p0, z0.d
+; CHECK-NEXT: ptrue p0.d, vl1
+; CHECK-NEXT: uminp z0.d, p0/m, z0.d, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
;
>From 1afd10349df7625c68c4c267e7fd647031f843e6 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Mon, 16 Feb 2026 17:56:23 +0000
Subject: [PATCH 3/4] Fixups
---
.../Target/AArch64/AArch64ISelLowering.cpp | 35 ++++++++++---------
llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll | 16 +++------
...-streaming-mode-fixed-length-int-reduce.ll | 20 +++++------
3 files changed, 31 insertions(+), 40 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index ad2642d7f46be..6395345c64609 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1897,7 +1897,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MUL, MVT::v1i64, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
- if (Subtarget->hasSVE2() || Subtarget->hasSME() ||
+ if (Subtarget->hasSVE2() || Subtarget->isStreamingSVEAvailable() ||
Subtarget->useSVEForFixedLengthVectors()) {
setOperationAction(ISD::VECREDUCE_SMAX, MVT::v2i64, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, MVT::v2i64, Custom);
@@ -1943,6 +1943,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UMAX, MVT::v2i64, Custom);
setOperationAction(ISD::UMIN, MVT::v1i64, Custom);
setOperationAction(ISD::UMIN, MVT::v2i64, Custom);
+ setOperationAction(ISD::VECREDUCE_SMAX, MVT::v2i64, Custom);
+ setOperationAction(ISD::VECREDUCE_SMIN, MVT::v2i64, Custom);
+ setOperationAction(ISD::VECREDUCE_UMAX, MVT::v2i64, Custom);
+ setOperationAction(ISD::VECREDUCE_UMIN, MVT::v2i64, Custom);
// Int operations with no NEON support.
for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
@@ -17337,8 +17341,10 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
Op.getOpcode() == ISD::VECREDUCE_FADD ||
(Op.getOpcode() != ISD::VECREDUCE_ADD &&
SrcVT.getVectorElementType() == MVT::i64));
- bool PreferSVE = IsMinMax && (Subtarget->hasSVE2() || Subtarget->hasSME()) &&
- SrcVT == MVT::v2i64;
+ bool PreferSVE =
+ IsMinMax &&
+ (Subtarget->hasSVE2() || Subtarget->isStreamingSVEAvailable()) &&
+ SrcVT == MVT::v2i64;
if (SrcVT.isScalableVector() ||
useSVEForFixedLengthVectorVT(SrcVT, ForceSVE || PreferSVE)) {
@@ -31534,8 +31540,9 @@ SDValue AArch64TargetLowering::LowerReductionToSVE(unsigned Opcode,
}
// UADDV always returns an i64 result.
- EVT ResVT = (Opcode == AArch64ISD::UADDV_PRED) ? MVT::i64 :
- SrcVT.getVectorElementType();
+ EVT ResVT = (Opcode == AArch64ISD::UADDV_PRED) ? MVT::i64
+ : SrcVT.getVectorElementType();
+ SDValue Pg = getPredicateForVector(DAG, DL, SrcVT);
SDValue Rdx;
// Lower v2<ty> VECREDUCE_[US](MIN|MAX) to pairwise operation with SVE2/SME.
@@ -31543,22 +31550,18 @@ SDValue AArch64TargetLowering::LowerReductionToSVE(unsigned Opcode,
(Subtarget->hasSVE2() || Subtarget->hasSME())) {
if (std::optional<Intrinsic::ID> PairwiseIID =
getPairwiseOpForReduction(ScalarOp->getOpcode())) {
- EVT VT = VecOp.getValueType();
- EVT PredVT = VT.changeVectorElementType(*DAG.getContext(), MVT::i1);
// VL1 as the pairwise operations use two input lanes per one output lane.
- SDValue PTrueVL1 = getPTrue(DAG, DL, PredVT, AArch64SVEPredPattern::vl1);
- Rdx = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(*PairwiseIID, DL, MVT::i32), PTrueVL1,
- VecOp, VecOp);
+ Rdx = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VecOp.getValueType(),
+ DAG.getConstant(*PairwiseIID, DL, MVT::i32), Pg, VecOp,
+ VecOp);
}
}
- EVT RdxVT = SrcVT;
- if (SrcVT.isFixedLengthVector() || Opcode == AArch64ISD::UADDV_PRED)
- RdxVT = getPackedSVEVectorVT(ResVT);
-
if (!Rdx) {
- SDValue Pg = getPredicateForVector(DAG, DL, SrcVT);
+ EVT RdxVT = SrcVT;
+ if (SrcVT.isFixedLengthVector() || Opcode == AArch64ISD::UADDV_PRED)
+ RdxVT = getPackedSVEVectorVT(ResVT);
+
Rdx = DAG.getNode(Opcode, DL, RdxVT, Pg, VecOp);
}
SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT,
diff --git a/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll b/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
index eedd532bd770a..2f0ec2b75bfc6 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
@@ -612,7 +612,7 @@ define i64 @sminv_v2i64(<2 x i64> %a) {
;
; CHECK-SD-SVE2-LABEL: sminv_v2i64:
; CHECK-SD-SVE2: // %bb.0: // %entry
-; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl1
+; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl2
; CHECK-SD-SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-SD-SVE2-NEXT: sminp z0.d, p0/m, z0.d, z0.d
; CHECK-SD-SVE2-NEXT: fmov x0, d0
@@ -659,7 +659,6 @@ define i64 @sminv_v3i64(<3 x i64> %a) {
; CHECK-SD-SVE2-NEXT: mov v0.d[1], v1.d[0]
; CHECK-SD-SVE2-NEXT: mov v2.d[1], x8
; CHECK-SD-SVE2-NEXT: smin z0.d, p0/m, z0.d, z2.d
-; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl1
; CHECK-SD-SVE2-NEXT: sminp z0.d, p0/m, z0.d, z0.d
; CHECK-SD-SVE2-NEXT: fmov x0, d0
; CHECK-SD-SVE2-NEXT: ret
@@ -703,7 +702,6 @@ define i64 @sminv_v4i64(<4 x i64> %a) {
; CHECK-SD-SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-SD-SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-SD-SVE2-NEXT: smin z0.d, p0/m, z0.d, z1.d
-; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl1
; CHECK-SD-SVE2-NEXT: sminp z0.d, p0/m, z0.d, z0.d
; CHECK-SD-SVE2-NEXT: fmov x0, d0
; CHECK-SD-SVE2-NEXT: ret
@@ -1006,7 +1004,7 @@ define i64 @smaxv_v2i64(<2 x i64> %a) {
;
; CHECK-SD-SVE2-LABEL: smaxv_v2i64:
; CHECK-SD-SVE2: // %bb.0: // %entry
-; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl1
+; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl2
; CHECK-SD-SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-SD-SVE2-NEXT: smaxp z0.d, p0/m, z0.d, z0.d
; CHECK-SD-SVE2-NEXT: fmov x0, d0
@@ -1053,7 +1051,6 @@ define i64 @smaxv_v3i64(<3 x i64> %a) {
; CHECK-SD-SVE2-NEXT: mov v0.d[1], v1.d[0]
; CHECK-SD-SVE2-NEXT: mov v2.d[1], x8
; CHECK-SD-SVE2-NEXT: smax z0.d, p0/m, z0.d, z2.d
-; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl1
; CHECK-SD-SVE2-NEXT: smaxp z0.d, p0/m, z0.d, z0.d
; CHECK-SD-SVE2-NEXT: fmov x0, d0
; CHECK-SD-SVE2-NEXT: ret
@@ -1097,7 +1094,6 @@ define i64 @smaxv_v4i64(<4 x i64> %a) {
; CHECK-SD-SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-SD-SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-SD-SVE2-NEXT: smax z0.d, p0/m, z0.d, z1.d
-; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl1
; CHECK-SD-SVE2-NEXT: smaxp z0.d, p0/m, z0.d, z0.d
; CHECK-SD-SVE2-NEXT: fmov x0, d0
; CHECK-SD-SVE2-NEXT: ret
@@ -1398,7 +1394,7 @@ define i64 @uminv_v2i64(<2 x i64> %a) {
;
; CHECK-SD-SVE2-LABEL: uminv_v2i64:
; CHECK-SD-SVE2: // %bb.0: // %entry
-; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl1
+; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl2
; CHECK-SD-SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-SD-SVE2-NEXT: uminp z0.d, p0/m, z0.d, z0.d
; CHECK-SD-SVE2-NEXT: fmov x0, d0
@@ -1445,7 +1441,6 @@ define i64 @uminv_v3i64(<3 x i64> %a) {
; CHECK-SD-SVE2-NEXT: mov v0.d[1], v1.d[0]
; CHECK-SD-SVE2-NEXT: mov v2.d[1], x8
; CHECK-SD-SVE2-NEXT: umin z0.d, p0/m, z0.d, z2.d
-; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl1
; CHECK-SD-SVE2-NEXT: uminp z0.d, p0/m, z0.d, z0.d
; CHECK-SD-SVE2-NEXT: fmov x0, d0
; CHECK-SD-SVE2-NEXT: ret
@@ -1489,7 +1484,6 @@ define i64 @uminv_v4i64(<4 x i64> %a) {
; CHECK-SD-SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-SD-SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-SD-SVE2-NEXT: umin z0.d, p0/m, z0.d, z1.d
-; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl1
; CHECK-SD-SVE2-NEXT: uminp z0.d, p0/m, z0.d, z0.d
; CHECK-SD-SVE2-NEXT: fmov x0, d0
; CHECK-SD-SVE2-NEXT: ret
@@ -1779,7 +1773,7 @@ define i64 @umaxv_v2i64(<2 x i64> %a) {
;
; CHECK-SD-SVE2-LABEL: umaxv_v2i64:
; CHECK-SD-SVE2: // %bb.0: // %entry
-; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl1
+; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl2
; CHECK-SD-SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-SD-SVE2-NEXT: umaxp z0.d, p0/m, z0.d, z0.d
; CHECK-SD-SVE2-NEXT: fmov x0, d0
@@ -1826,7 +1820,6 @@ define i64 @umaxv_v3i64(<3 x i64> %a) {
; CHECK-SD-SVE2-NEXT: mov v0.d[1], v1.d[0]
; CHECK-SD-SVE2-NEXT: mov v2.d[1], xzr
; CHECK-SD-SVE2-NEXT: umax z0.d, p0/m, z0.d, z2.d
-; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl1
; CHECK-SD-SVE2-NEXT: umaxp z0.d, p0/m, z0.d, z0.d
; CHECK-SD-SVE2-NEXT: fmov x0, d0
; CHECK-SD-SVE2-NEXT: ret
@@ -1869,7 +1862,6 @@ define i64 @umaxv_v4i64(<4 x i64> %a) {
; CHECK-SD-SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-SD-SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-SD-SVE2-NEXT: umax z0.d, p0/m, z0.d, z1.d
-; CHECK-SD-SVE2-NEXT: ptrue p0.d, vl1
; CHECK-SD-SVE2-NEXT: umaxp z0.d, p0/m, z0.d, z0.d
; CHECK-SD-SVE2-NEXT: fmov x0, d0
; CHECK-SD-SVE2-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll
index 0f33e1e9f1679..682dc0927d810 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll
@@ -775,7 +775,7 @@ define i16 @smaxv_v16i16(ptr %a) {
define i32 @smaxv_v2i32(<2 x i32> %a) {
; CHECK-LABEL: smaxv_v2i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.s, vl1
+; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: smaxp z0.s, p0/m, z0.s, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
@@ -864,7 +864,7 @@ define i32 @smaxv_v8i32(ptr %a) {
define i64 @smaxv_v2i64(<2 x i64> %a) {
; CHECK-LABEL: smaxv_v2i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.d, vl1
+; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: smaxp z0.d, p0/m, z0.d, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
@@ -887,7 +887,6 @@ define i64 @smaxv_v4i64(ptr %a) {
; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ptrue p0.d, vl1
; CHECK-NEXT: smaxp z0.d, p0/m, z0.d, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
@@ -1277,7 +1276,7 @@ define i16 @sminv_v16i16(ptr %a) {
define i32 @sminv_v2i32(<2 x i32> %a) {
; CHECK-LABEL: sminv_v2i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.s, vl1
+; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: sminp z0.s, p0/m, z0.s, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
@@ -1366,7 +1365,7 @@ define i32 @sminv_v8i32(ptr %a) {
define i64 @sminv_v2i64(<2 x i64> %a) {
; CHECK-LABEL: sminv_v2i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.d, vl1
+; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: sminp z0.d, p0/m, z0.d, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
@@ -1389,7 +1388,6 @@ define i64 @sminv_v4i64(ptr %a) {
; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: smin z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ptrue p0.d, vl1
; CHECK-NEXT: sminp z0.d, p0/m, z0.d, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
@@ -1779,7 +1777,7 @@ define i16 @umaxv_v16i16(ptr %a) {
define i32 @umaxv_v2i32(<2 x i32> %a) {
; CHECK-LABEL: umaxv_v2i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.s, vl1
+; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: umaxp z0.s, p0/m, z0.s, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
@@ -1868,7 +1866,7 @@ define i32 @umaxv_v8i32(ptr %a) {
define i64 @umaxv_v2i64(<2 x i64> %a) {
; CHECK-LABEL: umaxv_v2i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.d, vl1
+; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: umaxp z0.d, p0/m, z0.d, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
@@ -1891,7 +1889,6 @@ define i64 @umaxv_v4i64(ptr %a) {
; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ptrue p0.d, vl1
; CHECK-NEXT: umaxp z0.d, p0/m, z0.d, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
@@ -2281,7 +2278,7 @@ define i16 @uminv_v16i16(ptr %a) {
define i32 @uminv_v2i32(<2 x i32> %a) {
; CHECK-LABEL: uminv_v2i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.s, vl1
+; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: uminp z0.s, p0/m, z0.s, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
@@ -2370,7 +2367,7 @@ define i32 @uminv_v8i32(ptr %a) {
define i64 @uminv_v2i64(<2 x i64> %a) {
; CHECK-LABEL: uminv_v2i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.d, vl1
+; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: uminp z0.d, p0/m, z0.d, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
@@ -2393,7 +2390,6 @@ define i64 @uminv_v4i64(ptr %a) {
; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ptrue p0.d, vl1
; CHECK-NEXT: uminp z0.d, p0/m, z0.d, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
>From 3991d0089d0b52f12b28002e3e202e458b9ce716 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Mon, 16 Feb 2026 18:00:03 +0000
Subject: [PATCH 4/4] Remove comment
---
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 6395345c64609..bc57ec5a8acc0 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -31550,7 +31550,6 @@ SDValue AArch64TargetLowering::LowerReductionToSVE(unsigned Opcode,
(Subtarget->hasSVE2() || Subtarget->hasSME())) {
if (std::optional<Intrinsic::ID> PairwiseIID =
getPairwiseOpForReduction(ScalarOp->getOpcode())) {
- // VL1 as the pairwise operations use two input lanes per one output lane.
Rdx = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VecOp.getValueType(),
DAG.getConstant(*PairwiseIID, DL, MVT::i32), Pg, VecOp,
VecOp);
More information about the llvm-commits
mailing list