[llvm] [AArch64] Prefer SVE for fixed-length [S|U][MIN|MAX] reductions (PR #181161)
Benjamin Maxwell via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 12 07:04:52 PST 2026
https://github.com/MacDue created https://github.com/llvm/llvm-project/pull/181161
For v2i64, NEON does not have min/max reductions, but SVE does. The throughput is about the same, but the SVE code is smaller than the NEON expansion.
From 617db4564030ae4105b7eab3dc74a8809bd44bcf Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Thu, 12 Feb 2026 13:37:16 +0000
Subject: [PATCH] [AArch64] Prefer SVE for fixed-length [S|U][MIN|MAX] reductions
For v2i64, NEON does not have min/max reductions, but SVE does. The
throughput is about the same, but the SVE code is smaller than the NEON
expansion.
---
.../Target/AArch64/AArch64ISelLowering.cpp | 38 +-
llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll | 464 ++++++++++--------
2 files changed, 276 insertions(+), 226 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 028f2114d1d60..e55a75127235c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1897,6 +1897,11 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MUL, MVT::v1i64, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
+ setOperationAction(ISD::VECREDUCE_SMAX, MVT::v2i64, Custom);
+ setOperationAction(ISD::VECREDUCE_SMIN, MVT::v2i64, Custom);
+ setOperationAction(ISD::VECREDUCE_UMAX, MVT::v2i64, Custom);
+ setOperationAction(ISD::VECREDUCE_UMIN, MVT::v2i64, Custom);
+
// NOTE: Currently this has to happen after computeRegisterProperties rather
// than the preferred option of combining it with the addRegisterClass call.
if (Subtarget->useSVEForFixedLengthVectors()) {
@@ -1935,10 +1940,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UMAX, MVT::v2i64, Custom);
setOperationAction(ISD::UMIN, MVT::v1i64, Custom);
setOperationAction(ISD::UMIN, MVT::v2i64, Custom);
- setOperationAction(ISD::VECREDUCE_SMAX, MVT::v2i64, Custom);
- setOperationAction(ISD::VECREDUCE_SMIN, MVT::v2i64, Custom);
- setOperationAction(ISD::VECREDUCE_UMAX, MVT::v2i64, Custom);
- setOperationAction(ISD::VECREDUCE_UMIN, MVT::v2i64, Custom);
// Int operations with no NEON support.
for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
@@ -17319,17 +17320,24 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
DAG.getExtractVectorElt(DL, MVT::f16, Src, 1));
}
+ bool IsMinMax = Op.getOpcode() == ISD::VECREDUCE_SMIN ||
+ Op.getOpcode() == ISD::VECREDUCE_UMIN ||
+ Op.getOpcode() == ISD::VECREDUCE_SMAX ||
+ Op.getOpcode() == ISD::VECREDUCE_UMAX;
+
// Try to lower fixed length reductions to SVE.
- bool OverrideNEON = !Subtarget->isNeonAvailable() ||
- Op.getOpcode() == ISD::VECREDUCE_AND ||
- Op.getOpcode() == ISD::VECREDUCE_OR ||
- Op.getOpcode() == ISD::VECREDUCE_XOR ||
- Op.getOpcode() == ISD::VECREDUCE_FADD ||
- (Op.getOpcode() != ISD::VECREDUCE_ADD &&
- SrcVT.getVectorElementType() == MVT::i64);
+ bool ForceSVE =
+ Subtarget->useSVEForFixedLengthVectors() &&
+ (!Subtarget->isNeonAvailable() || Op.getOpcode() == ISD::VECREDUCE_AND ||
+ Op.getOpcode() == ISD::VECREDUCE_OR ||
+ Op.getOpcode() == ISD::VECREDUCE_XOR ||
+ Op.getOpcode() == ISD::VECREDUCE_FADD ||
+ (Op.getOpcode() != ISD::VECREDUCE_ADD &&
+ SrcVT.getVectorElementType() == MVT::i64));
+ bool PreferSVE = IsMinMax && SrcVT == MVT::v2i64;
+
if (SrcVT.isScalableVector() ||
- useSVEForFixedLengthVectorVT(
- SrcVT, OverrideNEON && Subtarget->useSVEForFixedLengthVectors())) {
+ useSVEForFixedLengthVectorVT(SrcVT, ForceSVE || PreferSVE)) {
if (SrcVT.getVectorElementType() == MVT::i1)
return LowerPredReductionToSVE(Op, DAG);
@@ -31487,9 +31495,7 @@ SDValue AArch64TargetLowering::LowerReductionToSVE(unsigned Opcode,
SDValue VecOp = ScalarOp.getOperand(0);
EVT SrcVT = VecOp.getValueType();
- if (useSVEForFixedLengthVectorVT(
- SrcVT,
- /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors())) {
+ if (!SrcVT.isScalableVector()) {
EVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT);
VecOp = convertToScalableVector(DAG, ContainerVT, VecOp);
}
diff --git a/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll b/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
index 407d7a4d89ed2..c6dda3f0f3cf7 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
@@ -1,93 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-NOSVE
+; RUN: llc -mtriple=aarch64 -mattr=+sve -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-SVE
; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
-declare i8 @llvm.vector.reduce.smin.v2i8(<2 x i8>)
-declare i8 @llvm.vector.reduce.smin.v3i8(<3 x i8>)
-declare i8 @llvm.vector.reduce.smin.v4i8(<4 x i8>)
-declare i8 @llvm.vector.reduce.smin.v8i8(<8 x i8>)
-declare i8 @llvm.vector.reduce.smin.v16i8(<16 x i8>)
-declare i8 @llvm.vector.reduce.smin.v32i8(<32 x i8>)
-declare i16 @llvm.vector.reduce.smin.v2i16(<2 x i16>)
-declare i16 @llvm.vector.reduce.smin.v3i16(<3 x i16>)
-declare i16 @llvm.vector.reduce.smin.v4i16(<4 x i16>)
-declare i16 @llvm.vector.reduce.smin.v8i16(<8 x i16>)
-declare i16 @llvm.vector.reduce.smin.v16i16(<16 x i16>)
-declare i32 @llvm.vector.reduce.smin.v2i32(<2 x i32>)
-declare i32 @llvm.vector.reduce.smin.v3i32(<3 x i32>)
-declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>)
-declare i32 @llvm.vector.reduce.smin.v8i32(<8 x i32>)
-declare i32 @llvm.vector.reduce.smin.v16i32(<16 x i32>)
-declare i64 @llvm.vector.reduce.smin.v2i64(<2 x i64>)
-declare i64 @llvm.vector.reduce.smin.v3i64(<3 x i64>)
-declare i64 @llvm.vector.reduce.smin.v4i64(<4 x i64>)
-declare i128 @llvm.vector.reduce.smin.v2i128(<2 x i128>)
-declare i8 @llvm.vector.reduce.smax.v2i8(<2 x i8>)
-declare i8 @llvm.vector.reduce.smax.v3i8(<3 x i8>)
-declare i8 @llvm.vector.reduce.smax.v4i8(<4 x i8>)
-declare i8 @llvm.vector.reduce.smax.v8i8(<8 x i8>)
-declare i8 @llvm.vector.reduce.smax.v16i8(<16 x i8>)
-declare i8 @llvm.vector.reduce.smax.v32i8(<32 x i8>)
-declare i16 @llvm.vector.reduce.smax.v2i16(<2 x i16>)
-declare i16 @llvm.vector.reduce.smax.v3i16(<3 x i16>)
-declare i16 @llvm.vector.reduce.smax.v4i16(<4 x i16>)
-declare i16 @llvm.vector.reduce.smax.v8i16(<8 x i16>)
-declare i16 @llvm.vector.reduce.smax.v16i16(<16 x i16>)
-declare i32 @llvm.vector.reduce.smax.v2i32(<2 x i32>)
-declare i32 @llvm.vector.reduce.smax.v3i32(<3 x i32>)
-declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>)
-declare i32 @llvm.vector.reduce.smax.v8i32(<8 x i32>)
-declare i32 @llvm.vector.reduce.smax.v16i32(<16 x i32>)
-declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>)
-declare i64 @llvm.vector.reduce.smax.v3i64(<3 x i64>)
-declare i64 @llvm.vector.reduce.smax.v4i64(<4 x i64>)
-declare i128 @llvm.vector.reduce.smax.v2i128(<2 x i128>)
-declare i8 @llvm.vector.reduce.umin.v2i8(<2 x i8>)
-declare i8 @llvm.vector.reduce.umin.v3i8(<3 x i8>)
-declare i8 @llvm.vector.reduce.umin.v4i8(<4 x i8>)
-declare i8 @llvm.vector.reduce.umin.v8i8(<8 x i8>)
-declare i8 @llvm.vector.reduce.umin.v16i8(<16 x i8>)
-declare i8 @llvm.vector.reduce.umin.v32i8(<32 x i8>)
-declare i16 @llvm.vector.reduce.umin.v2i16(<2 x i16>)
-declare i16 @llvm.vector.reduce.umin.v3i16(<3 x i16>)
-declare i16 @llvm.vector.reduce.umin.v4i16(<4 x i16>)
-declare i16 @llvm.vector.reduce.umin.v8i16(<8 x i16>)
-declare i16 @llvm.vector.reduce.umin.v16i16(<16 x i16>)
-declare i32 @llvm.vector.reduce.umin.v2i32(<2 x i32>)
-declare i32 @llvm.vector.reduce.umin.v3i32(<3 x i32>)
-declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>)
-declare i32 @llvm.vector.reduce.umin.v8i32(<8 x i32>)
-declare i32 @llvm.vector.reduce.umin.v16i32(<16 x i32>)
-declare i64 @llvm.vector.reduce.umin.v2i64(<2 x i64>)
-declare i64 @llvm.vector.reduce.umin.v3i64(<3 x i64>)
-declare i64 @llvm.vector.reduce.umin.v4i64(<4 x i64>)
-declare i128 @llvm.vector.reduce.umin.v2i128(<2 x i128>)
-declare i8 @llvm.vector.reduce.umax.v2i8(<2 x i8>)
-declare i8 @llvm.vector.reduce.umax.v3i8(<3 x i8>)
-declare i8 @llvm.vector.reduce.umax.v4i8(<4 x i8>)
-declare i8 @llvm.vector.reduce.umax.v8i8(<8 x i8>)
-declare i8 @llvm.vector.reduce.umax.v16i8(<16 x i8>)
-declare i8 @llvm.vector.reduce.umax.v32i8(<32 x i8>)
-declare i16 @llvm.vector.reduce.umax.v2i16(<2 x i16>)
-declare i16 @llvm.vector.reduce.umax.v3i16(<3 x i16>)
-declare i16 @llvm.vector.reduce.umax.v4i16(<4 x i16>)
-declare i16 @llvm.vector.reduce.umax.v8i16(<8 x i16>)
-declare i16 @llvm.vector.reduce.umax.v16i16(<16 x i16>)
-declare i32 @llvm.vector.reduce.umax.v2i32(<2 x i32>)
-declare i32 @llvm.vector.reduce.umax.v3i32(<3 x i32>)
-declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>)
-declare i32 @llvm.vector.reduce.umax.v8i32(<8 x i32>)
-declare i32 @llvm.vector.reduce.umax.v16i32(<16 x i32>)
-declare i64 @llvm.vector.reduce.umax.v2i64(<2 x i64>)
-declare i64 @llvm.vector.reduce.umax.v3i64(<3 x i64>)
-declare i64 @llvm.vector.reduce.umax.v4i64(<4 x i64>)
-declare i128 @llvm.vector.reduce.umax.v2i128(<2 x i128>)
-
-declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
-declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
-
define i8 @smax_B(ptr nocapture readonly %arr) {
; CHECK-LABEL: smax_B:
; CHECK: // %bb.0:
@@ -685,13 +602,21 @@ entry:
}
define i64 @sminv_v2i64(<2 x i64> %a) {
-; CHECK-SD-LABEL: sminv_v2i64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: cmgt d2, d1, d0
-; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: ret
+; CHECK-SD-NOSVE-LABEL: sminv_v2i64:
+; CHECK-SD-NOSVE: // %bb.0: // %entry
+; CHECK-SD-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NOSVE-NEXT: cmgt d2, d1, d0
+; CHECK-SD-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-SD-NOSVE-NEXT: fmov x0, d0
+; CHECK-SD-NOSVE-NEXT: ret
+;
+; CHECK-SD-SVE-LABEL: sminv_v2i64:
+; CHECK-SD-SVE: // %bb.0: // %entry
+; CHECK-SD-SVE-NEXT: ptrue p0.d, vl2
+; CHECK-SD-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SD-SVE-NEXT: sminv d0, p0, z0.d
+; CHECK-SD-SVE-NEXT: fmov x0, d0
+; CHECK-SD-SVE-NEXT: ret
;
; CHECK-GI-LABEL: sminv_v2i64:
; CHECK-GI: // %bb.0: // %entry
@@ -708,21 +633,35 @@ entry:
}
define i64 @sminv_v3i64(<3 x i64> %a) {
-; CHECK-SD-LABEL: sminv_v3i64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: mov x8, #9223372036854775807 // =0x7fffffffffffffff
-; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-SD-NEXT: mov v2.d[1], x8
-; CHECK-SD-NEXT: cmgt v1.2d, v2.2d, v0.2d
-; CHECK-SD-NEXT: bif v0.16b, v2.16b, v1.16b
-; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: cmgt d2, d1, d0
-; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: ret
+; CHECK-SD-NOSVE-LABEL: sminv_v3i64:
+; CHECK-SD-NOSVE: // %bb.0: // %entry
+; CHECK-SD-NOSVE-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NOSVE-NEXT: mov x8, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-SD-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NOSVE-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NOSVE-NEXT: mov v2.d[1], x8
+; CHECK-SD-NOSVE-NEXT: cmgt v1.2d, v2.2d, v0.2d
+; CHECK-SD-NOSVE-NEXT: bif v0.16b, v2.16b, v1.16b
+; CHECK-SD-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NOSVE-NEXT: cmgt d2, d1, d0
+; CHECK-SD-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-SD-NOSVE-NEXT: fmov x0, d0
+; CHECK-SD-NOSVE-NEXT: ret
+;
+; CHECK-SD-SVE-LABEL: sminv_v3i64:
+; CHECK-SD-SVE: // %bb.0: // %entry
+; CHECK-SD-SVE-NEXT: // kill: def $d2 killed $d2 def $z2
+; CHECK-SD-SVE-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-SD-SVE-NEXT: mov x8, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-SD-SVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-SVE-NEXT: ptrue p0.d, vl2
+; CHECK-SD-SVE-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-SVE-NEXT: mov v2.d[1], x8
+; CHECK-SD-SVE-NEXT: smin z0.d, p0/m, z0.d, z2.d
+; CHECK-SD-SVE-NEXT: sminv d0, p0, z0.d
+; CHECK-SD-SVE-NEXT: fmov x0, d0
+; CHECK-SD-SVE-NEXT: ret
;
; CHECK-GI-LABEL: sminv_v3i64:
; CHECK-GI: // %bb.0: // %entry
@@ -747,15 +686,25 @@ entry:
}
define i64 @sminv_v4i64(<4 x i64> %a) {
-; CHECK-SD-LABEL: sminv_v4i64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmgt v2.2d, v1.2d, v0.2d
-; CHECK-SD-NEXT: bif v0.16b, v1.16b, v2.16b
-; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: cmgt d2, d1, d0
-; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: ret
+; CHECK-SD-NOSVE-LABEL: sminv_v4i64:
+; CHECK-SD-NOSVE: // %bb.0: // %entry
+; CHECK-SD-NOSVE-NEXT: cmgt v2.2d, v1.2d, v0.2d
+; CHECK-SD-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b
+; CHECK-SD-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NOSVE-NEXT: cmgt d2, d1, d0
+; CHECK-SD-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-SD-NOSVE-NEXT: fmov x0, d0
+; CHECK-SD-NOSVE-NEXT: ret
+;
+; CHECK-SD-SVE-LABEL: sminv_v4i64:
+; CHECK-SD-SVE: // %bb.0: // %entry
+; CHECK-SD-SVE-NEXT: ptrue p0.d, vl2
+; CHECK-SD-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SD-SVE-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-SD-SVE-NEXT: smin z0.d, p0/m, z0.d, z1.d
+; CHECK-SD-SVE-NEXT: sminv d0, p0, z0.d
+; CHECK-SD-SVE-NEXT: fmov x0, d0
+; CHECK-SD-SVE-NEXT: ret
;
; CHECK-GI-LABEL: sminv_v4i64:
; CHECK-GI: // %bb.0: // %entry
@@ -1045,13 +994,21 @@ entry:
}
define i64 @smaxv_v2i64(<2 x i64> %a) {
-; CHECK-SD-LABEL: smaxv_v2i64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: cmgt d2, d0, d1
-; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: ret
+; CHECK-SD-NOSVE-LABEL: smaxv_v2i64:
+; CHECK-SD-NOSVE: // %bb.0: // %entry
+; CHECK-SD-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NOSVE-NEXT: cmgt d2, d0, d1
+; CHECK-SD-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-SD-NOSVE-NEXT: fmov x0, d0
+; CHECK-SD-NOSVE-NEXT: ret
+;
+; CHECK-SD-SVE-LABEL: smaxv_v2i64:
+; CHECK-SD-SVE: // %bb.0: // %entry
+; CHECK-SD-SVE-NEXT: ptrue p0.d, vl2
+; CHECK-SD-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SD-SVE-NEXT: smaxv d0, p0, z0.d
+; CHECK-SD-SVE-NEXT: fmov x0, d0
+; CHECK-SD-SVE-NEXT: ret
;
; CHECK-GI-LABEL: smaxv_v2i64:
; CHECK-GI: // %bb.0: // %entry
@@ -1068,21 +1025,35 @@ entry:
}
define i64 @smaxv_v3i64(<3 x i64> %a) {
-; CHECK-SD-LABEL: smaxv_v3i64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
-; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-SD-NEXT: mov v2.d[1], x8
-; CHECK-SD-NEXT: cmgt v1.2d, v0.2d, v2.2d
-; CHECK-SD-NEXT: bif v0.16b, v2.16b, v1.16b
-; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: cmgt d2, d0, d1
-; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: ret
+; CHECK-SD-NOSVE-LABEL: smaxv_v3i64:
+; CHECK-SD-NOSVE: // %bb.0: // %entry
+; CHECK-SD-NOSVE-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NOSVE-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-SD-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NOSVE-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NOSVE-NEXT: mov v2.d[1], x8
+; CHECK-SD-NOSVE-NEXT: cmgt v1.2d, v0.2d, v2.2d
+; CHECK-SD-NOSVE-NEXT: bif v0.16b, v2.16b, v1.16b
+; CHECK-SD-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NOSVE-NEXT: cmgt d2, d0, d1
+; CHECK-SD-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-SD-NOSVE-NEXT: fmov x0, d0
+; CHECK-SD-NOSVE-NEXT: ret
+;
+; CHECK-SD-SVE-LABEL: smaxv_v3i64:
+; CHECK-SD-SVE: // %bb.0: // %entry
+; CHECK-SD-SVE-NEXT: // kill: def $d2 killed $d2 def $z2
+; CHECK-SD-SVE-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-SD-SVE-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-SD-SVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-SVE-NEXT: ptrue p0.d, vl2
+; CHECK-SD-SVE-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-SVE-NEXT: mov v2.d[1], x8
+; CHECK-SD-SVE-NEXT: smax z0.d, p0/m, z0.d, z2.d
+; CHECK-SD-SVE-NEXT: smaxv d0, p0, z0.d
+; CHECK-SD-SVE-NEXT: fmov x0, d0
+; CHECK-SD-SVE-NEXT: ret
;
; CHECK-GI-LABEL: smaxv_v3i64:
; CHECK-GI: // %bb.0: // %entry
@@ -1107,15 +1078,25 @@ entry:
}
define i64 @smaxv_v4i64(<4 x i64> %a) {
-; CHECK-SD-LABEL: smaxv_v4i64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmgt v2.2d, v0.2d, v1.2d
-; CHECK-SD-NEXT: bif v0.16b, v1.16b, v2.16b
-; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: cmgt d2, d0, d1
-; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: ret
+; CHECK-SD-NOSVE-LABEL: smaxv_v4i64:
+; CHECK-SD-NOSVE: // %bb.0: // %entry
+; CHECK-SD-NOSVE-NEXT: cmgt v2.2d, v0.2d, v1.2d
+; CHECK-SD-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b
+; CHECK-SD-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NOSVE-NEXT: cmgt d2, d0, d1
+; CHECK-SD-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-SD-NOSVE-NEXT: fmov x0, d0
+; CHECK-SD-NOSVE-NEXT: ret
+;
+; CHECK-SD-SVE-LABEL: smaxv_v4i64:
+; CHECK-SD-SVE: // %bb.0: // %entry
+; CHECK-SD-SVE-NEXT: ptrue p0.d, vl2
+; CHECK-SD-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SD-SVE-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-SD-SVE-NEXT: smax z0.d, p0/m, z0.d, z1.d
+; CHECK-SD-SVE-NEXT: smaxv d0, p0, z0.d
+; CHECK-SD-SVE-NEXT: fmov x0, d0
+; CHECK-SD-SVE-NEXT: ret
;
; CHECK-GI-LABEL: smaxv_v4i64:
; CHECK-GI: // %bb.0: // %entry
@@ -1403,13 +1384,21 @@ entry:
}
define i64 @uminv_v2i64(<2 x i64> %a) {
-; CHECK-SD-LABEL: uminv_v2i64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: cmhi d2, d1, d0
-; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: ret
+; CHECK-SD-NOSVE-LABEL: uminv_v2i64:
+; CHECK-SD-NOSVE: // %bb.0: // %entry
+; CHECK-SD-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NOSVE-NEXT: cmhi d2, d1, d0
+; CHECK-SD-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-SD-NOSVE-NEXT: fmov x0, d0
+; CHECK-SD-NOSVE-NEXT: ret
+;
+; CHECK-SD-SVE-LABEL: uminv_v2i64:
+; CHECK-SD-SVE: // %bb.0: // %entry
+; CHECK-SD-SVE-NEXT: ptrue p0.d, vl2
+; CHECK-SD-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SD-SVE-NEXT: uminv d0, p0, z0.d
+; CHECK-SD-SVE-NEXT: fmov x0, d0
+; CHECK-SD-SVE-NEXT: ret
;
; CHECK-GI-LABEL: uminv_v2i64:
; CHECK-GI: // %bb.0: // %entry
@@ -1426,21 +1415,35 @@ entry:
}
define i64 @uminv_v3i64(<3 x i64> %a) {
-; CHECK-SD-LABEL: uminv_v3i64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: mov x8, #-1 // =0xffffffffffffffff
-; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-SD-NEXT: mov v2.d[1], x8
-; CHECK-SD-NEXT: cmhi v1.2d, v2.2d, v0.2d
-; CHECK-SD-NEXT: bif v0.16b, v2.16b, v1.16b
-; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: cmhi d2, d1, d0
-; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: ret
+; CHECK-SD-NOSVE-LABEL: uminv_v3i64:
+; CHECK-SD-NOSVE: // %bb.0: // %entry
+; CHECK-SD-NOSVE-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NOSVE-NEXT: mov x8, #-1 // =0xffffffffffffffff
+; CHECK-SD-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NOSVE-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NOSVE-NEXT: mov v2.d[1], x8
+; CHECK-SD-NOSVE-NEXT: cmhi v1.2d, v2.2d, v0.2d
+; CHECK-SD-NOSVE-NEXT: bif v0.16b, v2.16b, v1.16b
+; CHECK-SD-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NOSVE-NEXT: cmhi d2, d1, d0
+; CHECK-SD-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-SD-NOSVE-NEXT: fmov x0, d0
+; CHECK-SD-NOSVE-NEXT: ret
+;
+; CHECK-SD-SVE-LABEL: uminv_v3i64:
+; CHECK-SD-SVE: // %bb.0: // %entry
+; CHECK-SD-SVE-NEXT: // kill: def $d2 killed $d2 def $z2
+; CHECK-SD-SVE-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-SD-SVE-NEXT: mov x8, #-1 // =0xffffffffffffffff
+; CHECK-SD-SVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-SVE-NEXT: ptrue p0.d, vl2
+; CHECK-SD-SVE-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-SVE-NEXT: mov v2.d[1], x8
+; CHECK-SD-SVE-NEXT: umin z0.d, p0/m, z0.d, z2.d
+; CHECK-SD-SVE-NEXT: uminv d0, p0, z0.d
+; CHECK-SD-SVE-NEXT: fmov x0, d0
+; CHECK-SD-SVE-NEXT: ret
;
; CHECK-GI-LABEL: uminv_v3i64:
; CHECK-GI: // %bb.0: // %entry
@@ -1465,15 +1468,25 @@ entry:
}
define i64 @uminv_v4i64(<4 x i64> %a) {
-; CHECK-SD-LABEL: uminv_v4i64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmhi v2.2d, v1.2d, v0.2d
-; CHECK-SD-NEXT: bif v0.16b, v1.16b, v2.16b
-; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: cmhi d2, d1, d0
-; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: ret
+; CHECK-SD-NOSVE-LABEL: uminv_v4i64:
+; CHECK-SD-NOSVE: // %bb.0: // %entry
+; CHECK-SD-NOSVE-NEXT: cmhi v2.2d, v1.2d, v0.2d
+; CHECK-SD-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b
+; CHECK-SD-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NOSVE-NEXT: cmhi d2, d1, d0
+; CHECK-SD-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-SD-NOSVE-NEXT: fmov x0, d0
+; CHECK-SD-NOSVE-NEXT: ret
+;
+; CHECK-SD-SVE-LABEL: uminv_v4i64:
+; CHECK-SD-SVE: // %bb.0: // %entry
+; CHECK-SD-SVE-NEXT: ptrue p0.d, vl2
+; CHECK-SD-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SD-SVE-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-SD-SVE-NEXT: umin z0.d, p0/m, z0.d, z1.d
+; CHECK-SD-SVE-NEXT: uminv d0, p0, z0.d
+; CHECK-SD-SVE-NEXT: fmov x0, d0
+; CHECK-SD-SVE-NEXT: ret
;
; CHECK-GI-LABEL: uminv_v4i64:
; CHECK-GI: // %bb.0: // %entry
@@ -1750,13 +1763,21 @@ entry:
}
define i64 @umaxv_v2i64(<2 x i64> %a) {
-; CHECK-SD-LABEL: umaxv_v2i64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: cmhi d2, d0, d1
-; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: ret
+; CHECK-SD-NOSVE-LABEL: umaxv_v2i64:
+; CHECK-SD-NOSVE: // %bb.0: // %entry
+; CHECK-SD-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NOSVE-NEXT: cmhi d2, d0, d1
+; CHECK-SD-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-SD-NOSVE-NEXT: fmov x0, d0
+; CHECK-SD-NOSVE-NEXT: ret
+;
+; CHECK-SD-SVE-LABEL: umaxv_v2i64:
+; CHECK-SD-SVE: // %bb.0: // %entry
+; CHECK-SD-SVE-NEXT: ptrue p0.d, vl2
+; CHECK-SD-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SD-SVE-NEXT: umaxv d0, p0, z0.d
+; CHECK-SD-SVE-NEXT: fmov x0, d0
+; CHECK-SD-SVE-NEXT: ret
;
; CHECK-GI-LABEL: umaxv_v2i64:
; CHECK-GI: // %bb.0: // %entry
@@ -1773,22 +1794,35 @@ entry:
}
define i64 @umaxv_v3i64(<3 x i64> %a) {
-; CHECK-SD-LABEL: umaxv_v3i64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT: mov v3.16b, v2.16b
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-SD-NEXT: mov v3.d[1], xzr
-; CHECK-SD-NEXT: cmhi v3.2d, v0.2d, v3.2d
-; CHECK-SD-NEXT: ext v4.16b, v3.16b, v3.16b, #8
-; CHECK-SD-NEXT: bif v0.16b, v2.16b, v3.16b
-; CHECK-SD-NEXT: and v1.8b, v1.8b, v4.8b
-; CHECK-SD-NEXT: cmhi d2, d0, d1
-; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: ret
+; CHECK-SD-NOSVE-LABEL: umaxv_v3i64:
+; CHECK-SD-NOSVE: // %bb.0: // %entry
+; CHECK-SD-NOSVE-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NOSVE-NEXT: mov v3.16b, v2.16b
+; CHECK-SD-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NOSVE-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NOSVE-NEXT: mov v3.d[1], xzr
+; CHECK-SD-NOSVE-NEXT: cmhi v3.2d, v0.2d, v3.2d
+; CHECK-SD-NOSVE-NEXT: ext v4.16b, v3.16b, v3.16b, #8
+; CHECK-SD-NOSVE-NEXT: bif v0.16b, v2.16b, v3.16b
+; CHECK-SD-NOSVE-NEXT: and v1.8b, v1.8b, v4.8b
+; CHECK-SD-NOSVE-NEXT: cmhi d2, d0, d1
+; CHECK-SD-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-SD-NOSVE-NEXT: fmov x0, d0
+; CHECK-SD-NOSVE-NEXT: ret
+;
+; CHECK-SD-SVE-LABEL: umaxv_v3i64:
+; CHECK-SD-SVE: // %bb.0: // %entry
+; CHECK-SD-SVE-NEXT: // kill: def $d2 killed $d2 def $z2
+; CHECK-SD-SVE-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-SD-SVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-SVE-NEXT: ptrue p0.d, vl2
+; CHECK-SD-SVE-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-SVE-NEXT: mov v2.d[1], xzr
+; CHECK-SD-SVE-NEXT: umax z0.d, p0/m, z0.d, z2.d
+; CHECK-SD-SVE-NEXT: umaxv d0, p0, z0.d
+; CHECK-SD-SVE-NEXT: fmov x0, d0
+; CHECK-SD-SVE-NEXT: ret
;
; CHECK-GI-LABEL: umaxv_v3i64:
; CHECK-GI: // %bb.0: // %entry
@@ -1812,15 +1846,25 @@ entry:
}
define i64 @umaxv_v4i64(<4 x i64> %a) {
-; CHECK-SD-LABEL: umaxv_v4i64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmhi v2.2d, v0.2d, v1.2d
-; CHECK-SD-NEXT: bif v0.16b, v1.16b, v2.16b
-; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: cmhi d2, d0, d1
-; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: ret
+; CHECK-SD-NOSVE-LABEL: umaxv_v4i64:
+; CHECK-SD-NOSVE: // %bb.0: // %entry
+; CHECK-SD-NOSVE-NEXT: cmhi v2.2d, v0.2d, v1.2d
+; CHECK-SD-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b
+; CHECK-SD-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NOSVE-NEXT: cmhi d2, d0, d1
+; CHECK-SD-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-SD-NOSVE-NEXT: fmov x0, d0
+; CHECK-SD-NOSVE-NEXT: ret
+;
+; CHECK-SD-SVE-LABEL: umaxv_v4i64:
+; CHECK-SD-SVE: // %bb.0: // %entry
+; CHECK-SD-SVE-NEXT: ptrue p0.d, vl2
+; CHECK-SD-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SD-SVE-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-SD-SVE-NEXT: umax z0.d, p0/m, z0.d, z1.d
+; CHECK-SD-SVE-NEXT: umaxv d0, p0, z0.d
+; CHECK-SD-SVE-NEXT: fmov x0, d0
+; CHECK-SD-SVE-NEXT: ret
;
; CHECK-GI-LABEL: umaxv_v4i64:
; CHECK-GI: // %bb.0: // %entry
More information about the llvm-commits mailing list