[llvm-branch-commits] [llvm] [AArch64] Fold MIN/MAX(Vec[0], Vec[1]) to VECREDUCE_MIN/MAX(Vec) (PR #181162)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Feb 12 07:07:21 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Benjamin Maxwell (MacDue)
<details>
<summary>Changes</summary>
If we have a lowering for `VECREDUCE_MIN/MAX`, this is generally more efficient than the scalar expansion.
---
Full diff: https://github.com/llvm/llvm-project/pull/181162.diff
2 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+48-10)
- (added) llvm/test/CodeGen/AArch64/v2i64-min-max.ll (+99)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index e55a75127235c..5621d43201bb1 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1160,7 +1160,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
ISD::SIGN_EXTEND_INREG, ISD::CONCAT_VECTORS,
ISD::EXTRACT_SUBVECTOR, ISD::INSERT_SUBVECTOR,
ISD::STORE, ISD::BUILD_VECTOR});
- setTargetDAGCombine(ISD::SMIN);
+ setTargetDAGCombine({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX});
setTargetDAGCombine(ISD::TRUNCATE);
setTargetDAGCombine(ISD::LOAD);
@@ -22639,14 +22639,6 @@ static SDValue trySQDMULHCombine(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(ISD::SIGN_EXTEND, DL, DestVT, SQDMULH);
}
-static SDValue performSMINCombine(SDNode *N, SelectionDAG &DAG) {
- if (SDValue V = trySQDMULHCombine(N, DAG)) {
- return V;
- }
-
- return SDValue();
-}
-
static SDValue performTruncateCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
SDLoc DL(N);
@@ -28825,6 +28817,49 @@ static SDValue performCTPOPCombine(SDNode *N,
return DAG.getNegative(NegPopCount, DL, VT);
}
+static unsigned getReductionForOpcode(unsigned Op) {
+ switch (Op) {
+ case ISD::SMIN:
+ return ISD::VECREDUCE_SMIN;
+ case ISD::SMAX:
+ return ISD::VECREDUCE_SMAX;
+ case ISD::UMIN:
+ return ISD::VECREDUCE_UMIN;
+ case ISD::UMAX:
+ return ISD::VECREDUCE_UMAX;
+ default:
+ llvm_unreachable("unimplemented mapping");
+ }
+}
+
+static SDValue performMINMAXCombine(SDNode *N, SelectionDAG &DAG,
+ const AArch64Subtarget &Subtarget,
+ const AArch64TargetLowering &TLI) {
+ using namespace llvm::SDPatternMatch;
+ if (SDValue V = trySQDMULHCombine(N, DAG))
+ return V;
+
+ unsigned ReductionOpcode = getReductionForOpcode(N->getOpcode());
+ if (!TLI.isOperationLegalOrCustom(ReductionOpcode, MVT::v2i64))
+ return SDValue();
+
+ // Fold `min/max(vec[0], vec[1])` to `vecreduce_min/max(vec)` for v2i64.
+
+ APInt Idx;
+ SDValue Vec;
+ if (!sd_match(N->getOperand(0),
+ m_OneUse(m_ExtractElt(m_SpecificVT(MVT::v2i64, m_Value(Vec)),
+ m_ConstInt(Idx)))))
+ return SDValue();
+
+ if (!sd_match(
+ N->getOperand(1),
+ m_OneUse(m_ExtractElt(m_Specific(Vec), m_SpecificInt(1 - Idx)))))
+ return SDValue();
+
+ return DAG.getNode(ReductionOpcode, SDLoc(N), MVT::i64, Vec);
+}
+
SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -28843,8 +28878,11 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performAddSubCombine(N, DCI);
case ISD::BUILD_VECTOR:
return performBuildVectorCombine(N, DCI, DAG);
+ case ISD::UMAX:
+ case ISD::UMIN:
+ case ISD::SMAX:
case ISD::SMIN:
- return performSMINCombine(N, DAG);
+ return performMINMAXCombine(N, DAG, *Subtarget, *this);
case ISD::TRUNCATE:
return performTruncateCombine(N, DAG, DCI);
case AArch64ISD::ANDS:
diff --git a/llvm/test/CodeGen/AArch64/v2i64-min-max.ll b/llvm/test/CodeGen/AArch64/v2i64-min-max.ll
new file mode 100644
index 0000000000000..43cd59413ba21
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/v2i64-min-max.ll
@@ -0,0 +1,99 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64 -mattr=+sve | FileCheck %s -check-prefix=CHECK-SVE
+; RUN: llc < %s -mtriple=aarch64 -global-isel | FileCheck %s -check-prefix=CHECK-NEON
+
+define i64 @smax(<2 x i64> %0) {
+; CHECK-SVE-LABEL: smax:
+; CHECK-SVE: // %bb.0:
+; CHECK-SVE-NEXT: ptrue p0.d, vl2
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SVE-NEXT: smaxv d0, p0, z0.d
+; CHECK-SVE-NEXT: fmov x0, d0
+; CHECK-SVE-NEXT: ret
+;
+; CHECK-NEON-LABEL: smax:
+; CHECK-NEON: // %bb.0:
+; CHECK-NEON-NEXT: mov d1, v0.d[1]
+; CHECK-NEON-NEXT: fmov x8, d0
+; CHECK-NEON-NEXT: fmov x9, d1
+; CHECK-NEON-NEXT: cmp x8, x9
+; CHECK-NEON-NEXT: fcsel d0, d0, d1, gt
+; CHECK-NEON-NEXT: fmov x0, d0
+; CHECK-NEON-NEXT: ret
+ %2 = extractelement <2 x i64> %0, i64 0
+ %3 = extractelement <2 x i64> %0, i64 1
+ %4 = call i64 @llvm.smax.i64(i64 %2, i64 %3)
+ ret i64 %4
+}
+
+define i64 @umax(<2 x i64> %0) {
+; CHECK-SVE-LABEL: umax:
+; CHECK-SVE: // %bb.0:
+; CHECK-SVE-NEXT: ptrue p0.d, vl2
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SVE-NEXT: umaxv d0, p0, z0.d
+; CHECK-SVE-NEXT: fmov x0, d0
+; CHECK-SVE-NEXT: ret
+;
+; CHECK-NEON-LABEL: umax:
+; CHECK-NEON: // %bb.0:
+; CHECK-NEON-NEXT: mov d1, v0.d[1]
+; CHECK-NEON-NEXT: fmov x8, d0
+; CHECK-NEON-NEXT: fmov x9, d1
+; CHECK-NEON-NEXT: cmp x8, x9
+; CHECK-NEON-NEXT: fcsel d0, d0, d1, hi
+; CHECK-NEON-NEXT: fmov x0, d0
+; CHECK-NEON-NEXT: ret
+ %2 = extractelement <2 x i64> %0, i64 0
+ %3 = extractelement <2 x i64> %0, i64 1
+ %4 = call i64 @llvm.umax.i64(i64 %2, i64 %3)
+ ret i64 %4
+}
+
+define i64 @smin(<2 x i64> %0) {
+; CHECK-SVE-LABEL: smin:
+; CHECK-SVE: // %bb.0:
+; CHECK-SVE-NEXT: ptrue p0.d, vl2
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SVE-NEXT: sminv d0, p0, z0.d
+; CHECK-SVE-NEXT: fmov x0, d0
+; CHECK-SVE-NEXT: ret
+;
+; CHECK-NEON-LABEL: smin:
+; CHECK-NEON: // %bb.0:
+; CHECK-NEON-NEXT: mov d1, v0.d[1]
+; CHECK-NEON-NEXT: fmov x8, d0
+; CHECK-NEON-NEXT: fmov x9, d1
+; CHECK-NEON-NEXT: cmp x8, x9
+; CHECK-NEON-NEXT: fcsel d0, d0, d1, lt
+; CHECK-NEON-NEXT: fmov x0, d0
+; CHECK-NEON-NEXT: ret
+ %2 = extractelement <2 x i64> %0, i64 0
+ %3 = extractelement <2 x i64> %0, i64 1
+ %4 = call i64 @llvm.smin.i64(i64 %2, i64 %3)
+ ret i64 %4
+}
+
+define i64 @umin(<2 x i64> %0) {
+; CHECK-SVE-LABEL: umin:
+; CHECK-SVE: // %bb.0:
+; CHECK-SVE-NEXT: ptrue p0.d, vl2
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SVE-NEXT: uminv d0, p0, z0.d
+; CHECK-SVE-NEXT: fmov x0, d0
+; CHECK-SVE-NEXT: ret
+;
+; CHECK-NEON-LABEL: umin:
+; CHECK-NEON: // %bb.0:
+; CHECK-NEON-NEXT: mov d1, v0.d[1]
+; CHECK-NEON-NEXT: fmov x8, d0
+; CHECK-NEON-NEXT: fmov x9, d1
+; CHECK-NEON-NEXT: cmp x8, x9
+; CHECK-NEON-NEXT: fcsel d0, d0, d1, lo
+; CHECK-NEON-NEXT: fmov x0, d0
+; CHECK-NEON-NEXT: ret
+ %2 = extractelement <2 x i64> %0, i64 0
+ %3 = extractelement <2 x i64> %0, i64 1
+ %4 = call i64 @llvm.umin.i64(i64 %2, i64 %3)
+ ret i64 %4
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/181162
More information about the llvm-branch-commits
mailing list