[llvm] [DAGCombiner] add fold (xor (smin(x, C), C)) and fold (xor (smax(x, C), C)) (PR #155141)

Mon Sep 8 07:26:11 PDT 2025

https://github.com/rez5427 updated https://github.com/llvm/llvm-project/pull/155141

>From f8575c96d83f9040c05d788492edf7a4ac5392a8 Mon Sep 17 00:00:00 2001
From: Yui5427 <785369607 at qq.com>
Date: Sun, 24 Aug 2025 13:34:01 +0800
Subject: [PATCH 1/9] [DAGCombiner] add fold (xor (smin(x, C), C)) -> select (x
 < C), xor(x, C), 0

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 42 +++++++++++
 llvm/test/CodeGen/AArch64/xor-smin-smax.ll    | 75 +++++++++++++++++++
 2 files changed, 117 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/xor-smin-smax.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index cee593def653c..2d4d74935e1bc 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10086,6 +10086,48 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
   if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
     return Combined;
 
+  // fold (xor (smin(x, C), C)) -> select (x < C), xor(x, C), 0
+  // fold (xor (smin(C, x), C)) -> select (x < C), xor(x, C), 0
+  if (N0.getOpcode() == ISD::SMIN && N0.hasOneUse()) {
+    SDValue Op0 = N0.getOperand(0);
+    SDValue Op1 = N0.getOperand(1);
+
+    if (Op1 != N1) {
+      std::swap(Op0, Op1);
+    }
+
+    if (Op1 == N1) {
+      if (isa<ConstantSDNode>(N1)) {
+        EVT CCVT = getSetCCResultType(VT);
+        SDValue Cmp = DAG.getSetCC(SDLoc(N), CCVT, Op0, N1, ISD::SETLT);
+        SDValue XorXC = DAG.getNode(ISD::XOR, SDLoc(N), VT, Op0, N1);
+        SDValue Zero = DAG.getConstant(0, SDLoc(N), VT);
+        return DAG.getSelect(SDLoc(N), VT, Cmp, XorXC, Zero);
+      }
+    }
+  }
+
+  // fold (xor (smax(x, C), C)) -> select (x > C), xor(x, C), 0
+  // fold (xor (smax(C, x), C)) -> select (x > C), xor(x, C), 0
+  if (N0.getOpcode() == ISD::SMAX && N0.hasOneUse()) {
+    SDValue Op0 = N0.getOperand(0);
+    SDValue Op1 = N0.getOperand(1);
+
+    if (Op1 != N1) {
+      std::swap(Op0, Op1);
+    }
+
+    if (Op1 == N1) {
+      if (isa<ConstantSDNode>(N1)) {
+        EVT CCVT = getSetCCResultType(VT);
+        SDValue Cmp = DAG.getSetCC(SDLoc(N), CCVT, Op0, N1, ISD::SETGT);
+        SDValue XorXC = DAG.getNode(ISD::XOR, SDLoc(N), VT, Op0, N1);
+        SDValue Zero = DAG.getConstant(0, SDLoc(N), VT);
+        return DAG.getSelect(SDLoc(N), VT, Cmp, XorXC, Zero);
+      }
+    }
+  }
+
   return SDValue();
 }
 
diff --git a/llvm/test/CodeGen/AArch64/xor-smin-smax.ll b/llvm/test/CodeGen/AArch64/xor-smin-smax.ll
new file mode 100644
index 0000000000000..cfdec2da61c7a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/xor-smin-smax.ll
@@ -0,0 +1,75 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s
+
+; Test for DAGCombiner optimization: fold (xor (smin(x, C), C)) -> select (x < C), xor (x, C), 0
+
+define i64 @test_smin_neg_one(i64 %a) {
+; CHECK-LABEL: test_smin_neg_one:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmn x0, #1
+; CHECK-NEXT:    csinv x0, xzr, x0, ge
+; CHECK-NEXT:    ret
+  %1 = tail call i64 @llvm.smin.i64(i64 %a, i64 -1)
+  %retval.0 = xor i64 %1, -1
+  ret i64 %retval.0
+}
+
+define i64 @test_smin_zero(i64 %a) {
+; CHECK-LABEL: test_smin_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and x0, x0, x0, asr #63
+; CHECK-NEXT:    ret
+  %1 = tail call i64 @llvm.smin.i64(i64 %a, i64 0)
+  %retval.0 = xor i64 %1, 0
+  ret i64 %retval.0
+}
+
+define i64 @test_smin_constant(i64 %a) {
+; CHECK-LABEL: test_smin_constant:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    eor x8, x0, #0x8
+; CHECK-NEXT:    cmp x0, #8
+; CHECK-NEXT:    csel x0, x8, xzr, lt
+; CHECK-NEXT:    ret
+  %1 = tail call i64 @llvm.smin.i64(i64 %a, i64 8)
+  %retval.0 = xor i64 %1, 8
+  ret i64 %retval.0
+}
+
+; Test for DAGCombiner optimization: fold (xor (smax(x, C), C)) -> select (x > C), xor (x, C), 0
+
+define i64 @test_smax_neg_one(i64 %a) {
+; CHECK-LABEL: test_smax_neg_one:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvn x8, x0
+; CHECK-NEXT:    bic x0, x8, x0, asr #63
+; CHECK-NEXT:    ret
+  %1 = tail call i64 @llvm.smax.i64(i64 %a, i64 -1)
+  %retval.0 = xor i64 %1, -1
+  ret i64 %retval.0
+}
+
+define i64 @test_smax_zero(i64 %a) {
+; CHECK-LABEL: test_smax_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bic x0, x0, x0, asr #63
+; CHECK-NEXT:    ret
+  %1 = tail call i64 @llvm.smax.i64(i64 %a, i64 0)
+  %retval.0 = xor i64 %1, 0
+  ret i64 %retval.0
+}
+
+define i64 @test_smax_constant(i64 %a) {
+; CHECK-LABEL: test_smax_constant:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    eor x8, x0, #0x8
+; CHECK-NEXT:    cmp x0, #8
+; CHECK-NEXT:    csel x0, x8, xzr, gt
+; CHECK-NEXT:    ret
+  %1 = tail call i64 @llvm.smax.i64(i64 %a, i64 8)
+  %retval.0 = xor i64 %1, 8
+  ret i64 %retval.0
+}
+
+declare i64 @llvm.smin.i64(i64, i64)
+declare i64 @llvm.smax.i64(i64, i64)
\ No newline at end of file

>From c784fd819f1576a7da331f134f20b3b7d5c28f94 Mon Sep 17 00:00:00 2001
From: Yui5427 <785369607 at qq.com>
Date: Mon, 25 Aug 2025 21:42:42 +0800
Subject: [PATCH 2/9] Combining umax, umin, smin, smax

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  59 ++---
 llvm/test/CodeGen/AArch64/xor-smin-smax.ll    | 205 +++++++++++++++++-
 2 files changed, 235 insertions(+), 29 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2d4d74935e1bc..f1b2cf668bfa7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10087,40 +10087,43 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
     return Combined;
 
   // fold (xor (smin(x, C), C)) -> select (x < C), xor(x, C), 0
-  // fold (xor (smin(C, x), C)) -> select (x < C), xor(x, C), 0
-  if (N0.getOpcode() == ISD::SMIN && N0.hasOneUse()) {
-    SDValue Op0 = N0.getOperand(0);
-    SDValue Op1 = N0.getOperand(1);
-
-    if (Op1 != N1) {
-      std::swap(Op0, Op1);
-    }
-
-    if (Op1 == N1) {
-      if (isa<ConstantSDNode>(N1)) {
-        EVT CCVT = getSetCCResultType(VT);
-        SDValue Cmp = DAG.getSetCC(SDLoc(N), CCVT, Op0, N1, ISD::SETLT);
-        SDValue XorXC = DAG.getNode(ISD::XOR, SDLoc(N), VT, Op0, N1);
-        SDValue Zero = DAG.getConstant(0, SDLoc(N), VT);
-        return DAG.getSelect(SDLoc(N), VT, Cmp, XorXC, Zero);
-      }
-    }
-  }
-
   // fold (xor (smax(x, C), C)) -> select (x > C), xor(x, C), 0
-  // fold (xor (smax(C, x), C)) -> select (x > C), xor(x, C), 0
-  if (N0.getOpcode() == ISD::SMAX && N0.hasOneUse()) {
+  // fold (xor (umin(x, C), C)) -> select (x < C), xor(x, C), 0
+  // fold (xor (umax(x, C), C)) -> select (x > C), xor(x, C), 0
+  if ((N0.getOpcode() == ISD::SMIN || N0.getOpcode() == ISD::SMAX ||
+       N0.getOpcode() == ISD::UMIN || N0.getOpcode() == ISD::UMAX) &&
+      N0.hasOneUse()) {
     SDValue Op0 = N0.getOperand(0);
     SDValue Op1 = N0.getOperand(1);
 
-    if (Op1 != N1) {
-      std::swap(Op0, Op1);
-    }
-
     if (Op1 == N1) {
-      if (isa<ConstantSDNode>(N1)) {
+      if (isa<ConstantSDNode>(N1) ||
+          ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) {
+        // For vectors, only optimize when the constant is zero or all-ones to
+        // avoid generating more instructions
+        if (VT.isVector()) {
+          ConstantSDNode *N1C = isConstOrConstSplat(N1);
+          if (!N1C || (!N1C->isZero() && !N1C->isAllOnes()))
+            return SDValue();
+        }
+
         EVT CCVT = getSetCCResultType(VT);
-        SDValue Cmp = DAG.getSetCC(SDLoc(N), CCVT, Op0, N1, ISD::SETGT);
+        ISD::CondCode CC;
+        switch (N0.getOpcode()) {
+        case ISD::SMIN:
+          CC = ISD::SETLT;
+          break;
+        case ISD::SMAX:
+          CC = ISD::SETGT;
+          break;
+        case ISD::UMIN:
+          CC = ISD::SETULT;
+          break;
+        case ISD::UMAX:
+          CC = ISD::SETUGT;
+          break;
+        }
+        SDValue Cmp = DAG.getSetCC(SDLoc(N), CCVT, Op0, N1, CC);
         SDValue XorXC = DAG.getNode(ISD::XOR, SDLoc(N), VT, Op0, N1);
         SDValue Zero = DAG.getConstant(0, SDLoc(N), VT);
         return DAG.getSelect(SDLoc(N), VT, Cmp, XorXC, Zero);
diff --git a/llvm/test/CodeGen/AArch64/xor-smin-smax.ll b/llvm/test/CodeGen/AArch64/xor-smin-smax.ll
index cfdec2da61c7a..74d80eeaefd4a 100644
--- a/llvm/test/CodeGen/AArch64/xor-smin-smax.ll
+++ b/llvm/test/CodeGen/AArch64/xor-smin-smax.ll
@@ -71,5 +71,208 @@ define i64 @test_smax_constant(i64 %a) {
   ret i64 %retval.0
 }
 
+define i64 @test_umin_neg_one(i64 %a) {
+; CHECK-LABEL: test_umin_neg_one:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvn x0, x0
+; CHECK-NEXT:    ret
+  %1 = tail call i64 @llvm.umin.i64(i64 %a, i64 -1)
+  %retval.0 = xor i64 %1, -1
+  ret i64 %retval.0
+}
+
+define i64 @test_umin_zero(i64 %a) {
+; CHECK-LABEL: test_umin_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x0, xzr
+; CHECK-NEXT:    ret
+  %1 = tail call i64 @llvm.umin.i64(i64 %a, i64 0)
+  %retval.0 = xor i64 %1, 0
+  ret i64 %retval.0
+}
+
+define i64 @test_umin_constant(i64 %a) {
+; CHECK-LABEL: test_umin_constant:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    eor x8, x0, #0x8
+; CHECK-NEXT:    cmp x0, #8
+; CHECK-NEXT:    csel x0, x8, xzr, lo
+; CHECK-NEXT:    ret
+  %1 = tail call i64 @llvm.umin.i64(i64 %a, i64 8)
+  %retval.0 = xor i64 %1, 8
+  ret i64 %retval.0
+}
+
+define i64 @test_umax_neg_one(i64 %a) {
+; CHECK-LABEL: test_umax_neg_one:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x0, xzr
+; CHECK-NEXT:    ret
+  %1 = tail call i64 @llvm.umax.i64(i64 %a, i64 -1)
+  %retval.0 = xor i64 %1, -1
+  ret i64 %retval.0
+}
+
+define i64 @test_umax_zero(i64 %a) {
+; CHECK-LABEL: test_umax_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %1 = tail call i64 @llvm.umax.i64(i64 %a, i64 0)
+  %retval.0 = xor i64 %1, 0
+  ret i64 %retval.0
+}
+
+define i64 @test_umax_constant(i64 %a) {
+; CHECK-LABEL: test_umax_constant:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    eor x8, x0, #0x8
+; CHECK-NEXT:    cmp x0, #8
+; CHECK-NEXT:    csel x0, x8, xzr, hi
+; CHECK-NEXT:    ret
+  %1 = tail call i64 @llvm.umax.i64(i64 %a, i64 8)
+  %retval.0 = xor i64 %1, 8
+  ret i64 %retval.0
+}
+
+; Test vector cases
+
+define <4 x i32> @test_smin_vector_neg_one(<4 x i32> %a) {
+; CHECK-LABEL: test_smin_vector_neg_one:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.2d, #0xffffffffffffffff
+; CHECK-NEXT:    cmgt v1.4s, v1.4s, v0.4s
+; CHECK-NEXT:    bic v0.16b, v1.16b, v0.16b 
+; CHECK-NEXT:    ret
+  %1 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %a, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
+  %retval.0 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
+  ret <4 x i32> %retval.0
+}
+
+define <4 x i32> @test_smin_vector_zero(<4 x i32> %a) {
+; CHECK-LABEL: test_smin_vector_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
+  %1 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %a, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+  %retval.0 = xor <4 x i32> %1, <i32 0, i32 0, i32 0, i32 0>
+  ret <4 x i32> %retval.0
+}
+
+define <4 x i32> @test_smin_vector_constant(<4 x i32> %a) {
+; CHECK-LABEL: test_smin_vector_constant:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.4s, #8
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    eor v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    ret
+  %1 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %a, <4 x i32> <i32 8, i32 8, i32 8, i32 8>)
+  %retval.0 = xor <4 x i32> %1, <i32 8, i32 8, i32 8, i32 8>
+  ret <4 x i32> %retval.0
+}
+
+define <4 x i32> @test_smax_vector_neg_one(<4 x i32> %a) {
+; CHECK-LABEL: test_smax_vector_neg_one:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmge v1.4s, v0.4s, #0
+; CHECK-NEXT:    bic v0.16b, v1.16b, v0.16b 
+; CHECK-NEXT:    ret
+  %1 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %a, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
+  %retval.0 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
+  ret <4 x i32> %retval.0
+}
+
+define <4 x i32> @test_smax_vector_zero(<4 x i32> %a) {
+; CHECK-LABEL: test_smax_vector_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
+  %1 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %a, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+  %retval.0 = xor <4 x i32> %1, <i32 0, i32 0, i32 0, i32 0>
+  ret <4 x i32> %retval.0
+}
+
+define <4 x i32> @test_smax_vector_constant(<4 x i32> %a) {
+; CHECK-LABEL: test_smax_vector_constant:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.4s, #8
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    eor v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    ret
+  %1 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %a, <4 x i32> <i32 8, i32 8, i32 8, i32 8>)
+  %retval.0 = xor <4 x i32> %1, <i32 8, i32 8, i32 8, i32 8>
+  ret <4 x i32> %retval.0
+}
+
+define <4 x i32> @test_umin_vector_neg_one(<4 x i32> %a) {
+; CHECK-LABEL: test_umin_vector_neg_one:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvn v0.16b, v0.16b
+; CHECK-NEXT:    ret
+  %1 = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %a, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
+  %retval.0 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
+  ret <4 x i32> %retval.0
+}
+
+define <4 x i32> @test_umin_vector_zero(<4 x i32> %a) {
+; CHECK-LABEL: test_umin_vector_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    ret
+  %1 = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %a, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+  %retval.0 = xor <4 x i32> %1, <i32 0, i32 0, i32 0, i32 0>
+  ret <4 x i32> %retval.0
+}
+
+define <4 x i32> @test_umin_vector_constant(<4 x i32> %a) {
+; CHECK-LABEL: test_umin_vector_constant:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.4s, #8
+; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    eor v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    ret
+  %1 = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %a, <4 x i32> <i32 8, i32 8, i32 8, i32 8>)
+  %retval.0 = xor <4 x i32> %1, <i32 8, i32 8, i32 8, i32 8>
+  ret <4 x i32> %retval.0
+}
+
+define <4 x i32> @test_umax_vector_neg_one(<4 x i32> %a) {
+; CHECK-LABEL: test_umax_vector_neg_one:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    ret
+  %1 = tail call <4 x i32> @llvm.umax.v4i32(<4 x i32> %a, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
+  %retval.0 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
+  ret <4 x i32> %retval.0
+}
+
+define <4 x i32> @test_umax_vector_zero(<4 x i32> %a) {
+; CHECK-LABEL: test_umax_vector_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %1 = tail call <4 x i32> @llvm.umax.v4i32(<4 x i32> %a, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+  %retval.0 = xor <4 x i32> %1, <i32 0, i32 0, i32 0, i32 0>
+  ret <4 x i32> %retval.0
+}
+
+define <4 x i32> @test_umax_vector_constant(<4 x i32> %a) {
+; CHECK-LABEL: test_umax_vector_constant:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.4s, #8
+; CHECK-NEXT:    umax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    eor v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    ret
+  %1 = tail call <4 x i32> @llvm.umax.v4i32(<4 x i32> %a, <4 x i32> <i32 8, i32 8, i32 8, i32 8>)
+  %retval.0 = xor <4 x i32> %1, <i32 8, i32 8, i32 8, i32 8>
+  ret <4 x i32> %retval.0
+}
+
 declare i64 @llvm.smin.i64(i64, i64)
-declare i64 @llvm.smax.i64(i64, i64)
\ No newline at end of file
+declare i64 @llvm.smax.i64(i64, i64)
+declare i64 @llvm.umin.i64(i64, i64)
+declare i64 @llvm.umax.i64(i64, i64)
+declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>)
\ No newline at end of file

>From 76c16c7b9220d23e93878c9f7ee329b0420e94ea Mon Sep 17 00:00:00 2001
From: Yui5427 <785369607 at qq.com>
Date: Wed, 27 Aug 2025 18:10:36 +0800
Subject: [PATCH 3/9] Use sd_match

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index f1b2cf668bfa7..6f840ba049184 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10090,11 +10090,11 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
   // fold (xor (smax(x, C), C)) -> select (x > C), xor(x, C), 0
   // fold (xor (umin(x, C), C)) -> select (x < C), xor(x, C), 0
   // fold (xor (umax(x, C), C)) -> select (x > C), xor(x, C), 0
-  if ((N0.getOpcode() == ISD::SMIN || N0.getOpcode() == ISD::SMAX ||
-       N0.getOpcode() == ISD::UMIN || N0.getOpcode() == ISD::UMAX) &&
-      N0.hasOneUse()) {
-    SDValue Op0 = N0.getOperand(0);
-    SDValue Op1 = N0.getOperand(1);
+  SDValue Op0, Op1;
+  if ((sd_match(N0, m_OneUse(m_AnyOf(m_SMin(m_Value(Op0), m_Value(Op1)),
+                                     m_SMax(m_Value(Op0), m_Value(Op1)),
+                                     m_UMin(m_Value(Op0), m_Value(Op1)),
+                                     m_UMax(m_Value(Op0), m_Value(Op1))))))) {
 
     if (Op1 == N1) {
       if (isa<ConstantSDNode>(N1) ||

>From 5723e18be76004fc9338c13a5e848f799504e5d8 Mon Sep 17 00:00:00 2001
From: Yui5427 <785369607 at qq.com>
Date: Wed, 27 Aug 2025 22:08:37 +0800
Subject: [PATCH 4/9] Use m_Specific and DL

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 70 +++++++++----------
 1 file changed, 34 insertions(+), 36 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6f840ba049184..fd093f7713fe6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10090,44 +10090,42 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
   // fold (xor (smax(x, C), C)) -> select (x > C), xor(x, C), 0
   // fold (xor (umin(x, C), C)) -> select (x < C), xor(x, C), 0
   // fold (xor (umax(x, C), C)) -> select (x > C), xor(x, C), 0
-  SDValue Op0, Op1;
-  if ((sd_match(N0, m_OneUse(m_AnyOf(m_SMin(m_Value(Op0), m_Value(Op1)),
-                                     m_SMax(m_Value(Op0), m_Value(Op1)),
-                                     m_UMin(m_Value(Op0), m_Value(Op1)),
-                                     m_UMax(m_Value(Op0), m_Value(Op1))))))) {
-
-    if (Op1 == N1) {
-      if (isa<ConstantSDNode>(N1) ||
-          ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) {
-        // For vectors, only optimize when the constant is zero or all-ones to
-        // avoid generating more instructions
-        if (VT.isVector()) {
-          ConstantSDNode *N1C = isConstOrConstSplat(N1);
-          if (!N1C || (!N1C->isZero() && !N1C->isAllOnes()))
-            return SDValue();
-        }
+  SDValue Op0;
+  if ((sd_match(N0, m_OneUse(m_AnyOf(m_SMin(m_Value(Op0), m_Specific(N1)),
+                                     m_SMax(m_Value(Op0), m_Specific(N1)),
+                                     m_UMin(m_Value(Op0), m_Specific(N1)),
+                                     m_UMax(m_Value(Op0), m_Specific(N1))))))) {
+
+    if (isa<ConstantSDNode>(N1) ||
+        ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) {
+      // For vectors, only optimize when the constant is zero or all-ones to
+      // avoid generating more instructions
+      if (VT.isVector()) {
+        ConstantSDNode *N1C = isConstOrConstSplat(N1);
+        if (!N1C || (!N1C->isZero() && !N1C->isAllOnes()))
+          return SDValue();
+      }
 
-        EVT CCVT = getSetCCResultType(VT);
-        ISD::CondCode CC;
-        switch (N0.getOpcode()) {
-        case ISD::SMIN:
-          CC = ISD::SETLT;
-          break;
-        case ISD::SMAX:
-          CC = ISD::SETGT;
-          break;
-        case ISD::UMIN:
-          CC = ISD::SETULT;
-          break;
-        case ISD::UMAX:
-          CC = ISD::SETUGT;
-          break;
-        }
-        SDValue Cmp = DAG.getSetCC(SDLoc(N), CCVT, Op0, N1, CC);
-        SDValue XorXC = DAG.getNode(ISD::XOR, SDLoc(N), VT, Op0, N1);
-        SDValue Zero = DAG.getConstant(0, SDLoc(N), VT);
-        return DAG.getSelect(SDLoc(N), VT, Cmp, XorXC, Zero);
+      EVT CCVT = getSetCCResultType(VT);
+      ISD::CondCode CC;
+      switch (N0.getOpcode()) {
+      case ISD::SMIN:
+        CC = ISD::SETLT;
+        break;
+      case ISD::SMAX:
+        CC = ISD::SETGT;
+        break;
+      case ISD::UMIN:
+        CC = ISD::SETULT;
+        break;
+      case ISD::UMAX:
+        CC = ISD::SETUGT;
+        break;
       }
+      SDValue Cmp = DAG.getSetCC(DL, CCVT, Op0, N1, CC);
+      SDValue XorXC = DAG.getNode(ISD::XOR, DL, VT, Op0, N1);
+      SDValue Zero = DAG.getConstant(0, DL, VT);
+      return DAG.getSelect(DL, VT, Cmp, XorXC, Zero);
     }
   }
 

>From 7e8112696e6f456a5dc3a56442355f4ab25493db Mon Sep 17 00:00:00 2001
From: guan jian <148229859+rez5427 at users.noreply.github.com>
Date: Thu, 28 Aug 2025 00:09:25 +0800
Subject: [PATCH 5/9] Update llvm/test/CodeGen/AArch64/xor-smin-smax.ll

Co-authored-by: Matt Arsenault <arsenm2 at gmail.com>
---
 llvm/test/CodeGen/AArch64/xor-smin-smax.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/AArch64/xor-smin-smax.ll b/llvm/test/CodeGen/AArch64/xor-smin-smax.ll
index 74d80eeaefd4a..904397a23afd1 100644
--- a/llvm/test/CodeGen/AArch64/xor-smin-smax.ll
+++ b/llvm/test/CodeGen/AArch64/xor-smin-smax.ll
@@ -275,4 +275,4 @@ declare i64 @llvm.umax.i64(i64, i64)
 declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)
 declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)
 declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>)
\ No newline at end of file
+declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>)

>From d6359dba47676a7c757f5ec09785b0b510ae1549 Mon Sep 17 00:00:00 2001
From: Yui5427 <785369607 at qq.com>
Date: Fri, 29 Aug 2025 20:48:32 +0800
Subject: [PATCH 6/9] Use freeze N1

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index fd093f7713fe6..1efb99a136e4d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10122,8 +10122,9 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
         CC = ISD::SETUGT;
         break;
       }
-      SDValue Cmp = DAG.getSetCC(DL, CCVT, Op0, N1, CC);
-      SDValue XorXC = DAG.getNode(ISD::XOR, DL, VT, Op0, N1);
+      SDValue FN1 = DAG.getFreeze(N1);
+      SDValue Cmp = DAG.getSetCC(DL, CCVT, Op0, FN1, CC);
+      SDValue XorXC = DAG.getNode(ISD::XOR, DL, VT, Op0, FN1);
       SDValue Zero = DAG.getConstant(0, DL, VT);
       return DAG.getSelect(DL, VT, Cmp, XorXC, Zero);
     }

>From d22128e35831104921b9fa45e5f255f5475a631f Mon Sep 17 00:00:00 2001
From: Yui5427 <785369607 at qq.com>
Date: Thu, 4 Sep 2025 20:47:41 +0800
Subject: [PATCH 7/9] remove redundant bracket

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 1efb99a136e4d..2b3e912d03f61 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10091,10 +10091,10 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
   // fold (xor (umin(x, C), C)) -> select (x < C), xor(x, C), 0
   // fold (xor (umax(x, C), C)) -> select (x > C), xor(x, C), 0
   SDValue Op0;
-  if ((sd_match(N0, m_OneUse(m_AnyOf(m_SMin(m_Value(Op0), m_Specific(N1)),
-                                     m_SMax(m_Value(Op0), m_Specific(N1)),
-                                     m_UMin(m_Value(Op0), m_Specific(N1)),
-                                     m_UMax(m_Value(Op0), m_Specific(N1))))))) {
+  if (sd_match(N0, m_OneUse(m_AnyOf(m_SMin(m_Value(Op0), m_Specific(N1)),
+                                    m_SMax(m_Value(Op0), m_Specific(N1)),
+                                    m_UMin(m_Value(Op0), m_Specific(N1)),
+                                    m_UMax(m_Value(Op0), m_Specific(N1)))))) {
 
     if (isa<ConstantSDNode>(N1) ||
         ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) {

>From d2bd600fa8dd41d3be3a9d10ac0097cf0b323ff0 Mon Sep 17 00:00:00 2001
From: rez5427 <guanjian at stu.cdut.edu.cn>
Date: Mon, 8 Sep 2025 01:19:28 +0800
Subject: [PATCH 8/9] Add legal

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 5 +++++
 llvm/test/CodeGen/AArch64/xor-smin-smax.ll    | 6 +++---
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2b3e912d03f61..bc838afac2272 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10106,6 +10106,11 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
           return SDValue();
       }
 
+      // Avoid the fold if the minmax operation is legal and select is expensive
+      if (TLI.isOperationLegal(N0.getOpcode(), VT) &&
+          TLI.isPredictableSelectExpensive())
+        return SDValue();
+
       EVT CCVT = getSetCCResultType(VT);
       ISD::CondCode CC;
       switch (N0.getOpcode()) {
diff --git a/llvm/test/CodeGen/AArch64/xor-smin-smax.ll b/llvm/test/CodeGen/AArch64/xor-smin-smax.ll
index 904397a23afd1..012a2094f8197 100644
--- a/llvm/test/CodeGen/AArch64/xor-smin-smax.ll
+++ b/llvm/test/CodeGen/AArch64/xor-smin-smax.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mcpu=cortex-a53 | FileCheck %s
 
 ; Test for DAGCombiner optimization: fold (xor (smin(x, C), C)) -> select (x < C), xor (x, C), 0
 
@@ -141,7 +141,7 @@ define <4 x i32> @test_smin_vector_neg_one(<4 x i32> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v1.2d, #0xffffffffffffffff
 ; CHECK-NEXT:    cmgt v1.4s, v1.4s, v0.4s
-; CHECK-NEXT:    bic v0.16b, v1.16b, v0.16b 
+; CHECK-NEXT:    bic v0.16b, v1.16b, v0.16b
 ; CHECK-NEXT:    ret
   %1 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %a, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
   %retval.0 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -175,7 +175,7 @@ define <4 x i32> @test_smax_vector_neg_one(<4 x i32> %a) {
 ; CHECK-LABEL: test_smax_vector_neg_one:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cmge v1.4s, v0.4s, #0
-; CHECK-NEXT:    bic v0.16b, v1.16b, v0.16b 
+; CHECK-NEXT:    bic v0.16b, v1.16b, v0.16b
 ; CHECK-NEXT:    ret
   %1 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %a, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
   %retval.0 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>

>From 21ae67aa652dce13ca1c46023207a02616339e45 Mon Sep 17 00:00:00 2001
From: rez5427 <guanjian at stu.cdut.edu.cn>
Date: Mon, 8 Sep 2025 22:25:50 +0800
Subject: [PATCH 9/9] remove constant zero tests

---
 llvm/test/CodeGen/AArch64/xor-smin-smax.ll | 119 ++++-----------------
 1 file changed, 22 insertions(+), 97 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/xor-smin-smax.ll b/llvm/test/CodeGen/AArch64/xor-smin-smax.ll
index 012a2094f8197..2f35bd9c9ddcf 100644
--- a/llvm/test/CodeGen/AArch64/xor-smin-smax.ll
+++ b/llvm/test/CodeGen/AArch64/xor-smin-smax.ll
@@ -7,29 +7,21 @@ define i64 @test_smin_neg_one(i64 %a) {
 ; CHECK-LABEL: test_smin_neg_one:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cmn x0, #1
-; CHECK-NEXT:    csinv x0, xzr, x0, ge
+; CHECK-NEXT:    csinv x8, x0, xzr, lt
+; CHECK-NEXT:    mvn x0, x8
 ; CHECK-NEXT:    ret
   %1 = tail call i64 @llvm.smin.i64(i64 %a, i64 -1)
   %retval.0 = xor i64 %1, -1
   ret i64 %retval.0
 }
 
-define i64 @test_smin_zero(i64 %a) {
-; CHECK-LABEL: test_smin_zero:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    and x0, x0, x0, asr #63
-; CHECK-NEXT:    ret
-  %1 = tail call i64 @llvm.smin.i64(i64 %a, i64 0)
-  %retval.0 = xor i64 %1, 0
-  ret i64 %retval.0
-}
-
 define i64 @test_smin_constant(i64 %a) {
 ; CHECK-LABEL: test_smin_constant:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    eor x8, x0, #0x8
+; CHECK-NEXT:    mov w8, #8
 ; CHECK-NEXT:    cmp x0, #8
-; CHECK-NEXT:    csel x0, x8, xzr, lt
+; CHECK-NEXT:    csel x8, x0, x8, lt
+; CHECK-NEXT:    eor x0, x8, #0x8
 ; CHECK-NEXT:    ret
   %1 = tail call i64 @llvm.smin.i64(i64 %a, i64 8)
   %retval.0 = xor i64 %1, 8
@@ -37,34 +29,25 @@ define i64 @test_smin_constant(i64 %a) {
 }
 
 ; Test for DAGCombiner optimization: fold (xor (smax(x, C), C)) -> select (x > C), xor (x, C), 0
-
 define i64 @test_smax_neg_one(i64 %a) {
 ; CHECK-LABEL: test_smax_neg_one:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn x8, x0
-; CHECK-NEXT:    bic x0, x8, x0, asr #63
+; CHECK-NEXT:    cmp x0, #0
+; CHECK-NEXT:    csinv x8, x0, xzr, ge
+; CHECK-NEXT:    mvn x0, x8
 ; CHECK-NEXT:    ret
   %1 = tail call i64 @llvm.smax.i64(i64 %a, i64 -1)
   %retval.0 = xor i64 %1, -1
   ret i64 %retval.0
 }
 
-define i64 @test_smax_zero(i64 %a) {
-; CHECK-LABEL: test_smax_zero:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    bic x0, x0, x0, asr #63
-; CHECK-NEXT:    ret
-  %1 = tail call i64 @llvm.smax.i64(i64 %a, i64 0)
-  %retval.0 = xor i64 %1, 0
-  ret i64 %retval.0
-}
-
 define i64 @test_smax_constant(i64 %a) {
 ; CHECK-LABEL: test_smax_constant:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    eor x8, x0, #0x8
+; CHECK-NEXT:    mov w8, #8
 ; CHECK-NEXT:    cmp x0, #8
-; CHECK-NEXT:    csel x0, x8, xzr, gt
+; CHECK-NEXT:    csel x8, x0, x8, gt
+; CHECK-NEXT:    eor x0, x8, #0x8
 ; CHECK-NEXT:    ret
   %1 = tail call i64 @llvm.smax.i64(i64 %a, i64 8)
   %retval.0 = xor i64 %1, 8
@@ -81,22 +64,13 @@ define i64 @test_umin_neg_one(i64 %a) {
   ret i64 %retval.0
 }
 
-define i64 @test_umin_zero(i64 %a) {
-; CHECK-LABEL: test_umin_zero:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x0, xzr
-; CHECK-NEXT:    ret
-  %1 = tail call i64 @llvm.umin.i64(i64 %a, i64 0)
-  %retval.0 = xor i64 %1, 0
-  ret i64 %retval.0
-}
-
 define i64 @test_umin_constant(i64 %a) {
 ; CHECK-LABEL: test_umin_constant:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    eor x8, x0, #0x8
+; CHECK-NEXT:    mov w8, #8
 ; CHECK-NEXT:    cmp x0, #8
-; CHECK-NEXT:    csel x0, x8, xzr, lo
+; CHECK-NEXT:    csel x8, x0, x8, lo
+; CHECK-NEXT:    eor x0, x8, #0x8
 ; CHECK-NEXT:    ret
   %1 = tail call i64 @llvm.umin.i64(i64 %a, i64 8)
   %retval.0 = xor i64 %1, 8
@@ -113,21 +87,13 @@ define i64 @test_umax_neg_one(i64 %a) {
   ret i64 %retval.0
 }
 
-define i64 @test_umax_zero(i64 %a) {
-; CHECK-LABEL: test_umax_zero:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ret
-  %1 = tail call i64 @llvm.umax.i64(i64 %a, i64 0)
-  %retval.0 = xor i64 %1, 0
-  ret i64 %retval.0
-}
-
 define i64 @test_umax_constant(i64 %a) {
 ; CHECK-LABEL: test_umax_constant:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    eor x8, x0, #0x8
+; CHECK-NEXT:    mov w8, #8
 ; CHECK-NEXT:    cmp x0, #8
-; CHECK-NEXT:    csel x0, x8, xzr, hi
+; CHECK-NEXT:    csel x8, x0, x8, hi
+; CHECK-NEXT:    eor x0, x8, #0x8
 ; CHECK-NEXT:    ret
   %1 = tail call i64 @llvm.umax.i64(i64 %a, i64 8)
   %retval.0 = xor i64 %1, 8
@@ -135,30 +101,18 @@ define i64 @test_umax_constant(i64 %a) {
 }
 
 ; Test vector cases
-
 define <4 x i32> @test_smin_vector_neg_one(<4 x i32> %a) {
 ; CHECK-LABEL: test_smin_vector_neg_one:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v1.2d, #0xffffffffffffffff
-; CHECK-NEXT:    cmgt v1.4s, v1.4s, v0.4s
-; CHECK-NEXT:    bic v0.16b, v1.16b, v0.16b
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    ret
   %1 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %a, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
   %retval.0 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
   ret <4 x i32> %retval.0
 }
 
-define <4 x i32> @test_smin_vector_zero(<4 x i32> %a) {
-; CHECK-LABEL: test_smin_vector_zero:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v1.2d, #0000000000000000
-; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    ret
-  %1 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %a, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
-  %retval.0 = xor <4 x i32> %1, <i32 0, i32 0, i32 0, i32 0>
-  ret <4 x i32> %retval.0
-}
-
 define <4 x i32> @test_smin_vector_constant(<4 x i32> %a) {
 ; CHECK-LABEL: test_smin_vector_constant:
 ; CHECK:       // %bb.0:
@@ -174,25 +128,15 @@ define <4 x i32> @test_smin_vector_constant(<4 x i32> %a) {
 define <4 x i32> @test_smax_vector_neg_one(<4 x i32> %a) {
 ; CHECK-LABEL: test_smax_vector_neg_one:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmge v1.4s, v0.4s, #0
-; CHECK-NEXT:    bic v0.16b, v1.16b, v0.16b
+; CHECK-NEXT:    movi v1.2d, #0xffffffffffffffff
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    mvn v0.16b, v0.16b
 ; CHECK-NEXT:    ret
   %1 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %a, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
   %retval.0 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
   ret <4 x i32> %retval.0
 }
 
-define <4 x i32> @test_smax_vector_zero(<4 x i32> %a) {
-; CHECK-LABEL: test_smax_vector_zero:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v1.2d, #0000000000000000
-; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    ret
-  %1 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %a, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
-  %retval.0 = xor <4 x i32> %1, <i32 0, i32 0, i32 0, i32 0>
-  ret <4 x i32> %retval.0
-}
-
 define <4 x i32> @test_smax_vector_constant(<4 x i32> %a) {
 ; CHECK-LABEL: test_smax_vector_constant:
 ; CHECK:       // %bb.0:
@@ -215,16 +159,6 @@ define <4 x i32> @test_umin_vector_neg_one(<4 x i32> %a) {
   ret <4 x i32> %retval.0
 }
 
-define <4 x i32> @test_umin_vector_zero(<4 x i32> %a) {
-; CHECK-LABEL: test_umin_vector_zero:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v0.2d, #0000000000000000
-; CHECK-NEXT:    ret
-  %1 = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %a, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
-  %retval.0 = xor <4 x i32> %1, <i32 0, i32 0, i32 0, i32 0>
-  ret <4 x i32> %retval.0
-}
-
 define <4 x i32> @test_umin_vector_constant(<4 x i32> %a) {
 ; CHECK-LABEL: test_umin_vector_constant:
 ; CHECK:       // %bb.0:
@@ -247,15 +181,6 @@ define <4 x i32> @test_umax_vector_neg_one(<4 x i32> %a) {
   ret <4 x i32> %retval.0
 }
 
-define <4 x i32> @test_umax_vector_zero(<4 x i32> %a) {
-; CHECK-LABEL: test_umax_vector_zero:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ret
-  %1 = tail call <4 x i32> @llvm.umax.v4i32(<4 x i32> %a, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
-  %retval.0 = xor <4 x i32> %1, <i32 0, i32 0, i32 0, i32 0>
-  ret <4 x i32> %retval.0
-}
-
 define <4 x i32> @test_umax_vector_constant(<4 x i32> %a) {
 ; CHECK-LABEL: test_umax_vector_constant:
 ; CHECK:       // %bb.0: