[llvm] c462846 - [Hexagon] Add HVX support for ISD::SMAX/SMIN/UMAX/UMIN instead of custom dag patterns

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 27 07:46:26 PST 2020


Author: Simon Pilgrim
Date: 2020-11-27T15:46:11Z
New Revision: c4628460b74bcdc34041cd11a8959ca336637ee2

URL: https://github.com/llvm/llvm-project/commit/c4628460b74bcdc34041cd11a8959ca336637ee2
DIFF: https://github.com/llvm/llvm-project/commit/c4628460b74bcdc34041cd11a8959ca336637ee2.diff

LOG: [Hexagon] Add HVX support for ISD::SMAX/SMIN/UMAX/UMIN instead of custom dag patterns

Followup to D92112 now that I've learnt about HVX type splitting.

This is some necessary cleanup work for min/max ops to eventually help us move the add/sub sat patterns into DAGCombine - D91876.

Differential Revision: https://reviews.llvm.org/D92169

Added: 
    

Modified: 
    llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
    llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
    llvm/test/CodeGen/Hexagon/autohvx/minmax-128b.ll
    llvm/test/CodeGen/Hexagon/autohvx/minmax-64b.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index 0b06e6ec9a3a..f39c8e889043 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -102,6 +102,13 @@ HexagonTargetLowering::initializeHVXLowering() {
       setOperationAction(ISD::BSWAP,                    T, Legal);
     }
 
+    setOperationAction(ISD::SMIN,           T, Legal);
+    setOperationAction(ISD::SMAX,           T, Legal);
+    if (T.getScalarType() != MVT::i32) {
+      setOperationAction(ISD::UMIN,         T, Legal);
+      setOperationAction(ISD::UMAX,         T, Legal);
+    }
+
     setOperationAction(ISD::CTTZ,               T, Custom);
     setOperationAction(ISD::LOAD,               T, Custom);
     setOperationAction(ISD::MLOAD,              T, Custom);
@@ -183,6 +190,13 @@ HexagonTargetLowering::initializeHVXLowering() {
       // Promote all shuffles to operate on vectors of bytes.
       setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW);
     }
+
+    setOperationAction(ISD::SMIN,     T, Custom);
+    setOperationAction(ISD::SMAX,     T, Custom);
+    if (T.getScalarType() != MVT::i32) {
+      setOperationAction(ISD::UMIN,   T, Custom);
+      setOperationAction(ISD::UMAX,   T, Custom);
+    }
   }
 
   // Boolean vectors.
@@ -2029,6 +2043,10 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
       case ISD::SRA:
       case ISD::SHL:
       case ISD::SRL:
+      case ISD::SMIN:
+      case ISD::SMAX:
+      case ISD::UMIN:
+      case ISD::UMAX:
       case ISD::SETCC:
       case ISD::VSELECT:
       case ISD::SIGN_EXTEND:

diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
index e37cfe34fa0e..cd894c555adc 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
@@ -274,21 +274,6 @@ class Vneg1<ValueType VecTy>
 class Vnot<ValueType VecTy>
   : PatFrag<(ops node:$Vs), (xor $Vs, Vneg1<VecTy>)>;
 
-let Predicates = [UseHVX] in {
-  let AddedComplexity = 220 in {
-    defm: MinMax_pats<V6_vminb,  V6_vmaxb,  vselect,  setgt,  VecQ8,  HVI8>;
-    defm: MinMax_pats<V6_vminb,  V6_vmaxb,  vselect,  setge,  VecQ8,  HVI8>;
-    defm: MinMax_pats<V6_vminub, V6_vmaxub, vselect, setugt,  VecQ8,  HVI8>;
-    defm: MinMax_pats<V6_vminub, V6_vmaxub, vselect, setuge,  VecQ8,  HVI8>;
-    defm: MinMax_pats<V6_vminh,  V6_vmaxh,  vselect,  setgt, VecQ16, HVI16>;
-    defm: MinMax_pats<V6_vminh,  V6_vmaxh,  vselect,  setge, VecQ16, HVI16>;
-    defm: MinMax_pats<V6_vminuh, V6_vmaxuh, vselect, setugt, VecQ16, HVI16>;
-    defm: MinMax_pats<V6_vminuh, V6_vmaxuh, vselect, setuge, VecQ16, HVI16>;
-    defm: MinMax_pats<V6_vminw,  V6_vmaxw,  vselect,  setgt, VecQ32, HVI32>;
-    defm: MinMax_pats<V6_vminw,  V6_vmaxw,  vselect,  setge, VecQ32, HVI32>;
-  }
-}
-
 let Predicates = [UseHVX] in {
   let AddedComplexity = 200 in {
     def: Pat<(Vnot<VecI8>   HVI8:$Vs), (V6_vnot HvxVR:$Vs)>;
@@ -318,6 +303,17 @@ let Predicates = [UseHVX] in {
   def: OpR_RR_pat<V6_vxor,     Xor,  VecI16, HVI16>;
   def: OpR_RR_pat<V6_vxor,     Xor,  VecI32, HVI32>;
 
+  def: OpR_RR_pat<V6_vminb,   Smin,   VecI8,  HVI8>;
+  def: OpR_RR_pat<V6_vmaxb,   Smax,   VecI8,  HVI8>;
+  def: OpR_RR_pat<V6_vminub,  Umin,   VecI8,  HVI8>;
+  def: OpR_RR_pat<V6_vmaxub,  Umax,   VecI8,  HVI8>;
+  def: OpR_RR_pat<V6_vminh,   Smin,  VecI16, HVI16>;
+  def: OpR_RR_pat<V6_vmaxh,   Smax,  VecI16, HVI16>;
+  def: OpR_RR_pat<V6_vminuh,  Umin,  VecI16, HVI16>;
+  def: OpR_RR_pat<V6_vmaxuh,  Umax,  VecI16, HVI16>;
+  def: OpR_RR_pat<V6_vminw,   Smin,  VecI32, HVI32>;
+  def: OpR_RR_pat<V6_vmaxw,   Smax,  VecI32, HVI32>;
+
   def: Pat<(vselect HQ8:$Qu, HVI8:$Vs, HVI8:$Vt),
            (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>;
   def: Pat<(vselect HQ16:$Qu, HVI16:$Vs, HVI16:$Vt),

diff --git a/llvm/test/CodeGen/Hexagon/autohvx/minmax-128b.ll b/llvm/test/CodeGen/Hexagon/autohvx/minmax-128b.ll
index bd5ca331e750..e3f14966be33 100644
--- a/llvm/test/CodeGen/Hexagon/autohvx/minmax-128b.ll
+++ b/llvm/test/CodeGen/Hexagon/autohvx/minmax-128b.ll
@@ -3,7 +3,7 @@
 ; minb
 
 ; CHECK: test_00:
-; CHECK: v0.b = vmin(v1.b,v0.b)
+; CHECK: v0.b = vmin(v0.b,v1.b)
 define <128 x i8> @test_00(<128 x i8> %v0, <128 x i8> %v1) #0 {
   %t0 = icmp slt <128 x i8> %v0, %v1
   %t1 = select <128 x i1> %t0, <128 x i8> %v0, <128 x i8> %v1
@@ -27,7 +27,7 @@ define <128 x i8> @test_02(<128 x i8> %v0, <128 x i8> %v1) #0 {
 }
 
 ; CHECK: test_03:
-; CHECK: v0.b = vmin(v1.b,v0.b)
+; CHECK: v0.b = vmin(v0.b,v1.b)
 define <128 x i8> @test_03(<128 x i8> %v0, <128 x i8> %v1) #0 {
   %t0 = icmp sge <128 x i8> %v0, %v1
   %t1 = select <128 x i1> %t0, <128 x i8> %v1, <128 x i8> %v0
@@ -37,7 +37,7 @@ define <128 x i8> @test_03(<128 x i8> %v0, <128 x i8> %v1) #0 {
 ; maxb
 
 ; CHECK: test_04:
-; CHECK: v0.b = vmax(v1.b,v0.b)
+; CHECK: v0.b = vmax(v0.b,v1.b)
 define <128 x i8> @test_04(<128 x i8> %v0, <128 x i8> %v1) #0 {
   %t0 = icmp slt <128 x i8> %v0, %v1
   %t1 = select <128 x i1> %t0, <128 x i8> %v1, <128 x i8> %v0
@@ -61,7 +61,7 @@ define <128 x i8> @test_06(<128 x i8> %v0, <128 x i8> %v1) #0 {
 }
 
 ; CHECK: test_07:
-; CHECK: v0.b = vmax(v1.b,v0.b)
+; CHECK: v0.b = vmax(v0.b,v1.b)
 define <128 x i8> @test_07(<128 x i8> %v0, <128 x i8> %v1) #0 {
   %t0 = icmp sge <128 x i8> %v0, %v1
   %t1 = select <128 x i1> %t0, <128 x i8> %v0, <128 x i8> %v1
@@ -71,7 +71,7 @@ define <128 x i8> @test_07(<128 x i8> %v0, <128 x i8> %v1) #0 {
 ; minub
 
 ; CHECK: test_08:
-; CHECK: v0.ub = vmin(v1.ub,v0.ub)
+; CHECK: v0.ub = vmin(v0.ub,v1.ub)
 define <128 x i8> @test_08(<128 x i8> %v0, <128 x i8> %v1) #0 {
   %t0 = icmp ult <128 x i8> %v0, %v1
   %t1 = select <128 x i1> %t0, <128 x i8> %v0, <128 x i8> %v1
@@ -95,7 +95,7 @@ define <128 x i8> @test_0a(<128 x i8> %v0, <128 x i8> %v1) #0 {
 }
 
 ; CHECK: test_0b:
-; CHECK: v0.ub = vmin(v1.ub,v0.ub)
+; CHECK: v0.ub = vmin(v0.ub,v1.ub)
 define <128 x i8> @test_0b(<128 x i8> %v0, <128 x i8> %v1) #0 {
   %t0 = icmp uge <128 x i8> %v0, %v1
   %t1 = select <128 x i1> %t0, <128 x i8> %v1, <128 x i8> %v0
@@ -105,7 +105,7 @@ define <128 x i8> @test_0b(<128 x i8> %v0, <128 x i8> %v1) #0 {
 ; maxub
 
 ; CHECK: test_0c:
-; CHECK: v0.ub = vmax(v1.ub,v0.ub)
+; CHECK: v0.ub = vmax(v0.ub,v1.ub)
 define <128 x i8> @test_0c(<128 x i8> %v0, <128 x i8> %v1) #0 {
   %t0 = icmp ult <128 x i8> %v0, %v1
   %t1 = select <128 x i1> %t0, <128 x i8> %v1, <128 x i8> %v0
@@ -129,7 +129,7 @@ define <128 x i8> @test_0e(<128 x i8> %v0, <128 x i8> %v1) #0 {
 }
 
 ; CHECK: test_0f:
-; CHECK: v0.ub = vmax(v1.ub,v0.ub)
+; CHECK: v0.ub = vmax(v0.ub,v1.ub)
 define <128 x i8> @test_0f(<128 x i8> %v0, <128 x i8> %v1) #0 {
   %t0 = icmp uge <128 x i8> %v0, %v1
   %t1 = select <128 x i1> %t0, <128 x i8> %v0, <128 x i8> %v1
@@ -139,7 +139,7 @@ define <128 x i8> @test_0f(<128 x i8> %v0, <128 x i8> %v1) #0 {
 ; minh
 
 ; CHECK: test_10:
-; CHECK: v0.h = vmin(v1.h,v0.h)
+; CHECK: v0.h = vmin(v0.h,v1.h)
 define <64 x i16> @test_10(<64 x i16> %v0, <64 x i16> %v1) #0 {
   %t0 = icmp slt <64 x i16> %v0, %v1
   %t1 = select <64 x i1> %t0, <64 x i16> %v0, <64 x i16> %v1
@@ -163,7 +163,7 @@ define <64 x i16> @test_12(<64 x i16> %v0, <64 x i16> %v1) #0 {
 }
 
 ; CHECK: test_13:
-; CHECK: v0.h = vmin(v1.h,v0.h)
+; CHECK: v0.h = vmin(v0.h,v1.h)
 define <64 x i16> @test_13(<64 x i16> %v0, <64 x i16> %v1) #0 {
   %t0 = icmp sge <64 x i16> %v0, %v1
   %t1 = select <64 x i1> %t0, <64 x i16> %v1, <64 x i16> %v0
@@ -173,7 +173,7 @@ define <64 x i16> @test_13(<64 x i16> %v0, <64 x i16> %v1) #0 {
 ; maxh
 
 ; CHECK: test_14:
-; CHECK: v0.h = vmax(v1.h,v0.h)
+; CHECK: v0.h = vmax(v0.h,v1.h)
 define <64 x i16> @test_14(<64 x i16> %v0, <64 x i16> %v1) #0 {
   %t0 = icmp slt <64 x i16> %v0, %v1
   %t1 = select <64 x i1> %t0, <64 x i16> %v1, <64 x i16> %v0
@@ -197,7 +197,7 @@ define <64 x i16> @test_16(<64 x i16> %v0, <64 x i16> %v1) #0 {
 }
 
 ; CHECK: test_17:
-; CHECK: v0.h = vmax(v1.h,v0.h)
+; CHECK: v0.h = vmax(v0.h,v1.h)
 define <64 x i16> @test_17(<64 x i16> %v0, <64 x i16> %v1) #0 {
   %t0 = icmp sge <64 x i16> %v0, %v1
   %t1 = select <64 x i1> %t0, <64 x i16> %v0, <64 x i16> %v1
@@ -207,7 +207,7 @@ define <64 x i16> @test_17(<64 x i16> %v0, <64 x i16> %v1) #0 {
 ; minuh
 
 ; CHECK: test_18:
-; CHECK: v0.uh = vmin(v1.uh,v0.uh)
+; CHECK: v0.uh = vmin(v0.uh,v1.uh)
 define <64 x i16> @test_18(<64 x i16> %v0, <64 x i16> %v1) #0 {
   %t0 = icmp ult <64 x i16> %v0, %v1
   %t1 = select <64 x i1> %t0, <64 x i16> %v0, <64 x i16> %v1
@@ -231,7 +231,7 @@ define <64 x i16> @test_1a(<64 x i16> %v0, <64 x i16> %v1) #0 {
 }
 
 ; CHECK: test_1b:
-; CHECK: v0.uh = vmin(v1.uh,v0.uh)
+; CHECK: v0.uh = vmin(v0.uh,v1.uh)
 define <64 x i16> @test_1b(<64 x i16> %v0, <64 x i16> %v1) #0 {
   %t0 = icmp uge <64 x i16> %v0, %v1
   %t1 = select <64 x i1> %t0, <64 x i16> %v1, <64 x i16> %v0
@@ -241,7 +241,7 @@ define <64 x i16> @test_1b(<64 x i16> %v0, <64 x i16> %v1) #0 {
 ; maxuh
 
 ; CHECK: test_1c:
-; CHECK: v0.uh = vmax(v1.uh,v0.uh)
+; CHECK: v0.uh = vmax(v0.uh,v1.uh)
 define <64 x i16> @test_1c(<64 x i16> %v0, <64 x i16> %v1) #0 {
   %t0 = icmp ult <64 x i16> %v0, %v1
   %t1 = select <64 x i1> %t0, <64 x i16> %v1, <64 x i16> %v0
@@ -265,7 +265,7 @@ define <64 x i16> @test_1e(<64 x i16> %v0, <64 x i16> %v1) #0 {
 }
 
 ; CHECK: test_1f:
-; CHECK: v0.uh = vmax(v1.uh,v0.uh)
+; CHECK: v0.uh = vmax(v0.uh,v1.uh)
 define <64 x i16> @test_1f(<64 x i16> %v0, <64 x i16> %v1) #0 {
   %t0 = icmp uge <64 x i16> %v0, %v1
   %t1 = select <64 x i1> %t0, <64 x i16> %v0, <64 x i16> %v1
@@ -275,7 +275,7 @@ define <64 x i16> @test_1f(<64 x i16> %v0, <64 x i16> %v1) #0 {
 ; minw
 
 ; CHECK: test_20:
-; CHECK: v0.w = vmin(v1.w,v0.w)
+; CHECK: v0.w = vmin(v0.w,v1.w)
 define <32 x i32> @test_20(<32 x i32> %v0, <32 x i32> %v1) #0 {
   %t0 = icmp slt <32 x i32> %v0, %v1
   %t1 = select <32 x i1> %t0, <32 x i32> %v0, <32 x i32> %v1
@@ -299,7 +299,7 @@ define <32 x i32> @test_22(<32 x i32> %v0, <32 x i32> %v1) #0 {
 }
 
 ; CHECK: test_23:
-; CHECK: v0.w = vmin(v1.w,v0.w)
+; CHECK: v0.w = vmin(v0.w,v1.w)
 define <32 x i32> @test_23(<32 x i32> %v0, <32 x i32> %v1) #0 {
   %t0 = icmp sge <32 x i32> %v0, %v1
   %t1 = select <32 x i1> %t0, <32 x i32> %v1, <32 x i32> %v0
@@ -309,7 +309,7 @@ define <32 x i32> @test_23(<32 x i32> %v0, <32 x i32> %v1) #0 {
 ; maxw
 
 ; CHECK: test_24:
-; CHECK: v0.w = vmax(v1.w,v0.w)
+; CHECK: v0.w = vmax(v0.w,v1.w)
 define <32 x i32> @test_24(<32 x i32> %v0, <32 x i32> %v1) #0 {
   %t0 = icmp slt <32 x i32> %v0, %v1
   %t1 = select <32 x i1> %t0, <32 x i32> %v1, <32 x i32> %v0
@@ -333,7 +333,7 @@ define <32 x i32> @test_26(<32 x i32> %v0, <32 x i32> %v1) #0 {
 }
 
 ; CHECK: test_27:
-; CHECK: v0.w = vmax(v1.w,v0.w)
+; CHECK: v0.w = vmax(v0.w,v1.w)
 define <32 x i32> @test_27(<32 x i32> %v0, <32 x i32> %v1) #0 {
   %t0 = icmp sge <32 x i32> %v0, %v1
   %t1 = select <32 x i1> %t0, <32 x i32> %v0, <32 x i32> %v1

diff --git a/llvm/test/CodeGen/Hexagon/autohvx/minmax-64b.ll b/llvm/test/CodeGen/Hexagon/autohvx/minmax-64b.ll
index 15e33e32296f..4ec758e61387 100644
--- a/llvm/test/CodeGen/Hexagon/autohvx/minmax-64b.ll
+++ b/llvm/test/CodeGen/Hexagon/autohvx/minmax-64b.ll
@@ -3,7 +3,7 @@
 ; minb
 
 ; CHECK: test_00:
-; CHECK: v0.b = vmin(v1.b,v0.b)
+; CHECK: v0.b = vmin(v0.b,v1.b)
 define <64 x i8> @test_00(<64 x i8> %v0, <64 x i8> %v1) #0 {
   %t0 = icmp slt <64 x i8> %v0, %v1
   %t1 = select <64 x i1> %t0, <64 x i8> %v0, <64 x i8> %v1
@@ -27,7 +27,7 @@ define <64 x i8> @test_02(<64 x i8> %v0, <64 x i8> %v1) #0 {
 }
 
 ; CHECK: test_03:
-; CHECK: v0.b = vmin(v1.b,v0.b)
+; CHECK: v0.b = vmin(v0.b,v1.b)
 define <64 x i8> @test_03(<64 x i8> %v0, <64 x i8> %v1) #0 {
   %t0 = icmp sge <64 x i8> %v0, %v1
   %t1 = select <64 x i1> %t0, <64 x i8> %v1, <64 x i8> %v0
@@ -37,7 +37,7 @@ define <64 x i8> @test_03(<64 x i8> %v0, <64 x i8> %v1) #0 {
 ; maxb
 
 ; CHECK: test_04:
-; CHECK: v0.b = vmax(v1.b,v0.b)
+; CHECK: v0.b = vmax(v0.b,v1.b)
 define <64 x i8> @test_04(<64 x i8> %v0, <64 x i8> %v1) #0 {
   %t0 = icmp slt <64 x i8> %v0, %v1
   %t1 = select <64 x i1> %t0, <64 x i8> %v1, <64 x i8> %v0
@@ -61,7 +61,7 @@ define <64 x i8> @test_06(<64 x i8> %v0, <64 x i8> %v1) #0 {
 }
 
 ; CHECK: test_07:
-; CHECK: v0.b = vmax(v1.b,v0.b)
+; CHECK: v0.b = vmax(v0.b,v1.b)
 define <64 x i8> @test_07(<64 x i8> %v0, <64 x i8> %v1) #0 {
   %t0 = icmp sge <64 x i8> %v0, %v1
   %t1 = select <64 x i1> %t0, <64 x i8> %v0, <64 x i8> %v1
@@ -71,7 +71,7 @@ define <64 x i8> @test_07(<64 x i8> %v0, <64 x i8> %v1) #0 {
 ; minub
 
 ; CHECK: test_08:
-; CHECK: v0.ub = vmin(v1.ub,v0.ub)
+; CHECK: v0.ub = vmin(v0.ub,v1.ub)
 define <64 x i8> @test_08(<64 x i8> %v0, <64 x i8> %v1) #0 {
   %t0 = icmp ult <64 x i8> %v0, %v1
   %t1 = select <64 x i1> %t0, <64 x i8> %v0, <64 x i8> %v1
@@ -95,7 +95,7 @@ define <64 x i8> @test_0a(<64 x i8> %v0, <64 x i8> %v1) #0 {
 }
 
 ; CHECK: test_0b:
-; CHECK: v0.ub = vmin(v1.ub,v0.ub)
+; CHECK: v0.ub = vmin(v0.ub,v1.ub)
 define <64 x i8> @test_0b(<64 x i8> %v0, <64 x i8> %v1) #0 {
   %t0 = icmp uge <64 x i8> %v0, %v1
   %t1 = select <64 x i1> %t0, <64 x i8> %v1, <64 x i8> %v0
@@ -105,7 +105,7 @@ define <64 x i8> @test_0b(<64 x i8> %v0, <64 x i8> %v1) #0 {
 ; maxub
 
 ; CHECK: test_0c:
-; CHECK: v0.ub = vmax(v1.ub,v0.ub)
+; CHECK: v0.ub = vmax(v0.ub,v1.ub)
 define <64 x i8> @test_0c(<64 x i8> %v0, <64 x i8> %v1) #0 {
   %t0 = icmp ult <64 x i8> %v0, %v1
   %t1 = select <64 x i1> %t0, <64 x i8> %v1, <64 x i8> %v0
@@ -129,7 +129,7 @@ define <64 x i8> @test_0e(<64 x i8> %v0, <64 x i8> %v1) #0 {
 }
 
 ; CHECK: test_0f:
-; CHECK: v0.ub = vmax(v1.ub,v0.ub)
+; CHECK: v0.ub = vmax(v0.ub,v1.ub)
 define <64 x i8> @test_0f(<64 x i8> %v0, <64 x i8> %v1) #0 {
   %t0 = icmp uge <64 x i8> %v0, %v1
   %t1 = select <64 x i1> %t0, <64 x i8> %v0, <64 x i8> %v1
@@ -139,7 +139,7 @@ define <64 x i8> @test_0f(<64 x i8> %v0, <64 x i8> %v1) #0 {
 ; minh
 
 ; CHECK: test_10:
-; CHECK: v0.h = vmin(v1.h,v0.h)
+; CHECK: v0.h = vmin(v0.h,v1.h)
 define <32 x i16> @test_10(<32 x i16> %v0, <32 x i16> %v1) #0 {
   %t0 = icmp slt <32 x i16> %v0, %v1
   %t1 = select <32 x i1> %t0, <32 x i16> %v0, <32 x i16> %v1
@@ -163,7 +163,7 @@ define <32 x i16> @test_12(<32 x i16> %v0, <32 x i16> %v1) #0 {
 }
 
 ; CHECK: test_13:
-; CHECK: v0.h = vmin(v1.h,v0.h)
+; CHECK: v0.h = vmin(v0.h,v1.h)
 define <32 x i16> @test_13(<32 x i16> %v0, <32 x i16> %v1) #0 {
   %t0 = icmp sge <32 x i16> %v0, %v1
   %t1 = select <32 x i1> %t0, <32 x i16> %v1, <32 x i16> %v0
@@ -173,7 +173,7 @@ define <32 x i16> @test_13(<32 x i16> %v0, <32 x i16> %v1) #0 {
 ; maxh
 
 ; CHECK: test_14:
-; CHECK: v0.h = vmax(v1.h,v0.h)
+; CHECK: v0.h = vmax(v0.h,v1.h)
 define <32 x i16> @test_14(<32 x i16> %v0, <32 x i16> %v1) #0 {
   %t0 = icmp slt <32 x i16> %v0, %v1
   %t1 = select <32 x i1> %t0, <32 x i16> %v1, <32 x i16> %v0
@@ -197,7 +197,7 @@ define <32 x i16> @test_16(<32 x i16> %v0, <32 x i16> %v1) #0 {
 }
 
 ; CHECK: test_17:
-; CHECK: v0.h = vmax(v1.h,v0.h)
+; CHECK: v0.h = vmax(v0.h,v1.h)
 define <32 x i16> @test_17(<32 x i16> %v0, <32 x i16> %v1) #0 {
   %t0 = icmp sge <32 x i16> %v0, %v1
   %t1 = select <32 x i1> %t0, <32 x i16> %v0, <32 x i16> %v1
@@ -207,7 +207,7 @@ define <32 x i16> @test_17(<32 x i16> %v0, <32 x i16> %v1) #0 {
 ; minuh
 
 ; CHECK: test_18:
-; CHECK: v0.uh = vmin(v1.uh,v0.uh)
+; CHECK: v0.uh = vmin(v0.uh,v1.uh)
 define <32 x i16> @test_18(<32 x i16> %v0, <32 x i16> %v1) #0 {
   %t0 = icmp ult <32 x i16> %v0, %v1
   %t1 = select <32 x i1> %t0, <32 x i16> %v0, <32 x i16> %v1
@@ -231,7 +231,7 @@ define <32 x i16> @test_1a(<32 x i16> %v0, <32 x i16> %v1) #0 {
 }
 
 ; CHECK: test_1b:
-; CHECK: v0.uh = vmin(v1.uh,v0.uh)
+; CHECK: v0.uh = vmin(v0.uh,v1.uh)
 define <32 x i16> @test_1b(<32 x i16> %v0, <32 x i16> %v1) #0 {
   %t0 = icmp uge <32 x i16> %v0, %v1
   %t1 = select <32 x i1> %t0, <32 x i16> %v1, <32 x i16> %v0
@@ -241,7 +241,7 @@ define <32 x i16> @test_1b(<32 x i16> %v0, <32 x i16> %v1) #0 {
 ; maxuh
 
 ; CHECK: test_1c:
-; CHECK: v0.uh = vmax(v1.uh,v0.uh)
+; CHECK: v0.uh = vmax(v0.uh,v1.uh)
 define <32 x i16> @test_1c(<32 x i16> %v0, <32 x i16> %v1) #0 {
   %t0 = icmp ult <32 x i16> %v0, %v1
   %t1 = select <32 x i1> %t0, <32 x i16> %v1, <32 x i16> %v0
@@ -265,7 +265,7 @@ define <32 x i16> @test_1e(<32 x i16> %v0, <32 x i16> %v1) #0 {
 }
 
 ; CHECK: test_1f:
-; CHECK: v0.uh = vmax(v1.uh,v0.uh)
+; CHECK: v0.uh = vmax(v0.uh,v1.uh)
 define <32 x i16> @test_1f(<32 x i16> %v0, <32 x i16> %v1) #0 {
   %t0 = icmp uge <32 x i16> %v0, %v1
   %t1 = select <32 x i1> %t0, <32 x i16> %v0, <32 x i16> %v1
@@ -275,7 +275,7 @@ define <32 x i16> @test_1f(<32 x i16> %v0, <32 x i16> %v1) #0 {
 ; minw
 
 ; CHECK: test_20:
-; CHECK: v0.w = vmin(v1.w,v0.w)
+; CHECK: v0.w = vmin(v0.w,v1.w)
 define <16 x i32> @test_20(<16 x i32> %v0, <16 x i32> %v1) #0 {
   %t0 = icmp slt <16 x i32> %v0, %v1
   %t1 = select <16 x i1> %t0, <16 x i32> %v0, <16 x i32> %v1
@@ -299,7 +299,7 @@ define <16 x i32> @test_22(<16 x i32> %v0, <16 x i32> %v1) #0 {
 }
 
 ; CHECK: test_23:
-; CHECK: v0.w = vmin(v1.w,v0.w)
+; CHECK: v0.w = vmin(v0.w,v1.w)
 define <16 x i32> @test_23(<16 x i32> %v0, <16 x i32> %v1) #0 {
   %t0 = icmp sge <16 x i32> %v0, %v1
   %t1 = select <16 x i1> %t0, <16 x i32> %v1, <16 x i32> %v0
@@ -309,7 +309,7 @@ define <16 x i32> @test_23(<16 x i32> %v0, <16 x i32> %v1) #0 {
 ; maxw
 
 ; CHECK: test_24:
-; CHECK: v0.w = vmax(v1.w,v0.w)
+; CHECK: v0.w = vmax(v0.w,v1.w)
 define <16 x i32> @test_24(<16 x i32> %v0, <16 x i32> %v1) #0 {
   %t0 = icmp slt <16 x i32> %v0, %v1
   %t1 = select <16 x i1> %t0, <16 x i32> %v1, <16 x i32> %v0
@@ -333,7 +333,7 @@ define <16 x i32> @test_26(<16 x i32> %v0, <16 x i32> %v1) #0 {
 }
 
 ; CHECK: test_27:
-; CHECK: v0.w = vmax(v1.w,v0.w)
+; CHECK: v0.w = vmax(v0.w,v1.w)
 define <16 x i32> @test_27(<16 x i32> %v0, <16 x i32> %v1) #0 {
   %t0 = icmp sge <16 x i32> %v0, %v1
   %t1 = select <16 x i1> %t0, <16 x i32> %v0, <16 x i32> %v1


        


More information about the llvm-commits mailing list