[llvm] a3de21c - [WebAssembly] Ofast pmin/pmax pattern matchers (#100107)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 24 01:23:52 PDT 2024


Author: Sam Parker
Date: 2024-07-24T09:23:49+01:00
New Revision: a3de21cac1fb8f1dd98cfe1d1443e2d3f0a97351

URL: https://github.com/llvm/llvm-project/commit/a3de21cac1fb8f1dd98cfe1d1443e2d3f0a97351
DIFF: https://github.com/llvm/llvm-project/commit/a3de21cac1fb8f1dd98cfe1d1443e2d3f0a97351.diff

LOG: [WebAssembly] Ofast pmin/pmax pattern matchers (#100107)

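With fast-math, the ordered setcc condition codes (setolt, setole, setogt,
setoge) are converted to their equivalents which do not care about NaNs,
and the existing pmin/pmax patterns do not match those. Add patterns that
use setlt, setle, setgt and setge so that these selects are still lowered
to pmin/pmax.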

Added: 
    

Modified: 
    llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
    llvm/test/CodeGen/WebAssembly/simd-arith.ll
    llvm/test/CodeGen/WebAssembly/vector-reduce.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 2ee430c88169d..d6c6425b10041 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1270,7 +1270,11 @@ def pmin : PatFrags<(ops node:$lhs, node:$rhs), [
                     (vselect (setolt $rhs, $lhs), $rhs, $lhs),
                     (vselect (setole $rhs, $lhs), $rhs, $lhs),
                     (vselect (setogt $lhs, $rhs), $rhs, $lhs),
-                    (vselect (setoge $lhs, $rhs), $rhs, $lhs)
+                    (vselect (setoge $lhs, $rhs), $rhs, $lhs),
+                    (vselect (setlt $lhs, $rhs), $lhs, $rhs),
+                    (vselect (setle $lhs, $rhs), $lhs, $rhs),
+                    (vselect (setgt $lhs, $rhs), $rhs, $lhs),
+                    (vselect (setge $lhs, $rhs), $rhs, $lhs)
 ]>;
 defm PMIN : SIMDBinaryFP<pmin, "pmin", 234>;
 
@@ -1279,7 +1283,11 @@ def pmax : PatFrags<(ops node:$lhs, node:$rhs), [
                     (vselect (setogt $rhs, $lhs), $rhs, $lhs),
                     (vselect (setoge $rhs, $lhs), $rhs, $lhs),
                     (vselect (setolt $lhs, $rhs), $rhs, $lhs),
-                    (vselect (setole $lhs, $rhs), $rhs, $lhs)
+                    (vselect (setole $lhs, $rhs), $rhs, $lhs),
+                    (vselect (setgt $lhs, $rhs), $lhs, $rhs),
+                    (vselect (setge $lhs, $rhs), $lhs, $rhs),
+                    (vselect (setlt $lhs, $rhs), $rhs, $lhs),
+                    (vselect (setle $lhs, $rhs), $rhs, $lhs)
 ]>;
 defm PMAX : SIMDBinaryFP<pmax, "pmax", 235>;
 

diff  --git a/llvm/test/CodeGen/WebAssembly/simd-arith.ll b/llvm/test/CodeGen/WebAssembly/simd-arith.ll
index 67388b688e3bb..185c46aa5681e 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-arith.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-arith.ll
@@ -12499,6 +12499,210 @@ define <4 x float> @pmin_v4f32(<4 x float> %x, <4 x float> %y) {
   ret <4 x float> %a
 }
 
+define <4 x float> @pmin_v4f32_fast_olt(<4 x float> %x, <4 x float> %y) {
+; SIMD128-LABEL: pmin_v4f32_fast_olt:
+; SIMD128:         .functype pmin_v4f32_fast_olt (v128, v128) -> (v128)
+; SIMD128-NEXT:  # %bb.0:
+; SIMD128-NEXT:    f32x4.pmin $push0=, $1, $0
+; SIMD128-NEXT:    return $pop0
+;
+; SIMD128-FAST-LABEL: pmin_v4f32_fast_olt:
+; SIMD128-FAST:         .functype pmin_v4f32_fast_olt (v128, v128) -> (v128)
+; SIMD128-FAST-NEXT:  # %bb.0:
+; SIMD128-FAST-NEXT:    f32x4.pmin $push0=, $1, $0
+; SIMD128-FAST-NEXT:    return $pop0
+;
+; NO-SIMD128-LABEL: pmin_v4f32_fast_olt:
+; NO-SIMD128:         .functype pmin_v4f32_fast_olt (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-NEXT:  # %bb.0:
+; NO-SIMD128-NEXT:    f32.lt $push0=, $8, $4
+; NO-SIMD128-NEXT:    f32.select $push1=, $8, $4, $pop0
+; NO-SIMD128-NEXT:    f32.store 12($0), $pop1
+; NO-SIMD128-NEXT:    f32.lt $push2=, $7, $3
+; NO-SIMD128-NEXT:    f32.select $push3=, $7, $3, $pop2
+; NO-SIMD128-NEXT:    f32.store 8($0), $pop3
+; NO-SIMD128-NEXT:    f32.lt $push4=, $6, $2
+; NO-SIMD128-NEXT:    f32.select $push5=, $6, $2, $pop4
+; NO-SIMD128-NEXT:    f32.store 4($0), $pop5
+; NO-SIMD128-NEXT:    f32.lt $push6=, $5, $1
+; NO-SIMD128-NEXT:    f32.select $push7=, $5, $1, $pop6
+; NO-SIMD128-NEXT:    f32.store 0($0), $pop7
+; NO-SIMD128-NEXT:    return
+;
+; NO-SIMD128-FAST-LABEL: pmin_v4f32_fast_olt:
+; NO-SIMD128-FAST:         .functype pmin_v4f32_fast_olt (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-FAST-NEXT:  # %bb.0:
+; NO-SIMD128-FAST-NEXT:    f32.lt $push0=, $5, $1
+; NO-SIMD128-FAST-NEXT:    f32.select $push1=, $5, $1, $pop0
+; NO-SIMD128-FAST-NEXT:    f32.store 0($0), $pop1
+; NO-SIMD128-FAST-NEXT:    f32.lt $push2=, $6, $2
+; NO-SIMD128-FAST-NEXT:    f32.select $push3=, $6, $2, $pop2
+; NO-SIMD128-FAST-NEXT:    f32.store 4($0), $pop3
+; NO-SIMD128-FAST-NEXT:    f32.lt $push4=, $7, $3
+; NO-SIMD128-FAST-NEXT:    f32.select $push5=, $7, $3, $pop4
+; NO-SIMD128-FAST-NEXT:    f32.store 8($0), $pop5
+; NO-SIMD128-FAST-NEXT:    f32.lt $push6=, $8, $4
+; NO-SIMD128-FAST-NEXT:    f32.select $push7=, $8, $4, $pop6
+; NO-SIMD128-FAST-NEXT:    f32.store 12($0), $pop7
+; NO-SIMD128-FAST-NEXT:    return
+  %c = fcmp fast olt <4 x float> %y, %x
+  %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+  ret <4 x float> %a
+}
+
+define <4 x float> @pmin_v4f32_fast_ogt(<4 x float> %x, <4 x float> %y) {
+; SIMD128-LABEL: pmin_v4f32_fast_ogt:
+; SIMD128:         .functype pmin_v4f32_fast_ogt (v128, v128) -> (v128)
+; SIMD128-NEXT:  # %bb.0:
+; SIMD128-NEXT:    f32x4.pmin $push0=, $0, $1
+; SIMD128-NEXT:    return $pop0
+;
+; SIMD128-FAST-LABEL: pmin_v4f32_fast_ogt:
+; SIMD128-FAST:         .functype pmin_v4f32_fast_ogt (v128, v128) -> (v128)
+; SIMD128-FAST-NEXT:  # %bb.0:
+; SIMD128-FAST-NEXT:    f32x4.pmin $push0=, $0, $1
+; SIMD128-FAST-NEXT:    return $pop0
+;
+; NO-SIMD128-LABEL: pmin_v4f32_fast_ogt:
+; NO-SIMD128:         .functype pmin_v4f32_fast_ogt (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-NEXT:  # %bb.0:
+; NO-SIMD128-NEXT:    f32.gt $push0=, $4, $8
+; NO-SIMD128-NEXT:    f32.select $push1=, $8, $4, $pop0
+; NO-SIMD128-NEXT:    f32.store 12($0), $pop1
+; NO-SIMD128-NEXT:    f32.gt $push2=, $3, $7
+; NO-SIMD128-NEXT:    f32.select $push3=, $7, $3, $pop2
+; NO-SIMD128-NEXT:    f32.store 8($0), $pop3
+; NO-SIMD128-NEXT:    f32.gt $push4=, $2, $6
+; NO-SIMD128-NEXT:    f32.select $push5=, $6, $2, $pop4
+; NO-SIMD128-NEXT:    f32.store 4($0), $pop5
+; NO-SIMD128-NEXT:    f32.gt $push6=, $1, $5
+; NO-SIMD128-NEXT:    f32.select $push7=, $5, $1, $pop6
+; NO-SIMD128-NEXT:    f32.store 0($0), $pop7
+; NO-SIMD128-NEXT:    return
+;
+; NO-SIMD128-FAST-LABEL: pmin_v4f32_fast_ogt:
+; NO-SIMD128-FAST:         .functype pmin_v4f32_fast_ogt (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-FAST-NEXT:  # %bb.0:
+; NO-SIMD128-FAST-NEXT:    f32.gt $push0=, $1, $5
+; NO-SIMD128-FAST-NEXT:    f32.select $push1=, $5, $1, $pop0
+; NO-SIMD128-FAST-NEXT:    f32.store 0($0), $pop1
+; NO-SIMD128-FAST-NEXT:    f32.gt $push2=, $2, $6
+; NO-SIMD128-FAST-NEXT:    f32.select $push3=, $6, $2, $pop2
+; NO-SIMD128-FAST-NEXT:    f32.store 4($0), $pop3
+; NO-SIMD128-FAST-NEXT:    f32.gt $push4=, $3, $7
+; NO-SIMD128-FAST-NEXT:    f32.select $push5=, $7, $3, $pop4
+; NO-SIMD128-FAST-NEXT:    f32.store 8($0), $pop5
+; NO-SIMD128-FAST-NEXT:    f32.gt $push6=, $4, $8
+; NO-SIMD128-FAST-NEXT:    f32.select $push7=, $8, $4, $pop6
+; NO-SIMD128-FAST-NEXT:    f32.store 12($0), $pop7
+; NO-SIMD128-FAST-NEXT:    return
+  %c = fcmp fast ogt <4 x float> %x, %y
+  %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+  ret <4 x float> %a
+}
+
+define <4 x float> @pmin_v4f32_fast_ole(<4 x float> %x, <4 x float> %y) {
+; SIMD128-LABEL: pmin_v4f32_fast_ole:
+; SIMD128:         .functype pmin_v4f32_fast_ole (v128, v128) -> (v128)
+; SIMD128-NEXT:  # %bb.0:
+; SIMD128-NEXT:    f32x4.pmin $push0=, $1, $0
+; SIMD128-NEXT:    return $pop0
+;
+; SIMD128-FAST-LABEL: pmin_v4f32_fast_ole:
+; SIMD128-FAST:         .functype pmin_v4f32_fast_ole (v128, v128) -> (v128)
+; SIMD128-FAST-NEXT:  # %bb.0:
+; SIMD128-FAST-NEXT:    f32x4.pmin $push0=, $1, $0
+; SIMD128-FAST-NEXT:    return $pop0
+;
+; NO-SIMD128-LABEL: pmin_v4f32_fast_ole:
+; NO-SIMD128:         .functype pmin_v4f32_fast_ole (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-NEXT:  # %bb.0:
+; NO-SIMD128-NEXT:    f32.le $push0=, $8, $4
+; NO-SIMD128-NEXT:    f32.select $push1=, $8, $4, $pop0
+; NO-SIMD128-NEXT:    f32.store 12($0), $pop1
+; NO-SIMD128-NEXT:    f32.le $push2=, $7, $3
+; NO-SIMD128-NEXT:    f32.select $push3=, $7, $3, $pop2
+; NO-SIMD128-NEXT:    f32.store 8($0), $pop3
+; NO-SIMD128-NEXT:    f32.le $push4=, $6, $2
+; NO-SIMD128-NEXT:    f32.select $push5=, $6, $2, $pop4
+; NO-SIMD128-NEXT:    f32.store 4($0), $pop5
+; NO-SIMD128-NEXT:    f32.le $push6=, $5, $1
+; NO-SIMD128-NEXT:    f32.select $push7=, $5, $1, $pop6
+; NO-SIMD128-NEXT:    f32.store 0($0), $pop7
+; NO-SIMD128-NEXT:    return
+;
+; NO-SIMD128-FAST-LABEL: pmin_v4f32_fast_ole:
+; NO-SIMD128-FAST:         .functype pmin_v4f32_fast_ole (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-FAST-NEXT:  # %bb.0:
+; NO-SIMD128-FAST-NEXT:    f32.le $push0=, $5, $1
+; NO-SIMD128-FAST-NEXT:    f32.select $push1=, $5, $1, $pop0
+; NO-SIMD128-FAST-NEXT:    f32.store 0($0), $pop1
+; NO-SIMD128-FAST-NEXT:    f32.le $push2=, $6, $2
+; NO-SIMD128-FAST-NEXT:    f32.select $push3=, $6, $2, $pop2
+; NO-SIMD128-FAST-NEXT:    f32.store 4($0), $pop3
+; NO-SIMD128-FAST-NEXT:    f32.le $push4=, $7, $3
+; NO-SIMD128-FAST-NEXT:    f32.select $push5=, $7, $3, $pop4
+; NO-SIMD128-FAST-NEXT:    f32.store 8($0), $pop5
+; NO-SIMD128-FAST-NEXT:    f32.le $push6=, $8, $4
+; NO-SIMD128-FAST-NEXT:    f32.select $push7=, $8, $4, $pop6
+; NO-SIMD128-FAST-NEXT:    f32.store 12($0), $pop7
+; NO-SIMD128-FAST-NEXT:    return
+  %c = fcmp fast ole <4 x float> %y, %x
+  %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+  ret <4 x float> %a
+}
+
+define <4 x float> @pmin_v4f32_fast_oge(<4 x float> %x, <4 x float> %y) {
+; SIMD128-LABEL: pmin_v4f32_fast_oge:
+; SIMD128:         .functype pmin_v4f32_fast_oge (v128, v128) -> (v128)
+; SIMD128-NEXT:  # %bb.0:
+; SIMD128-NEXT:    f32x4.pmin $push0=, $0, $1
+; SIMD128-NEXT:    return $pop0
+;
+; SIMD128-FAST-LABEL: pmin_v4f32_fast_oge:
+; SIMD128-FAST:         .functype pmin_v4f32_fast_oge (v128, v128) -> (v128)
+; SIMD128-FAST-NEXT:  # %bb.0:
+; SIMD128-FAST-NEXT:    f32x4.pmin $push0=, $0, $1
+; SIMD128-FAST-NEXT:    return $pop0
+;
+; NO-SIMD128-LABEL: pmin_v4f32_fast_oge:
+; NO-SIMD128:         .functype pmin_v4f32_fast_oge (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-NEXT:  # %bb.0:
+; NO-SIMD128-NEXT:    f32.ge $push0=, $4, $8
+; NO-SIMD128-NEXT:    f32.select $push1=, $8, $4, $pop0
+; NO-SIMD128-NEXT:    f32.store 12($0), $pop1
+; NO-SIMD128-NEXT:    f32.ge $push2=, $3, $7
+; NO-SIMD128-NEXT:    f32.select $push3=, $7, $3, $pop2
+; NO-SIMD128-NEXT:    f32.store 8($0), $pop3
+; NO-SIMD128-NEXT:    f32.ge $push4=, $2, $6
+; NO-SIMD128-NEXT:    f32.select $push5=, $6, $2, $pop4
+; NO-SIMD128-NEXT:    f32.store 4($0), $pop5
+; NO-SIMD128-NEXT:    f32.ge $push6=, $1, $5
+; NO-SIMD128-NEXT:    f32.select $push7=, $5, $1, $pop6
+; NO-SIMD128-NEXT:    f32.store 0($0), $pop7
+; NO-SIMD128-NEXT:    return
+;
+; NO-SIMD128-FAST-LABEL: pmin_v4f32_fast_oge:
+; NO-SIMD128-FAST:         .functype pmin_v4f32_fast_oge (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-FAST-NEXT:  # %bb.0:
+; NO-SIMD128-FAST-NEXT:    f32.ge $push0=, $1, $5
+; NO-SIMD128-FAST-NEXT:    f32.select $push1=, $5, $1, $pop0
+; NO-SIMD128-FAST-NEXT:    f32.store 0($0), $pop1
+; NO-SIMD128-FAST-NEXT:    f32.ge $push2=, $2, $6
+; NO-SIMD128-FAST-NEXT:    f32.select $push3=, $6, $2, $pop2
+; NO-SIMD128-FAST-NEXT:    f32.store 4($0), $pop3
+; NO-SIMD128-FAST-NEXT:    f32.ge $push4=, $3, $7
+; NO-SIMD128-FAST-NEXT:    f32.select $push5=, $7, $3, $pop4
+; NO-SIMD128-FAST-NEXT:    f32.store 8($0), $pop5
+; NO-SIMD128-FAST-NEXT:    f32.ge $push6=, $4, $8
+; NO-SIMD128-FAST-NEXT:    f32.select $push7=, $8, $4, $pop6
+; NO-SIMD128-FAST-NEXT:    f32.store 12($0), $pop7
+; NO-SIMD128-FAST-NEXT:    return
+  %c = fcmp fast oge <4 x float> %x, %y
+  %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+  ret <4 x float> %a
+}
+
 define <4 x i32> @pmin_int_v4f32(<4 x i32> %x, <4 x i32> %y) {
 ; SIMD128-LABEL: pmin_int_v4f32:
 ; SIMD128:         .functype pmin_int_v4f32 (v128, v128) -> (v128)
@@ -12619,6 +12823,210 @@ define <4 x float> @pmax_v4f32(<4 x float> %x, <4 x float> %y) {
   ret <4 x float> %a
 }
 
+define <4 x float> @pmax_v4f32_fast_ogt(<4 x float> %x, <4 x float> %y) {
+; SIMD128-LABEL: pmax_v4f32_fast_ogt:
+; SIMD128:         .functype pmax_v4f32_fast_ogt (v128, v128) -> (v128)
+; SIMD128-NEXT:  # %bb.0:
+; SIMD128-NEXT:    f32x4.pmax $push0=, $0, $1
+; SIMD128-NEXT:    return $pop0
+;
+; SIMD128-FAST-LABEL: pmax_v4f32_fast_ogt:
+; SIMD128-FAST:         .functype pmax_v4f32_fast_ogt (v128, v128) -> (v128)
+; SIMD128-FAST-NEXT:  # %bb.0:
+; SIMD128-FAST-NEXT:    f32x4.pmax $push0=, $0, $1
+; SIMD128-FAST-NEXT:    return $pop0
+;
+; NO-SIMD128-LABEL: pmax_v4f32_fast_ogt:
+; NO-SIMD128:         .functype pmax_v4f32_fast_ogt (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-NEXT:  # %bb.0:
+; NO-SIMD128-NEXT:    f32.gt $push0=, $4, $8
+; NO-SIMD128-NEXT:    f32.select $push1=, $4, $8, $pop0
+; NO-SIMD128-NEXT:    f32.store 12($0), $pop1
+; NO-SIMD128-NEXT:    f32.gt $push2=, $3, $7
+; NO-SIMD128-NEXT:    f32.select $push3=, $3, $7, $pop2
+; NO-SIMD128-NEXT:    f32.store 8($0), $pop3
+; NO-SIMD128-NEXT:    f32.gt $push4=, $2, $6
+; NO-SIMD128-NEXT:    f32.select $push5=, $2, $6, $pop4
+; NO-SIMD128-NEXT:    f32.store 4($0), $pop5
+; NO-SIMD128-NEXT:    f32.gt $push6=, $1, $5
+; NO-SIMD128-NEXT:    f32.select $push7=, $1, $5, $pop6
+; NO-SIMD128-NEXT:    f32.store 0($0), $pop7
+; NO-SIMD128-NEXT:    return
+;
+; NO-SIMD128-FAST-LABEL: pmax_v4f32_fast_ogt:
+; NO-SIMD128-FAST:         .functype pmax_v4f32_fast_ogt (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-FAST-NEXT:  # %bb.0:
+; NO-SIMD128-FAST-NEXT:    f32.gt $push0=, $1, $5
+; NO-SIMD128-FAST-NEXT:    f32.select $push1=, $1, $5, $pop0
+; NO-SIMD128-FAST-NEXT:    f32.store 0($0), $pop1
+; NO-SIMD128-FAST-NEXT:    f32.gt $push2=, $2, $6
+; NO-SIMD128-FAST-NEXT:    f32.select $push3=, $2, $6, $pop2
+; NO-SIMD128-FAST-NEXT:    f32.store 4($0), $pop3
+; NO-SIMD128-FAST-NEXT:    f32.gt $push4=, $3, $7
+; NO-SIMD128-FAST-NEXT:    f32.select $push5=, $3, $7, $pop4
+; NO-SIMD128-FAST-NEXT:    f32.store 8($0), $pop5
+; NO-SIMD128-FAST-NEXT:    f32.gt $push6=, $4, $8
+; NO-SIMD128-FAST-NEXT:    f32.select $push7=, $4, $8, $pop6
+; NO-SIMD128-FAST-NEXT:    f32.store 12($0), $pop7
+; NO-SIMD128-FAST-NEXT:    return
+  %c = fcmp fast ogt <4 x float> %x, %y
+  %a = select <4 x i1> %c, <4 x float> %x, <4 x float> %y
+  ret <4 x float> %a
+}
+
+define <4 x float> @pmax_v4f32_fast_olt(<4 x float> %x, <4 x float> %y) {
+; SIMD128-LABEL: pmax_v4f32_fast_olt:
+; SIMD128:         .functype pmax_v4f32_fast_olt (v128, v128) -> (v128)
+; SIMD128-NEXT:  # %bb.0:
+; SIMD128-NEXT:    f32x4.pmax $push0=, $0, $1
+; SIMD128-NEXT:    return $pop0
+;
+; SIMD128-FAST-LABEL: pmax_v4f32_fast_olt:
+; SIMD128-FAST:         .functype pmax_v4f32_fast_olt (v128, v128) -> (v128)
+; SIMD128-FAST-NEXT:  # %bb.0:
+; SIMD128-FAST-NEXT:    f32x4.pmax $push0=, $0, $1
+; SIMD128-FAST-NEXT:    return $pop0
+;
+; NO-SIMD128-LABEL: pmax_v4f32_fast_olt:
+; NO-SIMD128:         .functype pmax_v4f32_fast_olt (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-NEXT:  # %bb.0:
+; NO-SIMD128-NEXT:    f32.lt $push0=, $4, $8
+; NO-SIMD128-NEXT:    f32.select $push1=, $8, $4, $pop0
+; NO-SIMD128-NEXT:    f32.store 12($0), $pop1
+; NO-SIMD128-NEXT:    f32.lt $push2=, $3, $7
+; NO-SIMD128-NEXT:    f32.select $push3=, $7, $3, $pop2
+; NO-SIMD128-NEXT:    f32.store 8($0), $pop3
+; NO-SIMD128-NEXT:    f32.lt $push4=, $2, $6
+; NO-SIMD128-NEXT:    f32.select $push5=, $6, $2, $pop4
+; NO-SIMD128-NEXT:    f32.store 4($0), $pop5
+; NO-SIMD128-NEXT:    f32.lt $push6=, $1, $5
+; NO-SIMD128-NEXT:    f32.select $push7=, $5, $1, $pop6
+; NO-SIMD128-NEXT:    f32.store 0($0), $pop7
+; NO-SIMD128-NEXT:    return
+;
+; NO-SIMD128-FAST-LABEL: pmax_v4f32_fast_olt:
+; NO-SIMD128-FAST:         .functype pmax_v4f32_fast_olt (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-FAST-NEXT:  # %bb.0:
+; NO-SIMD128-FAST-NEXT:    f32.lt $push0=, $1, $5
+; NO-SIMD128-FAST-NEXT:    f32.select $push1=, $5, $1, $pop0
+; NO-SIMD128-FAST-NEXT:    f32.store 0($0), $pop1
+; NO-SIMD128-FAST-NEXT:    f32.lt $push2=, $2, $6
+; NO-SIMD128-FAST-NEXT:    f32.select $push3=, $6, $2, $pop2
+; NO-SIMD128-FAST-NEXT:    f32.store 4($0), $pop3
+; NO-SIMD128-FAST-NEXT:    f32.lt $push4=, $3, $7
+; NO-SIMD128-FAST-NEXT:    f32.select $push5=, $7, $3, $pop4
+; NO-SIMD128-FAST-NEXT:    f32.store 8($0), $pop5
+; NO-SIMD128-FAST-NEXT:    f32.lt $push6=, $4, $8
+; NO-SIMD128-FAST-NEXT:    f32.select $push7=, $8, $4, $pop6
+; NO-SIMD128-FAST-NEXT:    f32.store 12($0), $pop7
+; NO-SIMD128-FAST-NEXT:    return
+  %c = fcmp fast olt <4 x float> %x, %y
+  %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+  ret <4 x float> %a
+}
+
+define <4 x float> @pmax_v4f32_fast_oge(<4 x float> %x, <4 x float> %y) {
+; SIMD128-LABEL: pmax_v4f32_fast_oge:
+; SIMD128:         .functype pmax_v4f32_fast_oge (v128, v128) -> (v128)
+; SIMD128-NEXT:  # %bb.0:
+; SIMD128-NEXT:    f32x4.pmax $push0=, $0, $1
+; SIMD128-NEXT:    return $pop0
+;
+; SIMD128-FAST-LABEL: pmax_v4f32_fast_oge:
+; SIMD128-FAST:         .functype pmax_v4f32_fast_oge (v128, v128) -> (v128)
+; SIMD128-FAST-NEXT:  # %bb.0:
+; SIMD128-FAST-NEXT:    f32x4.pmax $push0=, $0, $1
+; SIMD128-FAST-NEXT:    return $pop0
+;
+; NO-SIMD128-LABEL: pmax_v4f32_fast_oge:
+; NO-SIMD128:         .functype pmax_v4f32_fast_oge (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-NEXT:  # %bb.0:
+; NO-SIMD128-NEXT:    f32.ge $push0=, $4, $8
+; NO-SIMD128-NEXT:    f32.select $push1=, $4, $8, $pop0
+; NO-SIMD128-NEXT:    f32.store 12($0), $pop1
+; NO-SIMD128-NEXT:    f32.ge $push2=, $3, $7
+; NO-SIMD128-NEXT:    f32.select $push3=, $3, $7, $pop2
+; NO-SIMD128-NEXT:    f32.store 8($0), $pop3
+; NO-SIMD128-NEXT:    f32.ge $push4=, $2, $6
+; NO-SIMD128-NEXT:    f32.select $push5=, $2, $6, $pop4
+; NO-SIMD128-NEXT:    f32.store 4($0), $pop5
+; NO-SIMD128-NEXT:    f32.ge $push6=, $1, $5
+; NO-SIMD128-NEXT:    f32.select $push7=, $1, $5, $pop6
+; NO-SIMD128-NEXT:    f32.store 0($0), $pop7
+; NO-SIMD128-NEXT:    return
+;
+; NO-SIMD128-FAST-LABEL: pmax_v4f32_fast_oge:
+; NO-SIMD128-FAST:         .functype pmax_v4f32_fast_oge (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-FAST-NEXT:  # %bb.0:
+; NO-SIMD128-FAST-NEXT:    f32.ge $push0=, $1, $5
+; NO-SIMD128-FAST-NEXT:    f32.select $push1=, $1, $5, $pop0
+; NO-SIMD128-FAST-NEXT:    f32.store 0($0), $pop1
+; NO-SIMD128-FAST-NEXT:    f32.ge $push2=, $2, $6
+; NO-SIMD128-FAST-NEXT:    f32.select $push3=, $2, $6, $pop2
+; NO-SIMD128-FAST-NEXT:    f32.store 4($0), $pop3
+; NO-SIMD128-FAST-NEXT:    f32.ge $push4=, $3, $7
+; NO-SIMD128-FAST-NEXT:    f32.select $push5=, $3, $7, $pop4
+; NO-SIMD128-FAST-NEXT:    f32.store 8($0), $pop5
+; NO-SIMD128-FAST-NEXT:    f32.ge $push6=, $4, $8
+; NO-SIMD128-FAST-NEXT:    f32.select $push7=, $4, $8, $pop6
+; NO-SIMD128-FAST-NEXT:    f32.store 12($0), $pop7
+; NO-SIMD128-FAST-NEXT:    return
+  %c = fcmp fast oge <4 x float> %x, %y
+  %a = select <4 x i1> %c, <4 x float> %x, <4 x float> %y
+  ret <4 x float> %a
+}
+
+define <4 x float> @pmax_v4f32_fast_ole(<4 x float> %x, <4 x float> %y) {
+; SIMD128-LABEL: pmax_v4f32_fast_ole:
+; SIMD128:         .functype pmax_v4f32_fast_ole (v128, v128) -> (v128)
+; SIMD128-NEXT:  # %bb.0:
+; SIMD128-NEXT:    f32x4.pmax $push0=, $0, $1
+; SIMD128-NEXT:    return $pop0
+;
+; SIMD128-FAST-LABEL: pmax_v4f32_fast_ole:
+; SIMD128-FAST:         .functype pmax_v4f32_fast_ole (v128, v128) -> (v128)
+; SIMD128-FAST-NEXT:  # %bb.0:
+; SIMD128-FAST-NEXT:    f32x4.pmax $push0=, $0, $1
+; SIMD128-FAST-NEXT:    return $pop0
+;
+; NO-SIMD128-LABEL: pmax_v4f32_fast_ole:
+; NO-SIMD128:         .functype pmax_v4f32_fast_ole (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-NEXT:  # %bb.0:
+; NO-SIMD128-NEXT:    f32.le $push0=, $4, $8
+; NO-SIMD128-NEXT:    f32.select $push1=, $8, $4, $pop0
+; NO-SIMD128-NEXT:    f32.store 12($0), $pop1
+; NO-SIMD128-NEXT:    f32.le $push2=, $3, $7
+; NO-SIMD128-NEXT:    f32.select $push3=, $7, $3, $pop2
+; NO-SIMD128-NEXT:    f32.store 8($0), $pop3
+; NO-SIMD128-NEXT:    f32.le $push4=, $2, $6
+; NO-SIMD128-NEXT:    f32.select $push5=, $6, $2, $pop4
+; NO-SIMD128-NEXT:    f32.store 4($0), $pop5
+; NO-SIMD128-NEXT:    f32.le $push6=, $1, $5
+; NO-SIMD128-NEXT:    f32.select $push7=, $5, $1, $pop6
+; NO-SIMD128-NEXT:    f32.store 0($0), $pop7
+; NO-SIMD128-NEXT:    return
+;
+; NO-SIMD128-FAST-LABEL: pmax_v4f32_fast_ole:
+; NO-SIMD128-FAST:         .functype pmax_v4f32_fast_ole (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-FAST-NEXT:  # %bb.0:
+; NO-SIMD128-FAST-NEXT:    f32.le $push0=, $1, $5
+; NO-SIMD128-FAST-NEXT:    f32.select $push1=, $5, $1, $pop0
+; NO-SIMD128-FAST-NEXT:    f32.store 0($0), $pop1
+; NO-SIMD128-FAST-NEXT:    f32.le $push2=, $2, $6
+; NO-SIMD128-FAST-NEXT:    f32.select $push3=, $6, $2, $pop2
+; NO-SIMD128-FAST-NEXT:    f32.store 4($0), $pop3
+; NO-SIMD128-FAST-NEXT:    f32.le $push4=, $3, $7
+; NO-SIMD128-FAST-NEXT:    f32.select $push5=, $7, $3, $pop4
+; NO-SIMD128-FAST-NEXT:    f32.store 8($0), $pop5
+; NO-SIMD128-FAST-NEXT:    f32.le $push6=, $4, $8
+; NO-SIMD128-FAST-NEXT:    f32.select $push7=, $8, $4, $pop6
+; NO-SIMD128-FAST-NEXT:    f32.store 12($0), $pop7
+; NO-SIMD128-FAST-NEXT:    return
+  %c = fcmp fast ole <4 x float> %x, %y
+  %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+  ret <4 x float> %a
+}
+
 define <4 x i32> @pmax_int_v4f32(<4 x i32> %x, <4 x i32> %y) {
 ; SIMD128-LABEL: pmax_int_v4f32:
 ; SIMD128:         .functype pmax_int_v4f32 (v128, v128) -> (v128)

diff  --git a/llvm/test/CodeGen/WebAssembly/vector-reduce.ll b/llvm/test/CodeGen/WebAssembly/vector-reduce.ll
index 4b1a1a8a0c5b6..1d194b640eab2 100644
--- a/llvm/test/CodeGen/WebAssembly/vector-reduce.ll
+++ b/llvm/test/CodeGen/WebAssembly/vector-reduce.ll
@@ -791,10 +791,8 @@ define double @pairwise_max_v2f64_fast(<2 x double> %arg) {
 ; SIMD128-LABEL: pairwise_max_v2f64_fast:
 ; SIMD128:         .functype pairwise_max_v2f64_fast (v128) -> (f64)
 ; SIMD128-NEXT:  # %bb.0:
-; SIMD128-NEXT:    i8x16.shuffle $push4=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
-; SIMD128-NEXT:    local.tee $push3=, $1=, $pop4
-; SIMD128-NEXT:    f64x2.gt $push0=, $0, $1
-; SIMD128-NEXT:    v128.bitselect $push1=, $0, $pop3, $pop0
+; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
+; SIMD128-NEXT:    f64x2.pmax $push1=, $0, $pop0
 ; SIMD128-NEXT:    f64x2.extract_lane $push2=, $pop1, 0
 ; SIMD128-NEXT:    return $pop2
   %res = tail call fast double @llvm.vector.reduce.fmax.v2f64(<2 x double> %arg)
@@ -821,15 +819,11 @@ define float @pairwise_max_v4f32_fast(<4 x float> %arg) {
 ; SIMD128-LABEL: pairwise_max_v4f32_fast:
 ; SIMD128:         .functype pairwise_max_v4f32_fast (v128) -> (f32)
 ; SIMD128-NEXT:  # %bb.0:
-; SIMD128-NEXT:    i8x16.shuffle $push9=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
-; SIMD128-NEXT:    local.tee $push8=, $1=, $pop9
-; SIMD128-NEXT:    f32x4.gt $push0=, $0, $1
-; SIMD128-NEXT:    v128.bitselect $push7=, $0, $pop8, $pop0
-; SIMD128-NEXT:    local.tee $push6=, $0=, $pop7
-; SIMD128-NEXT:    i8x16.shuffle $push5=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
-; SIMD128-NEXT:    local.tee $push4=, $1=, $pop5
-; SIMD128-NEXT:    f32x4.gt $push1=, $0, $1
-; SIMD128-NEXT:    v128.bitselect $push2=, $pop6, $pop4, $pop1
+; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
+; SIMD128-NEXT:    f32x4.pmax $push5=, $0, $pop0
+; SIMD128-NEXT:    local.tee $push4=, $0=, $pop5
+; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
+; SIMD128-NEXT:    f32x4.pmax $push2=, $pop4, $pop1
 ; SIMD128-NEXT:    f32x4.extract_lane $push3=, $pop2, 0
 ; SIMD128-NEXT:    return $pop3
   %res = tail call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> %arg)
@@ -868,10 +862,8 @@ define double @pairwise_min_v2f64_fast(<2 x double> %arg) {
 ; SIMD128-LABEL: pairwise_min_v2f64_fast:
 ; SIMD128:         .functype pairwise_min_v2f64_fast (v128) -> (f64)
 ; SIMD128-NEXT:  # %bb.0:
-; SIMD128-NEXT:    i8x16.shuffle $push4=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
-; SIMD128-NEXT:    local.tee $push3=, $1=, $pop4
-; SIMD128-NEXT:    f64x2.lt $push0=, $0, $1
-; SIMD128-NEXT:    v128.bitselect $push1=, $0, $pop3, $pop0
+; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
+; SIMD128-NEXT:    f64x2.pmin $push1=, $0, $pop0
 ; SIMD128-NEXT:    f64x2.extract_lane $push2=, $pop1, 0
 ; SIMD128-NEXT:    return $pop2
   %res = tail call fast double @llvm.vector.reduce.fmin.v2f64(<2 x double> %arg)
@@ -898,15 +890,11 @@ define float @pairwise_min_v4f32_fast(<4 x float> %arg) {
 ; SIMD128-LABEL: pairwise_min_v4f32_fast:
 ; SIMD128:         .functype pairwise_min_v4f32_fast (v128) -> (f32)
 ; SIMD128-NEXT:  # %bb.0:
-; SIMD128-NEXT:    i8x16.shuffle $push9=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
-; SIMD128-NEXT:    local.tee $push8=, $1=, $pop9
-; SIMD128-NEXT:    f32x4.lt $push0=, $0, $1
-; SIMD128-NEXT:    v128.bitselect $push7=, $0, $pop8, $pop0
-; SIMD128-NEXT:    local.tee $push6=, $0=, $pop7
-; SIMD128-NEXT:    i8x16.shuffle $push5=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
-; SIMD128-NEXT:    local.tee $push4=, $1=, $pop5
-; SIMD128-NEXT:    f32x4.lt $push1=, $0, $1
-; SIMD128-NEXT:    v128.bitselect $push2=, $pop6, $pop4, $pop1
+; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
+; SIMD128-NEXT:    f32x4.pmin $push5=, $0, $pop0
+; SIMD128-NEXT:    local.tee $push4=, $0=, $pop5
+; SIMD128-NEXT:    i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
+; SIMD128-NEXT:    f32x4.pmin $push2=, $pop4, $pop1
 ; SIMD128-NEXT:    f32x4.extract_lane $push3=, $pop2, 0
 ; SIMD128-NEXT:    return $pop3
   %res = tail call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> %arg)


        


More information about the llvm-commits mailing list