[llvm] a3de21c - [WebAssembly] Ofast pmin/pmax pattern matchers (#100107)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 24 01:23:52 PDT 2024
Author: Sam Parker
Date: 2024-07-24T09:23:49+01:00
New Revision: a3de21cac1fb8f1dd98cfe1d1443e2d3f0a97351
URL: https://github.com/llvm/llvm-project/commit/a3de21cac1fb8f1dd98cfe1d1443e2d3f0a97351
DIFF: https://github.com/llvm/llvm-project/commit/a3de21cac1fb8f1dd98cfe1d1443e2d3f0a97351.diff
LOG: [WebAssembly] Ofast pmin/pmax pattern matchers (#100107)
With fast-math, the ordered setcc condition codes (setolt, setole, setogt,
setoge) are converted to their NaN-agnostic counterparts, so add pmin/pmax
patterns that also match setlt, setle, setgt and setge.
Added:
Modified:
llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
llvm/test/CodeGen/WebAssembly/simd-arith.ll
llvm/test/CodeGen/WebAssembly/vector-reduce.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 2ee430c88169d..d6c6425b10041 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1270,7 +1270,11 @@ def pmin : PatFrags<(ops node:$lhs, node:$rhs), [
(vselect (setolt $rhs, $lhs), $rhs, $lhs),
(vselect (setole $rhs, $lhs), $rhs, $lhs),
(vselect (setogt $lhs, $rhs), $rhs, $lhs),
- (vselect (setoge $lhs, $rhs), $rhs, $lhs)
+ (vselect (setoge $lhs, $rhs), $rhs, $lhs),
+ (vselect (setlt $lhs, $rhs), $lhs, $rhs),
+ (vselect (setle $lhs, $rhs), $lhs, $rhs),
+ (vselect (setgt $lhs, $rhs), $rhs, $lhs),
+ (vselect (setge $lhs, $rhs), $rhs, $lhs)
]>;
defm PMIN : SIMDBinaryFP<pmin, "pmin", 234>;
@@ -1279,7 +1283,11 @@ def pmax : PatFrags<(ops node:$lhs, node:$rhs), [
(vselect (setogt $rhs, $lhs), $rhs, $lhs),
(vselect (setoge $rhs, $lhs), $rhs, $lhs),
(vselect (setolt $lhs, $rhs), $rhs, $lhs),
- (vselect (setole $lhs, $rhs), $rhs, $lhs)
+ (vselect (setole $lhs, $rhs), $rhs, $lhs),
+ (vselect (setgt $lhs, $rhs), $lhs, $rhs),
+ (vselect (setge $lhs, $rhs), $lhs, $rhs),
+ (vselect (setlt $lhs, $rhs), $rhs, $lhs),
+ (vselect (setle $lhs, $rhs), $rhs, $lhs)
]>;
defm PMAX : SIMDBinaryFP<pmax, "pmax", 235>;
diff --git a/llvm/test/CodeGen/WebAssembly/simd-arith.ll b/llvm/test/CodeGen/WebAssembly/simd-arith.ll
index 67388b688e3bb..185c46aa5681e 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-arith.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-arith.ll
@@ -12499,6 +12499,210 @@ define <4 x float> @pmin_v4f32(<4 x float> %x, <4 x float> %y) {
ret <4 x float> %a
}
+define <4 x float> @pmin_v4f32_fast_olt(<4 x float> %x, <4 x float> %y) {
+; SIMD128-LABEL: pmin_v4f32_fast_olt:
+; SIMD128: .functype pmin_v4f32_fast_olt (v128, v128) -> (v128)
+; SIMD128-NEXT: # %bb.0:
+; SIMD128-NEXT: f32x4.pmin $push0=, $1, $0
+; SIMD128-NEXT: return $pop0
+;
+; SIMD128-FAST-LABEL: pmin_v4f32_fast_olt:
+; SIMD128-FAST: .functype pmin_v4f32_fast_olt (v128, v128) -> (v128)
+; SIMD128-FAST-NEXT: # %bb.0:
+; SIMD128-FAST-NEXT: f32x4.pmin $push0=, $1, $0
+; SIMD128-FAST-NEXT: return $pop0
+;
+; NO-SIMD128-LABEL: pmin_v4f32_fast_olt:
+; NO-SIMD128: .functype pmin_v4f32_fast_olt (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: f32.lt $push0=, $8, $4
+; NO-SIMD128-NEXT: f32.select $push1=, $8, $4, $pop0
+; NO-SIMD128-NEXT: f32.store 12($0), $pop1
+; NO-SIMD128-NEXT: f32.lt $push2=, $7, $3
+; NO-SIMD128-NEXT: f32.select $push3=, $7, $3, $pop2
+; NO-SIMD128-NEXT: f32.store 8($0), $pop3
+; NO-SIMD128-NEXT: f32.lt $push4=, $6, $2
+; NO-SIMD128-NEXT: f32.select $push5=, $6, $2, $pop4
+; NO-SIMD128-NEXT: f32.store 4($0), $pop5
+; NO-SIMD128-NEXT: f32.lt $push6=, $5, $1
+; NO-SIMD128-NEXT: f32.select $push7=, $5, $1, $pop6
+; NO-SIMD128-NEXT: f32.store 0($0), $pop7
+; NO-SIMD128-NEXT: return
+;
+; NO-SIMD128-FAST-LABEL: pmin_v4f32_fast_olt:
+; NO-SIMD128-FAST: .functype pmin_v4f32_fast_olt (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-FAST-NEXT: # %bb.0:
+; NO-SIMD128-FAST-NEXT: f32.lt $push0=, $5, $1
+; NO-SIMD128-FAST-NEXT: f32.select $push1=, $5, $1, $pop0
+; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1
+; NO-SIMD128-FAST-NEXT: f32.lt $push2=, $6, $2
+; NO-SIMD128-FAST-NEXT: f32.select $push3=, $6, $2, $pop2
+; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3
+; NO-SIMD128-FAST-NEXT: f32.lt $push4=, $7, $3
+; NO-SIMD128-FAST-NEXT: f32.select $push5=, $7, $3, $pop4
+; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop5
+; NO-SIMD128-FAST-NEXT: f32.lt $push6=, $8, $4
+; NO-SIMD128-FAST-NEXT: f32.select $push7=, $8, $4, $pop6
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop7
+; NO-SIMD128-FAST-NEXT: return
+ %c = fcmp fast olt <4 x float> %y, %x
+ %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+ ret <4 x float> %a
+}
+
+define <4 x float> @pmin_v4f32_fast_ogt(<4 x float> %x, <4 x float> %y) {
+; SIMD128-LABEL: pmin_v4f32_fast_ogt:
+; SIMD128: .functype pmin_v4f32_fast_ogt (v128, v128) -> (v128)
+; SIMD128-NEXT: # %bb.0:
+; SIMD128-NEXT: f32x4.pmin $push0=, $0, $1
+; SIMD128-NEXT: return $pop0
+;
+; SIMD128-FAST-LABEL: pmin_v4f32_fast_ogt:
+; SIMD128-FAST: .functype pmin_v4f32_fast_ogt (v128, v128) -> (v128)
+; SIMD128-FAST-NEXT: # %bb.0:
+; SIMD128-FAST-NEXT: f32x4.pmin $push0=, $0, $1
+; SIMD128-FAST-NEXT: return $pop0
+;
+; NO-SIMD128-LABEL: pmin_v4f32_fast_ogt:
+; NO-SIMD128: .functype pmin_v4f32_fast_ogt (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: f32.gt $push0=, $4, $8
+; NO-SIMD128-NEXT: f32.select $push1=, $8, $4, $pop0
+; NO-SIMD128-NEXT: f32.store 12($0), $pop1
+; NO-SIMD128-NEXT: f32.gt $push2=, $3, $7
+; NO-SIMD128-NEXT: f32.select $push3=, $7, $3, $pop2
+; NO-SIMD128-NEXT: f32.store 8($0), $pop3
+; NO-SIMD128-NEXT: f32.gt $push4=, $2, $6
+; NO-SIMD128-NEXT: f32.select $push5=, $6, $2, $pop4
+; NO-SIMD128-NEXT: f32.store 4($0), $pop5
+; NO-SIMD128-NEXT: f32.gt $push6=, $1, $5
+; NO-SIMD128-NEXT: f32.select $push7=, $5, $1, $pop6
+; NO-SIMD128-NEXT: f32.store 0($0), $pop7
+; NO-SIMD128-NEXT: return
+;
+; NO-SIMD128-FAST-LABEL: pmin_v4f32_fast_ogt:
+; NO-SIMD128-FAST: .functype pmin_v4f32_fast_ogt (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-FAST-NEXT: # %bb.0:
+; NO-SIMD128-FAST-NEXT: f32.gt $push0=, $1, $5
+; NO-SIMD128-FAST-NEXT: f32.select $push1=, $5, $1, $pop0
+; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1
+; NO-SIMD128-FAST-NEXT: f32.gt $push2=, $2, $6
+; NO-SIMD128-FAST-NEXT: f32.select $push3=, $6, $2, $pop2
+; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3
+; NO-SIMD128-FAST-NEXT: f32.gt $push4=, $3, $7
+; NO-SIMD128-FAST-NEXT: f32.select $push5=, $7, $3, $pop4
+; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop5
+; NO-SIMD128-FAST-NEXT: f32.gt $push6=, $4, $8
+; NO-SIMD128-FAST-NEXT: f32.select $push7=, $8, $4, $pop6
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop7
+; NO-SIMD128-FAST-NEXT: return
+ %c = fcmp fast ogt <4 x float> %x, %y
+ %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+ ret <4 x float> %a
+}
+
+define <4 x float> @pmin_v4f32_fast_ole(<4 x float> %x, <4 x float> %y) {
+; SIMD128-LABEL: pmin_v4f32_fast_ole:
+; SIMD128: .functype pmin_v4f32_fast_ole (v128, v128) -> (v128)
+; SIMD128-NEXT: # %bb.0:
+; SIMD128-NEXT: f32x4.pmin $push0=, $1, $0
+; SIMD128-NEXT: return $pop0
+;
+; SIMD128-FAST-LABEL: pmin_v4f32_fast_ole:
+; SIMD128-FAST: .functype pmin_v4f32_fast_ole (v128, v128) -> (v128)
+; SIMD128-FAST-NEXT: # %bb.0:
+; SIMD128-FAST-NEXT: f32x4.pmin $push0=, $1, $0
+; SIMD128-FAST-NEXT: return $pop0
+;
+; NO-SIMD128-LABEL: pmin_v4f32_fast_ole:
+; NO-SIMD128: .functype pmin_v4f32_fast_ole (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: f32.le $push0=, $8, $4
+; NO-SIMD128-NEXT: f32.select $push1=, $8, $4, $pop0
+; NO-SIMD128-NEXT: f32.store 12($0), $pop1
+; NO-SIMD128-NEXT: f32.le $push2=, $7, $3
+; NO-SIMD128-NEXT: f32.select $push3=, $7, $3, $pop2
+; NO-SIMD128-NEXT: f32.store 8($0), $pop3
+; NO-SIMD128-NEXT: f32.le $push4=, $6, $2
+; NO-SIMD128-NEXT: f32.select $push5=, $6, $2, $pop4
+; NO-SIMD128-NEXT: f32.store 4($0), $pop5
+; NO-SIMD128-NEXT: f32.le $push6=, $5, $1
+; NO-SIMD128-NEXT: f32.select $push7=, $5, $1, $pop6
+; NO-SIMD128-NEXT: f32.store 0($0), $pop7
+; NO-SIMD128-NEXT: return
+;
+; NO-SIMD128-FAST-LABEL: pmin_v4f32_fast_ole:
+; NO-SIMD128-FAST: .functype pmin_v4f32_fast_ole (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-FAST-NEXT: # %bb.0:
+; NO-SIMD128-FAST-NEXT: f32.le $push0=, $5, $1
+; NO-SIMD128-FAST-NEXT: f32.select $push1=, $5, $1, $pop0
+; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1
+; NO-SIMD128-FAST-NEXT: f32.le $push2=, $6, $2
+; NO-SIMD128-FAST-NEXT: f32.select $push3=, $6, $2, $pop2
+; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3
+; NO-SIMD128-FAST-NEXT: f32.le $push4=, $7, $3
+; NO-SIMD128-FAST-NEXT: f32.select $push5=, $7, $3, $pop4
+; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop5
+; NO-SIMD128-FAST-NEXT: f32.le $push6=, $8, $4
+; NO-SIMD128-FAST-NEXT: f32.select $push7=, $8, $4, $pop6
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop7
+; NO-SIMD128-FAST-NEXT: return
+ %c = fcmp fast ole <4 x float> %y, %x
+ %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+ ret <4 x float> %a
+}
+
+define <4 x float> @pmin_v4f32_fast_oge(<4 x float> %x, <4 x float> %y) {
+; SIMD128-LABEL: pmin_v4f32_fast_oge:
+; SIMD128: .functype pmin_v4f32_fast_oge (v128, v128) -> (v128)
+; SIMD128-NEXT: # %bb.0:
+; SIMD128-NEXT: f32x4.pmin $push0=, $0, $1
+; SIMD128-NEXT: return $pop0
+;
+; SIMD128-FAST-LABEL: pmin_v4f32_fast_oge:
+; SIMD128-FAST: .functype pmin_v4f32_fast_oge (v128, v128) -> (v128)
+; SIMD128-FAST-NEXT: # %bb.0:
+; SIMD128-FAST-NEXT: f32x4.pmin $push0=, $0, $1
+; SIMD128-FAST-NEXT: return $pop0
+;
+; NO-SIMD128-LABEL: pmin_v4f32_fast_oge:
+; NO-SIMD128: .functype pmin_v4f32_fast_oge (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: f32.ge $push0=, $4, $8
+; NO-SIMD128-NEXT: f32.select $push1=, $8, $4, $pop0
+; NO-SIMD128-NEXT: f32.store 12($0), $pop1
+; NO-SIMD128-NEXT: f32.ge $push2=, $3, $7
+; NO-SIMD128-NEXT: f32.select $push3=, $7, $3, $pop2
+; NO-SIMD128-NEXT: f32.store 8($0), $pop3
+; NO-SIMD128-NEXT: f32.ge $push4=, $2, $6
+; NO-SIMD128-NEXT: f32.select $push5=, $6, $2, $pop4
+; NO-SIMD128-NEXT: f32.store 4($0), $pop5
+; NO-SIMD128-NEXT: f32.ge $push6=, $1, $5
+; NO-SIMD128-NEXT: f32.select $push7=, $5, $1, $pop6
+; NO-SIMD128-NEXT: f32.store 0($0), $pop7
+; NO-SIMD128-NEXT: return
+;
+; NO-SIMD128-FAST-LABEL: pmin_v4f32_fast_oge:
+; NO-SIMD128-FAST: .functype pmin_v4f32_fast_oge (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-FAST-NEXT: # %bb.0:
+; NO-SIMD128-FAST-NEXT: f32.ge $push0=, $1, $5
+; NO-SIMD128-FAST-NEXT: f32.select $push1=, $5, $1, $pop0
+; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1
+; NO-SIMD128-FAST-NEXT: f32.ge $push2=, $2, $6
+; NO-SIMD128-FAST-NEXT: f32.select $push3=, $6, $2, $pop2
+; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3
+; NO-SIMD128-FAST-NEXT: f32.ge $push4=, $3, $7
+; NO-SIMD128-FAST-NEXT: f32.select $push5=, $7, $3, $pop4
+; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop5
+; NO-SIMD128-FAST-NEXT: f32.ge $push6=, $4, $8
+; NO-SIMD128-FAST-NEXT: f32.select $push7=, $8, $4, $pop6
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop7
+; NO-SIMD128-FAST-NEXT: return
+ %c = fcmp fast oge <4 x float> %x, %y
+ %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+ ret <4 x float> %a
+}
+
define <4 x i32> @pmin_int_v4f32(<4 x i32> %x, <4 x i32> %y) {
; SIMD128-LABEL: pmin_int_v4f32:
; SIMD128: .functype pmin_int_v4f32 (v128, v128) -> (v128)
@@ -12619,6 +12823,210 @@ define <4 x float> @pmax_v4f32(<4 x float> %x, <4 x float> %y) {
ret <4 x float> %a
}
+define <4 x float> @pmax_v4f32_fast_ogt(<4 x float> %x, <4 x float> %y) {
+; SIMD128-LABEL: pmax_v4f32_fast_ogt:
+; SIMD128: .functype pmax_v4f32_fast_ogt (v128, v128) -> (v128)
+; SIMD128-NEXT: # %bb.0:
+; SIMD128-NEXT: f32x4.pmax $push0=, $0, $1
+; SIMD128-NEXT: return $pop0
+;
+; SIMD128-FAST-LABEL: pmax_v4f32_fast_ogt:
+; SIMD128-FAST: .functype pmax_v4f32_fast_ogt (v128, v128) -> (v128)
+; SIMD128-FAST-NEXT: # %bb.0:
+; SIMD128-FAST-NEXT: f32x4.pmax $push0=, $0, $1
+; SIMD128-FAST-NEXT: return $pop0
+;
+; NO-SIMD128-LABEL: pmax_v4f32_fast_ogt:
+; NO-SIMD128: .functype pmax_v4f32_fast_ogt (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: f32.gt $push0=, $4, $8
+; NO-SIMD128-NEXT: f32.select $push1=, $4, $8, $pop0
+; NO-SIMD128-NEXT: f32.store 12($0), $pop1
+; NO-SIMD128-NEXT: f32.gt $push2=, $3, $7
+; NO-SIMD128-NEXT: f32.select $push3=, $3, $7, $pop2
+; NO-SIMD128-NEXT: f32.store 8($0), $pop3
+; NO-SIMD128-NEXT: f32.gt $push4=, $2, $6
+; NO-SIMD128-NEXT: f32.select $push5=, $2, $6, $pop4
+; NO-SIMD128-NEXT: f32.store 4($0), $pop5
+; NO-SIMD128-NEXT: f32.gt $push6=, $1, $5
+; NO-SIMD128-NEXT: f32.select $push7=, $1, $5, $pop6
+; NO-SIMD128-NEXT: f32.store 0($0), $pop7
+; NO-SIMD128-NEXT: return
+;
+; NO-SIMD128-FAST-LABEL: pmax_v4f32_fast_ogt:
+; NO-SIMD128-FAST: .functype pmax_v4f32_fast_ogt (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-FAST-NEXT: # %bb.0:
+; NO-SIMD128-FAST-NEXT: f32.gt $push0=, $1, $5
+; NO-SIMD128-FAST-NEXT: f32.select $push1=, $1, $5, $pop0
+; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1
+; NO-SIMD128-FAST-NEXT: f32.gt $push2=, $2, $6
+; NO-SIMD128-FAST-NEXT: f32.select $push3=, $2, $6, $pop2
+; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3
+; NO-SIMD128-FAST-NEXT: f32.gt $push4=, $3, $7
+; NO-SIMD128-FAST-NEXT: f32.select $push5=, $3, $7, $pop4
+; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop5
+; NO-SIMD128-FAST-NEXT: f32.gt $push6=, $4, $8
+; NO-SIMD128-FAST-NEXT: f32.select $push7=, $4, $8, $pop6
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop7
+; NO-SIMD128-FAST-NEXT: return
+ %c = fcmp fast ogt <4 x float> %x, %y
+ %a = select <4 x i1> %c, <4 x float> %x, <4 x float> %y
+ ret <4 x float> %a
+}
+
+define <4 x float> @pmax_v4f32_fast_olt(<4 x float> %x, <4 x float> %y) {
+; SIMD128-LABEL: pmax_v4f32_fast_olt:
+; SIMD128: .functype pmax_v4f32_fast_olt (v128, v128) -> (v128)
+; SIMD128-NEXT: # %bb.0:
+; SIMD128-NEXT: f32x4.pmax $push0=, $0, $1
+; SIMD128-NEXT: return $pop0
+;
+; SIMD128-FAST-LABEL: pmax_v4f32_fast_olt:
+; SIMD128-FAST: .functype pmax_v4f32_fast_olt (v128, v128) -> (v128)
+; SIMD128-FAST-NEXT: # %bb.0:
+; SIMD128-FAST-NEXT: f32x4.pmax $push0=, $0, $1
+; SIMD128-FAST-NEXT: return $pop0
+;
+; NO-SIMD128-LABEL: pmax_v4f32_fast_olt:
+; NO-SIMD128: .functype pmax_v4f32_fast_olt (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: f32.lt $push0=, $4, $8
+; NO-SIMD128-NEXT: f32.select $push1=, $8, $4, $pop0
+; NO-SIMD128-NEXT: f32.store 12($0), $pop1
+; NO-SIMD128-NEXT: f32.lt $push2=, $3, $7
+; NO-SIMD128-NEXT: f32.select $push3=, $7, $3, $pop2
+; NO-SIMD128-NEXT: f32.store 8($0), $pop3
+; NO-SIMD128-NEXT: f32.lt $push4=, $2, $6
+; NO-SIMD128-NEXT: f32.select $push5=, $6, $2, $pop4
+; NO-SIMD128-NEXT: f32.store 4($0), $pop5
+; NO-SIMD128-NEXT: f32.lt $push6=, $1, $5
+; NO-SIMD128-NEXT: f32.select $push7=, $5, $1, $pop6
+; NO-SIMD128-NEXT: f32.store 0($0), $pop7
+; NO-SIMD128-NEXT: return
+;
+; NO-SIMD128-FAST-LABEL: pmax_v4f32_fast_olt:
+; NO-SIMD128-FAST: .functype pmax_v4f32_fast_olt (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-FAST-NEXT: # %bb.0:
+; NO-SIMD128-FAST-NEXT: f32.lt $push0=, $1, $5
+; NO-SIMD128-FAST-NEXT: f32.select $push1=, $5, $1, $pop0
+; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1
+; NO-SIMD128-FAST-NEXT: f32.lt $push2=, $2, $6
+; NO-SIMD128-FAST-NEXT: f32.select $push3=, $6, $2, $pop2
+; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3
+; NO-SIMD128-FAST-NEXT: f32.lt $push4=, $3, $7
+; NO-SIMD128-FAST-NEXT: f32.select $push5=, $7, $3, $pop4
+; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop5
+; NO-SIMD128-FAST-NEXT: f32.lt $push6=, $4, $8
+; NO-SIMD128-FAST-NEXT: f32.select $push7=, $8, $4, $pop6
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop7
+; NO-SIMD128-FAST-NEXT: return
+ %c = fcmp fast olt <4 x float> %x, %y
+ %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+ ret <4 x float> %a
+}
+
+define <4 x float> @pmax_v4f32_fast_oge(<4 x float> %x, <4 x float> %y) {
+; SIMD128-LABEL: pmax_v4f32_fast_oge:
+; SIMD128: .functype pmax_v4f32_fast_oge (v128, v128) -> (v128)
+; SIMD128-NEXT: # %bb.0:
+; SIMD128-NEXT: f32x4.pmax $push0=, $0, $1
+; SIMD128-NEXT: return $pop0
+;
+; SIMD128-FAST-LABEL: pmax_v4f32_fast_oge:
+; SIMD128-FAST: .functype pmax_v4f32_fast_oge (v128, v128) -> (v128)
+; SIMD128-FAST-NEXT: # %bb.0:
+; SIMD128-FAST-NEXT: f32x4.pmax $push0=, $0, $1
+; SIMD128-FAST-NEXT: return $pop0
+;
+; NO-SIMD128-LABEL: pmax_v4f32_fast_oge:
+; NO-SIMD128: .functype pmax_v4f32_fast_oge (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: f32.ge $push0=, $4, $8
+; NO-SIMD128-NEXT: f32.select $push1=, $4, $8, $pop0
+; NO-SIMD128-NEXT: f32.store 12($0), $pop1
+; NO-SIMD128-NEXT: f32.ge $push2=, $3, $7
+; NO-SIMD128-NEXT: f32.select $push3=, $3, $7, $pop2
+; NO-SIMD128-NEXT: f32.store 8($0), $pop3
+; NO-SIMD128-NEXT: f32.ge $push4=, $2, $6
+; NO-SIMD128-NEXT: f32.select $push5=, $2, $6, $pop4
+; NO-SIMD128-NEXT: f32.store 4($0), $pop5
+; NO-SIMD128-NEXT: f32.ge $push6=, $1, $5
+; NO-SIMD128-NEXT: f32.select $push7=, $1, $5, $pop6
+; NO-SIMD128-NEXT: f32.store 0($0), $pop7
+; NO-SIMD128-NEXT: return
+;
+; NO-SIMD128-FAST-LABEL: pmax_v4f32_fast_oge:
+; NO-SIMD128-FAST: .functype pmax_v4f32_fast_oge (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-FAST-NEXT: # %bb.0:
+; NO-SIMD128-FAST-NEXT: f32.ge $push0=, $1, $5
+; NO-SIMD128-FAST-NEXT: f32.select $push1=, $1, $5, $pop0
+; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1
+; NO-SIMD128-FAST-NEXT: f32.ge $push2=, $2, $6
+; NO-SIMD128-FAST-NEXT: f32.select $push3=, $2, $6, $pop2
+; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3
+; NO-SIMD128-FAST-NEXT: f32.ge $push4=, $3, $7
+; NO-SIMD128-FAST-NEXT: f32.select $push5=, $3, $7, $pop4
+; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop5
+; NO-SIMD128-FAST-NEXT: f32.ge $push6=, $4, $8
+; NO-SIMD128-FAST-NEXT: f32.select $push7=, $4, $8, $pop6
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop7
+; NO-SIMD128-FAST-NEXT: return
+ %c = fcmp fast oge <4 x float> %x, %y
+ %a = select <4 x i1> %c, <4 x float> %x, <4 x float> %y
+ ret <4 x float> %a
+}
+
+define <4 x float> @pmax_v4f32_fast_ole(<4 x float> %x, <4 x float> %y) {
+; SIMD128-LABEL: pmax_v4f32_fast_ole:
+; SIMD128: .functype pmax_v4f32_fast_ole (v128, v128) -> (v128)
+; SIMD128-NEXT: # %bb.0:
+; SIMD128-NEXT: f32x4.pmax $push0=, $0, $1
+; SIMD128-NEXT: return $pop0
+;
+; SIMD128-FAST-LABEL: pmax_v4f32_fast_ole:
+; SIMD128-FAST: .functype pmax_v4f32_fast_ole (v128, v128) -> (v128)
+; SIMD128-FAST-NEXT: # %bb.0:
+; SIMD128-FAST-NEXT: f32x4.pmax $push0=, $0, $1
+; SIMD128-FAST-NEXT: return $pop0
+;
+; NO-SIMD128-LABEL: pmax_v4f32_fast_ole:
+; NO-SIMD128: .functype pmax_v4f32_fast_ole (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: f32.le $push0=, $4, $8
+; NO-SIMD128-NEXT: f32.select $push1=, $8, $4, $pop0
+; NO-SIMD128-NEXT: f32.store 12($0), $pop1
+; NO-SIMD128-NEXT: f32.le $push2=, $3, $7
+; NO-SIMD128-NEXT: f32.select $push3=, $7, $3, $pop2
+; NO-SIMD128-NEXT: f32.store 8($0), $pop3
+; NO-SIMD128-NEXT: f32.le $push4=, $2, $6
+; NO-SIMD128-NEXT: f32.select $push5=, $6, $2, $pop4
+; NO-SIMD128-NEXT: f32.store 4($0), $pop5
+; NO-SIMD128-NEXT: f32.le $push6=, $1, $5
+; NO-SIMD128-NEXT: f32.select $push7=, $5, $1, $pop6
+; NO-SIMD128-NEXT: f32.store 0($0), $pop7
+; NO-SIMD128-NEXT: return
+;
+; NO-SIMD128-FAST-LABEL: pmax_v4f32_fast_ole:
+; NO-SIMD128-FAST: .functype pmax_v4f32_fast_ole (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-FAST-NEXT: # %bb.0:
+; NO-SIMD128-FAST-NEXT: f32.le $push0=, $1, $5
+; NO-SIMD128-FAST-NEXT: f32.select $push1=, $5, $1, $pop0
+; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1
+; NO-SIMD128-FAST-NEXT: f32.le $push2=, $2, $6
+; NO-SIMD128-FAST-NEXT: f32.select $push3=, $6, $2, $pop2
+; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3
+; NO-SIMD128-FAST-NEXT: f32.le $push4=, $3, $7
+; NO-SIMD128-FAST-NEXT: f32.select $push5=, $7, $3, $pop4
+; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop5
+; NO-SIMD128-FAST-NEXT: f32.le $push6=, $4, $8
+; NO-SIMD128-FAST-NEXT: f32.select $push7=, $8, $4, $pop6
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop7
+; NO-SIMD128-FAST-NEXT: return
+ %c = fcmp fast ole <4 x float> %x, %y
+ %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+ ret <4 x float> %a
+}
+
define <4 x i32> @pmax_int_v4f32(<4 x i32> %x, <4 x i32> %y) {
; SIMD128-LABEL: pmax_int_v4f32:
; SIMD128: .functype pmax_int_v4f32 (v128, v128) -> (v128)
diff --git a/llvm/test/CodeGen/WebAssembly/vector-reduce.ll b/llvm/test/CodeGen/WebAssembly/vector-reduce.ll
index 4b1a1a8a0c5b6..1d194b640eab2 100644
--- a/llvm/test/CodeGen/WebAssembly/vector-reduce.ll
+++ b/llvm/test/CodeGen/WebAssembly/vector-reduce.ll
@@ -791,10 +791,8 @@ define double @pairwise_max_v2f64_fast(<2 x double> %arg) {
; SIMD128-LABEL: pairwise_max_v2f64_fast:
; SIMD128: .functype pairwise_max_v2f64_fast (v128) -> (f64)
; SIMD128-NEXT: # %bb.0:
-; SIMD128-NEXT: i8x16.shuffle $push4=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
-; SIMD128-NEXT: local.tee $push3=, $1=, $pop4
-; SIMD128-NEXT: f64x2.gt $push0=, $0, $1
-; SIMD128-NEXT: v128.bitselect $push1=, $0, $pop3, $pop0
+; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
+; SIMD128-NEXT: f64x2.pmax $push1=, $0, $pop0
; SIMD128-NEXT: f64x2.extract_lane $push2=, $pop1, 0
; SIMD128-NEXT: return $pop2
%res = tail call fast double @llvm.vector.reduce.fmax.v2f64(<2 x double> %arg)
@@ -821,15 +819,11 @@ define float @pairwise_max_v4f32_fast(<4 x float> %arg) {
; SIMD128-LABEL: pairwise_max_v4f32_fast:
; SIMD128: .functype pairwise_max_v4f32_fast (v128) -> (f32)
; SIMD128-NEXT: # %bb.0:
-; SIMD128-NEXT: i8x16.shuffle $push9=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
-; SIMD128-NEXT: local.tee $push8=, $1=, $pop9
-; SIMD128-NEXT: f32x4.gt $push0=, $0, $1
-; SIMD128-NEXT: v128.bitselect $push7=, $0, $pop8, $pop0
-; SIMD128-NEXT: local.tee $push6=, $0=, $pop7
-; SIMD128-NEXT: i8x16.shuffle $push5=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
-; SIMD128-NEXT: local.tee $push4=, $1=, $pop5
-; SIMD128-NEXT: f32x4.gt $push1=, $0, $1
-; SIMD128-NEXT: v128.bitselect $push2=, $pop6, $pop4, $pop1
+; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
+; SIMD128-NEXT: f32x4.pmax $push5=, $0, $pop0
+; SIMD128-NEXT: local.tee $push4=, $0=, $pop5
+; SIMD128-NEXT: i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
+; SIMD128-NEXT: f32x4.pmax $push2=, $pop4, $pop1
; SIMD128-NEXT: f32x4.extract_lane $push3=, $pop2, 0
; SIMD128-NEXT: return $pop3
%res = tail call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> %arg)
@@ -868,10 +862,8 @@ define double @pairwise_min_v2f64_fast(<2 x double> %arg) {
; SIMD128-LABEL: pairwise_min_v2f64_fast:
; SIMD128: .functype pairwise_min_v2f64_fast (v128) -> (f64)
; SIMD128-NEXT: # %bb.0:
-; SIMD128-NEXT: i8x16.shuffle $push4=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
-; SIMD128-NEXT: local.tee $push3=, $1=, $pop4
-; SIMD128-NEXT: f64x2.lt $push0=, $0, $1
-; SIMD128-NEXT: v128.bitselect $push1=, $0, $pop3, $pop0
+; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
+; SIMD128-NEXT: f64x2.pmin $push1=, $0, $pop0
; SIMD128-NEXT: f64x2.extract_lane $push2=, $pop1, 0
; SIMD128-NEXT: return $pop2
%res = tail call fast double @llvm.vector.reduce.fmin.v2f64(<2 x double> %arg)
@@ -898,15 +890,11 @@ define float @pairwise_min_v4f32_fast(<4 x float> %arg) {
; SIMD128-LABEL: pairwise_min_v4f32_fast:
; SIMD128: .functype pairwise_min_v4f32_fast (v128) -> (f32)
; SIMD128-NEXT: # %bb.0:
-; SIMD128-NEXT: i8x16.shuffle $push9=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
-; SIMD128-NEXT: local.tee $push8=, $1=, $pop9
-; SIMD128-NEXT: f32x4.lt $push0=, $0, $1
-; SIMD128-NEXT: v128.bitselect $push7=, $0, $pop8, $pop0
-; SIMD128-NEXT: local.tee $push6=, $0=, $pop7
-; SIMD128-NEXT: i8x16.shuffle $push5=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
-; SIMD128-NEXT: local.tee $push4=, $1=, $pop5
-; SIMD128-NEXT: f32x4.lt $push1=, $0, $1
-; SIMD128-NEXT: v128.bitselect $push2=, $pop6, $pop4, $pop1
+; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
+; SIMD128-NEXT: f32x4.pmin $push5=, $0, $pop0
+; SIMD128-NEXT: local.tee $push4=, $0=, $pop5
+; SIMD128-NEXT: i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
+; SIMD128-NEXT: f32x4.pmin $push2=, $pop4, $pop1
; SIMD128-NEXT: f32x4.extract_lane $push3=, $pop2, 0
; SIMD128-NEXT: return $pop3
%res = tail call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> %arg)
More information about the llvm-commits mailing list