[llvm] a674a12 - [WebAssembly] Additional patterns for pmin/pmax

Fri Feb 10 01:54:59 PST 2023

Author: Samuel Parker
Date: 2023-02-10T09:54:45Z
New Revision: a674a12dd57ef990fe7da4b46b00b441540a2e4a

URL: https://github.com/llvm/llvm-project/commit/a674a12dd57ef990fe7da4b46b00b441540a2e4a
DIFF: https://github.com/llvm/llvm-project/commit/a674a12dd57ef990fe7da4b46b00b441540a2e4a.diff

LOG: [WebAssembly] Additional patterns for pmin/pmax

Each operation was missing its inverted condition using olt or ogt.
Also, as we don't need to distinguish between +0 and -0, I think we
should also be able to use ole and oge.

Differential Revision: https://reviews.llvm.org/D143581

Added: 
    

Modified: 
    llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
    llvm/test/CodeGen/WebAssembly/simd-arith.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index ad2ec40b8b31b..e84432e9f0554 100644

--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1166,13 +1166,21 @@ defm MIN : SIMDBinaryFP<fminimum, "min", 232>;
 defm MAX : SIMDBinaryFP<fmaximum, "max", 233>;
 
 // Pseudo-minimum: pmin
-def pmin : PatFrag<(ops node:$lhs, node:$rhs),
-                   (vselect (setolt $rhs, $lhs), $rhs, $lhs)>;
+def pmin : PatFrags<(ops node:$lhs, node:$rhs), [
+                    (vselect (setolt $lhs, $rhs), $lhs, $rhs),
+                    (vselect (setole $lhs, $rhs), $lhs, $rhs),
+                    (vselect (setogt $rhs, $lhs), $lhs, $rhs),
+                    (vselect (setoge $rhs, $lhs), $lhs, $rhs)
+]>;
 defm PMIN : SIMDBinaryFP<pmin, "pmin", 234>;
 
 // Pseudo-maximum: pmax
-def pmax : PatFrag<(ops node:$lhs, node:$rhs),
-                   (vselect (setolt $lhs, $rhs), $rhs, $lhs)>;
+def pmax : PatFrags<(ops node:$lhs, node:$rhs), [
+                    (vselect (setogt $lhs, $rhs), $lhs, $rhs),
+                    (vselect (setoge $lhs, $rhs), $lhs, $rhs),
+                    (vselect (setolt $rhs, $lhs), $lhs, $rhs),
+                    (vselect (setole $rhs, $lhs), $lhs, $rhs)
+]>;
 defm PMAX : SIMDBinaryFP<pmax, "pmax", 235>;
 
 // Also match the pmin/pmax cases where the operands are int vectors (but the

diff  --git a/llvm/test/CodeGen/WebAssembly/simd-arith.ll b/llvm/test/CodeGen/WebAssembly/simd-arith.ll
index 7e0682be73b66..5ec4d3e6eedab 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-arith.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-arith.ll
@@ -13142,19 +13142,15 @@ define <4 x float> @min_unordered_v4f32(<4 x float> %x) {
 ; SIMD128-LABEL: min_unordered_v4f32:
 ; SIMD128:         .functype min_unordered_v4f32 (v128) -> (v128)
 ; SIMD128-NEXT:  # %bb.0:
-; SIMD128-NEXT:    v128.const $push3=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2
-; SIMD128-NEXT:    local.tee $push2=, $1=, $pop3
-; SIMD128-NEXT:    f32x4.gt $push0=, $0, $1
-; SIMD128-NEXT:    v128.bitselect $push1=, $pop2, $0, $pop0
+; SIMD128-NEXT:    v128.const $push0=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2
+; SIMD128-NEXT:    f32x4.pmin $push1=, $pop0, $0
 ; SIMD128-NEXT:    return $pop1
 ;
 ; SIMD128-FAST-LABEL: min_unordered_v4f32:
 ; SIMD128-FAST:         .functype min_unordered_v4f32 (v128) -> (v128)
 ; SIMD128-FAST-NEXT:  # %bb.0:
-; SIMD128-FAST-NEXT:    v128.const $push3=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2
-; SIMD128-FAST-NEXT:    local.tee $push2=, $1=, $pop3
-; SIMD128-FAST-NEXT:    f32x4.gt $push1=, $0, $1
-; SIMD128-FAST-NEXT:    v128.bitselect $push0=, $pop2, $0, $pop1
+; SIMD128-FAST-NEXT:    v128.const $push1=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2
+; SIMD128-FAST-NEXT:    f32x4.pmin $push0=, $pop1, $0
 ; SIMD128-FAST-NEXT:    return $pop0
 ;
 ; NO-SIMD128-LABEL: min_unordered_v4f32:
@@ -13221,14 +13217,14 @@ define <4 x float> @max_unordered_v4f32(<4 x float> %x) {
 ; SIMD128:         .functype max_unordered_v4f32 (v128) -> (v128)
 ; SIMD128-NEXT:  # %bb.0:
 ; SIMD128-NEXT:    v128.const $push0=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2
-; SIMD128-NEXT:    f32x4.pmax $push1=, $0, $pop0
+; SIMD128-NEXT:    f32x4.pmax $push1=, $pop0, $0
 ; SIMD128-NEXT:    return $pop1
 ;
 ; SIMD128-FAST-LABEL: max_unordered_v4f32:
 ; SIMD128-FAST:         .functype max_unordered_v4f32 (v128) -> (v128)
 ; SIMD128-FAST-NEXT:  # %bb.0:
 ; SIMD128-FAST-NEXT:    v128.const $push1=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2
-; SIMD128-FAST-NEXT:    f32x4.pmax $push0=, $0, $pop1
+; SIMD128-FAST-NEXT:    f32x4.pmax $push0=, $pop1, $0
 ; SIMD128-FAST-NEXT:    return $pop0
 ;
 ; NO-SIMD128-LABEL: max_unordered_v4f32:
@@ -13294,19 +13290,15 @@ define <4 x float> @min_ordered_v4f32(<4 x float> %x) {
 ; SIMD128-LABEL: min_ordered_v4f32:
 ; SIMD128:         .functype min_ordered_v4f32 (v128) -> (v128)
 ; SIMD128-NEXT:  # %bb.0:
-; SIMD128-NEXT:    v128.const $push3=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2
-; SIMD128-NEXT:    local.tee $push2=, $1=, $pop3
-; SIMD128-NEXT:    f32x4.le $push0=, $1, $0
-; SIMD128-NEXT:    v128.bitselect $push1=, $pop2, $0, $pop0
+; SIMD128-NEXT:    v128.const $push0=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2
+; SIMD128-NEXT:    f32x4.pmin $push1=, $pop0, $0
 ; SIMD128-NEXT:    return $pop1
 ;
 ; SIMD128-FAST-LABEL: min_ordered_v4f32:
 ; SIMD128-FAST:         .functype min_ordered_v4f32 (v128) -> (v128)
 ; SIMD128-FAST-NEXT:  # %bb.0:
-; SIMD128-FAST-NEXT:    v128.const $push3=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2
-; SIMD128-FAST-NEXT:    local.tee $push2=, $1=, $pop3
-; SIMD128-FAST-NEXT:    f32x4.le $push1=, $1, $0
-; SIMD128-FAST-NEXT:    v128.bitselect $push0=, $pop2, $0, $pop1
+; SIMD128-FAST-NEXT:    v128.const $push1=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2
+; SIMD128-FAST-NEXT:    f32x4.pmin $push0=, $pop1, $0
 ; SIMD128-FAST-NEXT:    return $pop0
 ;
 ; NO-SIMD128-LABEL: min_ordered_v4f32:
@@ -13372,19 +13364,15 @@ define <4 x float> @max_ordered_v4f32(<4 x float> %x) {
 ; SIMD128-LABEL: max_ordered_v4f32:
 ; SIMD128:         .functype max_ordered_v4f32 (v128) -> (v128)
 ; SIMD128-NEXT:  # %bb.0:
-; SIMD128-NEXT:    v128.const $push3=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2
-; SIMD128-NEXT:    local.tee $push2=, $1=, $pop3
-; SIMD128-NEXT:    f32x4.ge $push0=, $1, $0
-; SIMD128-NEXT:    v128.bitselect $push1=, $pop2, $0, $pop0
+; SIMD128-NEXT:    v128.const $push0=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2
+; SIMD128-NEXT:    f32x4.pmax $push1=, $pop0, $0
 ; SIMD128-NEXT:    return $pop1
 ;
 ; SIMD128-FAST-LABEL: max_ordered_v4f32:
 ; SIMD128-FAST:         .functype max_ordered_v4f32 (v128) -> (v128)
 ; SIMD128-FAST-NEXT:  # %bb.0:
-; SIMD128-FAST-NEXT:    v128.const $push3=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2
-; SIMD128-FAST-NEXT:    local.tee $push2=, $1=, $pop3
-; SIMD128-FAST-NEXT:    f32x4.ge $push1=, $1, $0
-; SIMD128-FAST-NEXT:    v128.bitselect $push0=, $pop2, $0, $pop1
+; SIMD128-FAST-NEXT:    v128.const $push1=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2
+; SIMD128-FAST-NEXT:    f32x4.pmax $push0=, $pop1, $0
 ; SIMD128-FAST-NEXT:    return $pop0
 ;
 ; NO-SIMD128-LABEL: max_ordered_v4f32:
@@ -14240,13 +14228,13 @@ define <4 x float> @pmin_v4f32(<4 x float> %x, <4 x float> %y) {
 ; SIMD128-LABEL: pmin_v4f32:
 ; SIMD128:         .functype pmin_v4f32 (v128, v128) -> (v128)
 ; SIMD128-NEXT:  # %bb.0:
-; SIMD128-NEXT:    f32x4.pmin $push0=, $0, $1
+; SIMD128-NEXT:    f32x4.pmin $push0=, $1, $0
 ; SIMD128-NEXT:    return $pop0
 ;
 ; SIMD128-FAST-LABEL: pmin_v4f32:
 ; SIMD128-FAST:         .functype pmin_v4f32 (v128, v128) -> (v128)
 ; SIMD128-FAST-NEXT:  # %bb.0:
-; SIMD128-FAST-NEXT:    f32x4.pmin $push0=, $0, $1
+; SIMD128-FAST-NEXT:    f32x4.pmin $push0=, $1, $0
 ; SIMD128-FAST-NEXT:    return $pop0
 ;
 ; NO-SIMD128-LABEL: pmin_v4f32:
@@ -14368,13 +14356,13 @@ define <4 x float> @pmax_v4f32(<4 x float> %x, <4 x float> %y) {
 ; SIMD128-LABEL: pmax_v4f32:
 ; SIMD128:         .functype pmax_v4f32 (v128, v128) -> (v128)
 ; SIMD128-NEXT:  # %bb.0:
-; SIMD128-NEXT:    f32x4.pmax $push0=, $0, $1
+; SIMD128-NEXT:    f32x4.pmax $push0=, $1, $0
 ; SIMD128-NEXT:    return $pop0
 ;
 ; SIMD128-FAST-LABEL: pmax_v4f32:
 ; SIMD128-FAST:         .functype pmax_v4f32 (v128, v128) -> (v128)
 ; SIMD128-FAST-NEXT:  # %bb.0:
-; SIMD128-FAST-NEXT:    f32x4.pmax $push0=, $0, $1
+; SIMD128-FAST-NEXT:    f32x4.pmax $push0=, $1, $0
 ; SIMD128-FAST-NEXT:    return $pop0
 ;
 ; NO-SIMD128-LABEL: pmax_v4f32:
@@ -14800,19 +14788,15 @@ define <2 x double> @min_unordered_v2f64(<2 x double> %x) {
 ; SIMD128-LABEL: min_unordered_v2f64:
 ; SIMD128:         .functype min_unordered_v2f64 (v128) -> (v128)
 ; SIMD128-NEXT:  # %bb.0:
-; SIMD128-NEXT:    v128.const $push3=, 0x1.4p2, 0x1.4p2
-; SIMD128-NEXT:    local.tee $push2=, $1=, $pop3
-; SIMD128-NEXT:    f64x2.gt $push0=, $0, $1
-; SIMD128-NEXT:    v128.bitselect $push1=, $pop2, $0, $pop0
+; SIMD128-NEXT:    v128.const $push0=, 0x1.4p2, 0x1.4p2
+; SIMD128-NEXT:    f64x2.pmin $push1=, $pop0, $0
 ; SIMD128-NEXT:    return $pop1
 ;
 ; SIMD128-FAST-LABEL: min_unordered_v2f64:
 ; SIMD128-FAST:         .functype min_unordered_v2f64 (v128) -> (v128)
 ; SIMD128-FAST-NEXT:  # %bb.0:
-; SIMD128-FAST-NEXT:    v128.const $push3=, 0x1.4p2, 0x1.4p2
-; SIMD128-FAST-NEXT:    local.tee $push2=, $1=, $pop3
-; SIMD128-FAST-NEXT:    f64x2.gt $push1=, $0, $1
-; SIMD128-FAST-NEXT:    v128.bitselect $push0=, $pop2, $0, $pop1
+; SIMD128-FAST-NEXT:    v128.const $push1=, 0x1.4p2, 0x1.4p2
+; SIMD128-FAST-NEXT:    f64x2.pmin $push0=, $pop1, $0
 ; SIMD128-FAST-NEXT:    return $pop0
 ;
 ; NO-SIMD128-LABEL: min_unordered_v2f64:
@@ -14855,14 +14839,14 @@ define <2 x double> @max_unordered_v2f64(<2 x double> %x) {
 ; SIMD128:         .functype max_unordered_v2f64 (v128) -> (v128)
 ; SIMD128-NEXT:  # %bb.0:
 ; SIMD128-NEXT:    v128.const $push0=, 0x1.4p2, 0x1.4p2
-; SIMD128-NEXT:    f64x2.pmax $push1=, $0, $pop0
+; SIMD128-NEXT:    f64x2.pmax $push1=, $pop0, $0
 ; SIMD128-NEXT:    return $pop1
 ;
 ; SIMD128-FAST-LABEL: max_unordered_v2f64:
 ; SIMD128-FAST:         .functype max_unordered_v2f64 (v128) -> (v128)
 ; SIMD128-FAST-NEXT:  # %bb.0:
 ; SIMD128-FAST-NEXT:    v128.const $push1=, 0x1.4p2, 0x1.4p2
-; SIMD128-FAST-NEXT:    f64x2.pmax $push0=, $0, $pop1
+; SIMD128-FAST-NEXT:    f64x2.pmax $push0=, $pop1, $0
 ; SIMD128-FAST-NEXT:    return $pop0
 ;
 ; NO-SIMD128-LABEL: max_unordered_v2f64:
@@ -14904,19 +14888,15 @@ define <2 x double> @min_ordered_v2f64(<2 x double> %x) {
 ; SIMD128-LABEL: min_ordered_v2f64:
 ; SIMD128:         .functype min_ordered_v2f64 (v128) -> (v128)
 ; SIMD128-NEXT:  # %bb.0:
-; SIMD128-NEXT:    v128.const $push3=, 0x1.4p2, 0x1.4p2
-; SIMD128-NEXT:    local.tee $push2=, $1=, $pop3
-; SIMD128-NEXT:    f64x2.le $push0=, $1, $0
-; SIMD128-NEXT:    v128.bitselect $push1=, $pop2, $0, $pop0
+; SIMD128-NEXT:    v128.const $push0=, 0x1.4p2, 0x1.4p2
+; SIMD128-NEXT:    f64x2.pmin $push1=, $pop0, $0
 ; SIMD128-NEXT:    return $pop1
 ;
 ; SIMD128-FAST-LABEL: min_ordered_v2f64:
 ; SIMD128-FAST:         .functype min_ordered_v2f64 (v128) -> (v128)
 ; SIMD128-FAST-NEXT:  # %bb.0:
-; SIMD128-FAST-NEXT:    v128.const $push3=, 0x1.4p2, 0x1.4p2
-; SIMD128-FAST-NEXT:    local.tee $push2=, $1=, $pop3
-; SIMD128-FAST-NEXT:    f64x2.le $push1=, $1, $0
-; SIMD128-FAST-NEXT:    v128.bitselect $push0=, $pop2, $0, $pop1
+; SIMD128-FAST-NEXT:    v128.const $push1=, 0x1.4p2, 0x1.4p2
+; SIMD128-FAST-NEXT:    f64x2.pmin $push0=, $pop1, $0
 ; SIMD128-FAST-NEXT:    return $pop0
 ;
 ; NO-SIMD128-LABEL: min_ordered_v2f64:
@@ -14958,19 +14938,15 @@ define <2 x double> @max_ordered_v2f64(<2 x double> %x) {
 ; SIMD128-LABEL: max_ordered_v2f64:
 ; SIMD128:         .functype max_ordered_v2f64 (v128) -> (v128)
 ; SIMD128-NEXT:  # %bb.0:
-; SIMD128-NEXT:    v128.const $push3=, 0x1.4p2, 0x1.4p2
-; SIMD128-NEXT:    local.tee $push2=, $1=, $pop3
-; SIMD128-NEXT:    f64x2.ge $push0=, $1, $0
-; SIMD128-NEXT:    v128.bitselect $push1=, $pop2, $0, $pop0
+; SIMD128-NEXT:    v128.const $push0=, 0x1.4p2, 0x1.4p2
+; SIMD128-NEXT:    f64x2.pmax $push1=, $pop0, $0
 ; SIMD128-NEXT:    return $pop1
 ;
 ; SIMD128-FAST-LABEL: max_ordered_v2f64:
 ; SIMD128-FAST:         .functype max_ordered_v2f64 (v128) -> (v128)
 ; SIMD128-FAST-NEXT:  # %bb.0:
-; SIMD128-FAST-NEXT:    v128.const $push3=, 0x1.4p2, 0x1.4p2
-; SIMD128-FAST-NEXT:    local.tee $push2=, $1=, $pop3
-; SIMD128-FAST-NEXT:    f64x2.ge $push1=, $1, $0
-; SIMD128-FAST-NEXT:    v128.bitselect $push0=, $pop2, $0, $pop1
+; SIMD128-FAST-NEXT:    v128.const $push1=, 0x1.4p2, 0x1.4p2
+; SIMD128-FAST-NEXT:    f64x2.pmax $push0=, $pop1, $0
 ; SIMD128-FAST-NEXT:    return $pop0
 ;
 ; NO-SIMD128-LABEL: max_ordered_v2f64:
@@ -15156,13 +15132,13 @@ define <2 x double> @pmin_v2f64(<2 x double> %x, <2 x double> %y) {
 ; SIMD128-LABEL: pmin_v2f64:
 ; SIMD128:         .functype pmin_v2f64 (v128, v128) -> (v128)
 ; SIMD128-NEXT:  # %bb.0:
-; SIMD128-NEXT:    f64x2.pmin $push0=, $0, $1
+; SIMD128-NEXT:    f64x2.pmin $push0=, $1, $0
 ; SIMD128-NEXT:    return $pop0
 ;
 ; SIMD128-FAST-LABEL: pmin_v2f64:
 ; SIMD128-FAST:         .functype pmin_v2f64 (v128, v128) -> (v128)
 ; SIMD128-FAST-NEXT:  # %bb.0:
-; SIMD128-FAST-NEXT:    f64x2.pmin $push0=, $0, $1
+; SIMD128-FAST-NEXT:    f64x2.pmin $push0=, $1, $0
 ; SIMD128-FAST-NEXT:    return $pop0
 ;
 ; NO-SIMD128-LABEL: pmin_v2f64:
@@ -15244,13 +15220,13 @@ define <2 x double> @pmax_v2f64(<2 x double> %x, <2 x double> %y) {
 ; SIMD128-LABEL: pmax_v2f64:
 ; SIMD128:         .functype pmax_v2f64 (v128, v128) -> (v128)
 ; SIMD128-NEXT:  # %bb.0:
-; SIMD128-NEXT:    f64x2.pmax $push0=, $0, $1
+; SIMD128-NEXT:    f64x2.pmax $push0=, $1, $0
 ; SIMD128-NEXT:    return $pop0
 ;
 ; SIMD128-FAST-LABEL: pmax_v2f64:
 ; SIMD128-FAST:         .functype pmax_v2f64 (v128, v128) -> (v128)
 ; SIMD128-FAST-NEXT:  # %bb.0:
-; SIMD128-FAST-NEXT:    f64x2.pmax $push0=, $0, $1
+; SIMD128-FAST-NEXT:    f64x2.pmax $push0=, $1, $0
 ; SIMD128-FAST-NEXT:    return $pop0
 ;
 ; NO-SIMD128-LABEL: pmax_v2f64: