[llvm] f0f9787 - [WebAssembly] Lower vselect to v128.bitselect

Thu Jul 16 11:11:27 PDT 2020

Author: Thomas Lively
Date: 2020-07-16T11:11:19-07:00
New Revision: f0f97876469e7e0461ba3521f38bdb889cee05b7

URL: https://github.com/llvm/llvm-project/commit/f0f97876469e7e0461ba3521f38bdb889cee05b7
DIFF: https://github.com/llvm/llvm-project/commit/f0f97876469e7e0461ba3521f38bdb889cee05b7.diff

LOG: [WebAssembly] Lower vselect to v128.bitselect

We were previously expanding vselect and matching on the expansion to
generate bitselects, but in some cases the expansion would be further
combined and a bitselect would not get generated. This patch improves
codegen in those cases by legalizing vselect and lowering it to
v128.bitselect. The old pattern that matches the expansion is still
useful for lowering IR that already uses the expansion rather than a
select operation.

Differential Revision: https://reviews.llvm.org/D83734

Added: 
    

Modified: 
    llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
    llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
    llvm/test/CodeGen/WebAssembly/simd-select.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index a9b9eceb4130..4962f0e9dacb 100644

--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -156,8 +156,9 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
     // There is no i8x16.mul instruction
     setOperationAction(ISD::MUL, MVT::v16i8, Expand);
 
-    // There are no vector select instructions
-    for (auto Op : {ISD::VSELECT, ISD::SELECT_CC, ISD::SELECT})
+    // There is no vector conditional select instruction
+    // TODO: Implement SELECT_V128
+    for (auto Op : {ISD::SELECT_CC, ISD::SELECT})
       for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
                      MVT::v2f64})
         setOperationAction(Op, T, Expand);

diff  --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 4f3da2f35c61..6463b8c421b9 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -574,6 +574,16 @@ foreach vec_t = [v16i8, v8i16, v4i32, v2i64] in
             (!cast<Instruction>("BITSELECT_"#vec_t)
               V128:$v1, V128:$v2, V128:$c)>;
 
+// Also implement vselect in terms of bitselect
+foreach types = [[v16i8, v16i8], [v8i16, v8i16], [v4i32, v4i32], [v2i64, v2i64],
+                 [v4f32, v4i32], [v2f64, v2i64]] in
+  def : Pat<(types[0] (vselect
+              (types[1] V128:$c), (types[0] V128:$v1), (types[0] V128:$v2)
+            )),
+            (!cast<Instruction>("BITSELECT_"#types[0])
+              V128:$v1, V128:$v2, V128:$c
+            )>;
+
 //===----------------------------------------------------------------------===//
 // Integer unary arithmetic
 //===----------------------------------------------------------------------===//

diff  --git a/llvm/test/CodeGen/WebAssembly/simd-select.ll b/llvm/test/CodeGen/WebAssembly/simd-select.ll
index c3af6f9abe60..553cfe83f86c 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-select.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-select.ll
@@ -21,6 +21,18 @@ define <16 x i8> @vselect_v16i8(<16 x i1> %c, <16 x i8> %x, <16 x i8> %y) {
   ret <16 x i8> %res
 }
 
+; CHECK-LABEL: vselect_cmp_v16i8:
+; CHECK-NEXT: .functype vselect_cmp_v16i8 (v128, v128, v128, v128) -> (v128){{$}}
+; CHECK-NEXT: i8x16.lt_s $push[[L0:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $3, $pop[[L0]]{{$}}
+; CHECK-NEXT: return  $pop[[R]]{{$}}
+define <16 x i8> @vselect_cmp_v16i8(<16 x i8> %a, <16 x i8> %b,
+                                    <16 x i8> %x, <16 x i8> %y) {
+  %c = icmp slt <16 x i8> %a, %b
+  %res = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %y
+  ret <16 x i8> %res
+}
+
 ; CHECK-LABEL: select_v16i8:
 ; CHECK-NEXT: .functype select_v16i8 (i32, v128, v128) -> (v128){{$}}
 ; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, -1{{$}}
@@ -91,6 +103,18 @@ define <8 x i16> @vselect_v8i16(<8 x i1> %c, <8 x i16> %x, <8 x i16> %y) {
   ret <8 x i16> %res
 }
 
+; CHECK-LABEL: vselect_cmp_v8i16:
+; CHECK-NEXT: .functype vselect_cmp_v8i16 (v128, v128, v128, v128) -> (v128){{$}}
+; CHECK-NEXT: i16x8.lt_s $push[[L0:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $3, $pop[[L0]]{{$}}
+; CHECK-NEXT: return  $pop[[R]]{{$}}
+define <8 x i16> @vselect_cmp_v8i16(<8 x i16> %a, <8 x i16> %b,
+                                           <8 x i16> %x, <8 x i16> %y) {
+  %c = icmp slt <8 x i16> %a, %b
+  %res = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %y
+  ret <8 x i16> %res
+}
+
 ; CHECK-LABEL: select_v8i16:
 ; CHECK-NEXT: .functype select_v8i16 (i32, v128, v128) -> (v128){{$}}
 ; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, -1{{$}}
@@ -161,6 +185,17 @@ define <4 x i32> @vselect_v4i32(<4 x i1> %c, <4 x i32> %x, <4 x i32> %y) {
   ret <4 x i32> %res
 }
 
+; CHECK-LABEL: vselect_cmp_v4i32:
+; CHECK-NEXT: .functype vselect_cmp_v4i32 (v128, v128, v128, v128) -> (v128){{$}}
+; CHECK-NEXT: i32x4.lt_s $push[[L0:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $3, $pop[[L0]]{{$}}
+; CHECK-NEXT: return  $pop[[R]]{{$}}
+define <4 x i32> @vselect_cmp_v4i32(<4 x i32> %a, <4 x i32> %b,
+                                    <4 x i32> %x, <4 x i32> %y) {
+  %c = icmp slt <4 x i32> %a, %b
+  %res = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %y
+  ret <4 x i32> %res
+}
 
 ; CHECK-LABEL: select_v4i32:
 ; CHECK-NEXT: .functype select_v4i32 (i32, v128, v128) -> (v128){{$}}
@@ -232,6 +267,31 @@ define <2 x i64> @vselect_v2i64(<2 x i1> %c, <2 x i64> %x, <2 x i64> %y) {
   ret <2 x i64> %res
 }
 
+; CHECK-LABEL: vselect_cmp_v2i64:
+; CHECK-NEXT: .functype vselect_cmp_v2i64 (v128, v128, v128, v128) -> (v128){{$}}
+; CHECK-NEXT: i64.const $push[[L0:[0-9]+]]=, -1{{$}}
+; CHECK-NEXT: i64.const $push[[L1:[0-9]+]]=, 0{{$}}
+; CHECK-NEXT: i64x2.extract_lane $push[[L2:[0-9]+]]=, $0, 0{{$}}
+; CHECK-NEXT: i64x2.extract_lane $push[[L3:[0-9]+]]=, $1, 0{{$}}
+; CHECK-NEXT: i64.lt_s $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]{{$}}
+; CHECK-NEXT: i64.select $push[[L5:[0-9]+]]=, $pop[[L0]], $pop[[L1]], $pop[[L4]]{{$}}
+; CHECK-NEXT: i64x2.splat $push[[L6:[0-9]+]]=, $pop[[L5]]{{$}}
+; CHECK-NEXT: i64.const $push[[L7:[0-9]+]]=, -1{{$}}
+; CHECK-NEXT: i64.const $push[[L8:[0-9]+]]=, 0{{$}}
+; CHECK-NEXT: i64x2.extract_lane $push[[L9:[0-9]+]]=, $0, 1{{$}}
+; CHECK-NEXT: i64x2.extract_lane $push[[L10:[0-9]+]]=, $1, 1{{$}}
+; CHECK-NEXT: i64.lt_s $push[[L11:[0-9]+]]=, $pop[[L9]], $pop[[L10]]{{$}}
+; CHECK-NEXT: i64.select $push[[L12:[0-9]+]]=, $pop[[L7]], $pop[[L8]], $pop[[L11]]{{$}}
+; CHECK-NEXT: i64x2.replace_lane $push[[L13:[0-9]+]]=, $pop[[L6]], 1, $pop[[L12]]{{$}}
+; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $3, $pop[[L13]]{{$}}
+; CHECK-NEXT: return $pop[[R]]{{$}}
+define <2 x i64> @vselect_cmp_v2i64(<2 x i64> %a, <2 x i64> %b,
+                                    <2 x i64> %x, <2 x i64> %y) {
+  %c = icmp slt <2 x i64> %a, %b
+  %res = select <2 x i1> %c, <2 x i64> %x, <2 x i64> %y
+  ret <2 x i64> %res
+}
+
 ; CHECK-LABEL: select_v2i64:
 ; CHECK-NEXT: .functype select_v2i64 (i32, v128, v128) -> (v128){{$}}
 ; CHECK-NEXT: i64.const $push[[L0:[0-9]+]]=, -1{{$}}
@@ -305,6 +365,18 @@ define <4 x float> @vselect_v4f32(<4 x i1> %c, <4 x float> %x, <4 x float> %y) {
   ret <4 x float> %res
 }
 
+; CHECK-LABEL: vselect_cmp_v4f32:
+; CHECK-NEXT: .functype vselect_cmp_v4f32 (v128, v128, v128, v128) -> (v128){{$}}
+; CHECK-NEXT: f32x4.lt $push[[L0:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $3, $pop[[L0]]{{$}}
+; CHECK-NEXT: return  $pop[[R]]{{$}}
+define <4 x float> @vselect_cmp_v4f32(<4 x float> %a, <4 x float> %b,
+                                      <4 x float> %x, <4 x float> %y) {
+  %c = fcmp olt <4 x float> %a, %b
+  %res = select <4 x i1> %c, <4 x float> %x, <4 x float> %y
+  ret <4 x float> %res
+}
+
 ; CHECK-LABEL: select_v4f32:
 ; CHECK-NEXT: .functype select_v4f32 (i32, v128, v128) -> (v128){{$}}
 ; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, -1{{$}}
@@ -375,6 +447,18 @@ define <2 x double> @vselect_v2f64(<2 x i1> %c, <2 x double> %x, <2 x double> %y
   ret <2 x double> %res
 }
 
+; CHECK-LABEL: vselect_cmp_v2f64:
+; CHECK-NEXT: .functype vselect_cmp_v2f64 (v128, v128, v128, v128) -> (v128){{$}}
+; CHECK-NEXT: f64x2.lt $push[[L0:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $3, $pop[[L0]]{{$}}
+; CHECK-NEXT: return  $pop[[R]]{{$}}
+define <2 x double> @vselect_cmp_v2f64(<2 x double> %a, <2 x double> %b,
+                                       <2 x double> %x, <2 x double> %y) {
+  %c = fcmp olt <2 x double> %a, %b
+  %res = select <2 x i1> %c, <2 x double> %x, <2 x double> %y
+  ret <2 x double> %res
+}
+
 ; CHECK-LABEL: select_v2f64:
 ; CHECK-NEXT: .functype select_v2f64 (i32, v128, v128) -> (v128){{$}}
 ; CHECK-NEXT: i64.const $push[[L0:[0-9]+]]=, -1{{$}}