[llvm] ecb2e5b - [WebAssembly] Implement v128.select
Thomas Lively via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 16 11:37:34 PDT 2020
Author: Thomas Lively
Date: 2020-07-16T11:37:25-07:00
New Revision: ecb2e5bcd7e616f62a8c61722ee8d4033c78e32e
URL: https://github.com/llvm/llvm-project/commit/ecb2e5bcd7e616f62a8c61722ee8d4033c78e32e
DIFF: https://github.com/llvm/llvm-project/commit/ecb2e5bcd7e616f62a8c61722ee8d4033c78e32e.diff
LOG: [WebAssembly] Implement v128.select
Although the SIMD spec proposal does not specifically include a
select instruction, the select instruction in MVP WebAssembly is
polymorphic over the selected types, so it is able to work on v128
values when they are enabled. This patch introduces a new variant of
the select instruction for each legal vector type. Additional ISel
patterns are adapted from the SELECT_I32 and SELECT_I64 patterns.
Depends on D83736.
Differential Revision: https://reviews.llvm.org/D83737
Added:
Modified:
llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
llvm/test/CodeGen/WebAssembly/simd-select.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 4962f0e9dacb..71b173d76908 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -157,11 +157,9 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setOperationAction(ISD::MUL, MVT::v16i8, Expand);
// There is no vector conditional select instruction
- // TODO: Implement SELECT_V128
- for (auto Op : {ISD::SELECT_CC, ISD::SELECT})
- for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
- MVT::v2f64})
- setOperationAction(Op, T, Expand);
+ for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
+ MVT::v2f64})
+ setOperationAction(ISD::SELECT_CC, T, Expand);
// Expand integer operations supported for scalars but not SIMD
for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP, ISD::SDIV, ISD::UDIV,
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 6463b8c421b9..b603701ab930 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -584,6 +584,37 @@ foreach types = [[v16i8, v16i8], [v8i16, v8i16], [v4i32, v4i32], [v2i64, v2i64],
V128:$v1, V128:$v2, V128:$c
)>;
+// MVP select on v128 values
+foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
+defm SELECT_#vec_t : I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs, I32:$cond),
+ (outs), (ins),
+ [(set V128:$dst,
+ (select I32:$cond,
+ (vec_t V128:$lhs), (vec_t V128:$rhs)
+ )
+ )],
+ "v128.select\t$dst, $lhs, $rhs, $cond",
+ "v128.select", 0x1b>;
+
+// ISD::SELECT requires its operand to conform to getBooleanContents, but
+// WebAssembly's select interprets any non-zero value as true, so we can fold
+// a setne with 0 into a select.
+def : Pat<(select
+ (i32 (setne I32:$cond, 0)), (vec_t V128:$lhs), (vec_t V128:$rhs)
+ ),
+ (!cast<Instruction>("SELECT_"#vec_t)
+ V128:$lhs, V128:$rhs, I32:$cond
+ )>;
+
+// And again, this time with seteq instead of setne and the arms reversed.
+def : Pat<(select
+ (i32 (seteq I32:$cond, 0)), (vec_t V128:$lhs), (vec_t V128:$rhs)
+ ),
+ (!cast<Instruction>("SELECT_"#vec_t)
+ V128:$rhs, V128:$lhs, I32:$cond
+ )>;
+} // foreach vec_t
+
//===----------------------------------------------------------------------===//
// Integer unary arithmetic
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/WebAssembly/simd-select.ll b/llvm/test/CodeGen/WebAssembly/simd-select.ll
index ba9de0865587..be36f94cf5a6 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-select.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-select.ll
@@ -49,12 +49,8 @@ define <16 x i8> @select_v16i8(i1 zeroext %c, <16 x i8> %x, <16 x i8> %y) {
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i32.const -1
-; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i32.select
-; CHECK-NEXT: i8x16.splat
-; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: v128.select
; CHECK-NEXT: # fallthrough-return
%res = select i1 %c, <16 x i8> %x, <16 x i8> %y
ret <16 x i8> %res
@@ -67,10 +63,9 @@ define <16 x i8> @select_cmp_v16i8(i32 %i, <16 x i8> %x, <16 x i8> %y) {
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 2
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i32.const 31
-; CHECK-NEXT: i32.shr_s
-; CHECK-NEXT: i8x16.splat
-; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: i32.lt_s
+; CHECK-NEXT: v128.select
; CHECK-NEXT: # fallthrough-return
%c = icmp slt i32 %i, 0
%res = select i1 %c, <16 x i8> %x, <16 x i8> %y
@@ -83,12 +78,8 @@ define <16 x i8> @select_ne_v16i8(i32 %i, <16 x i8> %x, <16 x i8> %y) {
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i32.const -1
-; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i32.select
-; CHECK-NEXT: i8x16.splat
-; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: v128.select
; CHECK-NEXT: # fallthrough-return
%c = icmp ne i32 %i, 0
%res = select i1 %c, <16 x i8> %x, <16 x i8> %y
@@ -99,14 +90,10 @@ define <16 x i8> @select_eq_v16i8(i32 %i, <16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: select_eq_v16i8:
; CHECK: .functype select_eq_v16i8 (i32, v128, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i32.const 0
-; CHECK-NEXT: i32.const -1
+; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i32.select
-; CHECK-NEXT: i8x16.splat
-; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: v128.select
; CHECK-NEXT: # fallthrough-return
%c = icmp eq i32 %i, 0
%res = select i1 %c, <16 x i8> %x, <16 x i8> %y
@@ -153,12 +140,8 @@ define <8 x i16> @select_v8i16(i1 zeroext %c, <8 x i16> %x, <8 x i16> %y) {
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i32.const -1
-; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i32.select
-; CHECK-NEXT: i16x8.splat
-; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: v128.select
; CHECK-NEXT: # fallthrough-return
%res = select i1 %c, <8 x i16> %x, <8 x i16> %y
ret <8 x i16> %res
@@ -171,10 +154,9 @@ define <8 x i16> @select_cmp_v8i16(i32 %i, <8 x i16> %x, <8 x i16> %y) {
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 2
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i32.const 31
-; CHECK-NEXT: i32.shr_s
-; CHECK-NEXT: i16x8.splat
-; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: i32.lt_s
+; CHECK-NEXT: v128.select
; CHECK-NEXT: # fallthrough-return
%c = icmp slt i32 %i, 0
%res = select i1 %c, <8 x i16> %x, <8 x i16> %y
@@ -187,12 +169,8 @@ define <8 x i16> @select_ne_v8i16(i32 %i, <8 x i16> %x, <8 x i16> %y) {
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i32.const -1
-; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i32.select
-; CHECK-NEXT: i16x8.splat
-; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: v128.select
; CHECK-NEXT: # fallthrough-return
%c = icmp ne i32 %i, 0
%res = select i1 %c, <8 x i16> %x, <8 x i16> %y
@@ -203,14 +181,10 @@ define <8 x i16> @select_eq_v8i16(i32 %i, <8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: select_eq_v8i16:
; CHECK: .functype select_eq_v8i16 (i32, v128, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i32.const 0
-; CHECK-NEXT: i32.const -1
+; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i32.select
-; CHECK-NEXT: i16x8.splat
-; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: v128.select
; CHECK-NEXT: # fallthrough-return
%c = icmp eq i32 %i, 0
%res = select i1 %c, <8 x i16> %x, <8 x i16> %y
@@ -257,12 +231,8 @@ define <4 x i32> @select_v4i32(i1 zeroext %c, <4 x i32> %x, <4 x i32> %y) {
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i32.const -1
-; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i32.select
-; CHECK-NEXT: i32x4.splat
-; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: v128.select
; CHECK-NEXT: # fallthrough-return
%res = select i1 %c, <4 x i32> %x, <4 x i32> %y
ret <4 x i32> %res
@@ -275,10 +245,9 @@ define <4 x i32> @select_cmp_v4i32(i32 %i, <4 x i32> %x, <4 x i32> %y) {
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 2
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i32.const 31
-; CHECK-NEXT: i32.shr_s
-; CHECK-NEXT: i32x4.splat
-; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: i32.lt_s
+; CHECK-NEXT: v128.select
; CHECK-NEXT: # fallthrough-return
%c = icmp slt i32 %i, 0
%res = select i1 %c, <4 x i32> %x, <4 x i32> %y
@@ -291,12 +260,8 @@ define <4 x i32> @select_ne_v4i32(i32 %i, <4 x i32> %x, <4 x i32> %y) {
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i32.const -1
-; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i32.select
-; CHECK-NEXT: i32x4.splat
-; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: v128.select
; CHECK-NEXT: # fallthrough-return
%c = icmp ne i32 %i, 0
%res = select i1 %c, <4 x i32> %x, <4 x i32> %y
@@ -307,14 +272,10 @@ define <4 x i32> @select_eq_v4i32(i32 %i, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: select_eq_v4i32:
; CHECK: .functype select_eq_v4i32 (i32, v128, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i32.const 0
-; CHECK-NEXT: i32.const -1
+; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i32.select
-; CHECK-NEXT: i32x4.splat
-; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: v128.select
; CHECK-NEXT: # fallthrough-return
%c = icmp eq i32 %i, 0
%res = select i1 %c, <4 x i32> %x, <4 x i32> %y
@@ -376,12 +337,8 @@ define <2 x i64> @select_v2i64(i1 zeroext %c, <2 x i64> %x, <2 x i64> %y) {
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i64.const -1
-; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i64.select
-; CHECK-NEXT: i64x2.splat
-; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: v128.select
; CHECK-NEXT: # fallthrough-return
%res = select i1 %c, <2 x i64> %x, <2 x i64> %y
ret <2 x i64> %res
@@ -393,14 +350,10 @@ define <2 x i64> @select_cmp_v2i64(i32 %i, <2 x i64> %x, <2 x i64> %y) {
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i64.const -1
-; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i32.lt_s
-; CHECK-NEXT: i64.select
-; CHECK-NEXT: i64x2.splat
-; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: v128.select
; CHECK-NEXT: # fallthrough-return
%c = icmp slt i32 %i, 0
%res = select i1 %c, <2 x i64> %x, <2 x i64> %y
@@ -413,12 +366,8 @@ define <2 x i64> @select_ne_v2i64(i32 %i, <2 x i64> %x, <2 x i64> %y) {
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i64.const -1
-; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i64.select
-; CHECK-NEXT: i64x2.splat
-; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: v128.select
; CHECK-NEXT: # fallthrough-return
%c = icmp ne i32 %i, 0
%res = select i1 %c, <2 x i64> %x, <2 x i64> %y
@@ -429,14 +378,10 @@ define <2 x i64> @select_eq_v2i64(i32 %i, <2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: select_eq_v2i64:
; CHECK: .functype select_eq_v2i64 (i32, v128, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i64.const 0
-; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i64.select
-; CHECK-NEXT: i64x2.splat
-; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: v128.select
; CHECK-NEXT: # fallthrough-return
%c = icmp eq i32 %i, 0
%res = select i1 %c, <2 x i64> %x, <2 x i64> %y
@@ -483,12 +428,8 @@ define <4 x float> @select_v4f32(i1 zeroext %c, <4 x float> %x, <4 x float> %y)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i32.const -1
-; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i32.select
-; CHECK-NEXT: i32x4.splat
-; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: v128.select
; CHECK-NEXT: # fallthrough-return
%res = select i1 %c, <4 x float> %x, <4 x float> %y
ret <4 x float> %res
@@ -501,10 +442,9 @@ define <4 x float> @select_cmp_v4f32(i32 %i, <4 x float> %x, <4 x float> %y) {
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 2
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i32.const 31
-; CHECK-NEXT: i32.shr_s
-; CHECK-NEXT: i32x4.splat
-; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: i32.lt_s
+; CHECK-NEXT: v128.select
; CHECK-NEXT: # fallthrough-return
%c = icmp slt i32 %i, 0
%res = select i1 %c, <4 x float> %x, <4 x float> %y
@@ -517,12 +457,8 @@ define <4 x float> @select_ne_v4f32(i32 %i, <4 x float> %x, <4 x float> %y) {
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i32.const -1
-; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i32.select
-; CHECK-NEXT: i32x4.splat
-; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: v128.select
; CHECK-NEXT: # fallthrough-return
%c = icmp ne i32 %i, 0
%res = select i1 %c, <4 x float> %x, <4 x float> %y
@@ -533,14 +469,10 @@ define <4 x float> @select_eq_v4f32(i32 %i, <4 x float> %x, <4 x float> %y) {
; CHECK-LABEL: select_eq_v4f32:
; CHECK: .functype select_eq_v4f32 (i32, v128, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i32.const 0
-; CHECK-NEXT: i32.const -1
+; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i32.select
-; CHECK-NEXT: i32x4.splat
-; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: v128.select
; CHECK-NEXT: # fallthrough-return
%c = icmp eq i32 %i, 0
%res = select i1 %c, <4 x float> %x, <4 x float> %y
@@ -587,12 +519,8 @@ define <2 x double> @select_v2f64(i1 zeroext %c, <2 x double> %x, <2 x double> %
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i64.const -1
-; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i64.select
-; CHECK-NEXT: i64x2.splat
-; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: v128.select
; CHECK-NEXT: # fallthrough-return
%res = select i1 %c, <2 x double> %x, <2 x double> %y
ret <2 x double> %res
@@ -604,14 +532,10 @@ define <2 x double> @select_cmp_v2f64(i32 %i, <2 x double> %x, <2 x double> %y)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i64.const -1
-; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i32.lt_s
-; CHECK-NEXT: i64.select
-; CHECK-NEXT: i64x2.splat
-; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: v128.select
; CHECK-NEXT: # fallthrough-return
%c = icmp slt i32 %i, 0
%res = select i1 %c, <2 x double> %x, <2 x double> %y
@@ -624,12 +548,8 @@ define <2 x double> @select_ne_v2f64(i32 %i, <2 x double> %x, <2 x double> %y) {
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i64.const -1
-; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i64.select
-; CHECK-NEXT: i64x2.splat
-; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: v128.select
; CHECK-NEXT: # fallthrough-return
%c = icmp ne i32 %i, 0
%res = select i1 %c, <2 x double> %x, <2 x double> %y
@@ -640,14 +560,10 @@ define <2 x double> @select_eq_v2f64(i32 %i, <2 x double> %x, <2 x double> %y) {
; CHECK-LABEL: select_eq_v2f64:
; CHECK: .functype select_eq_v2f64 (i32, v128, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i64.const 0
-; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i64.select
-; CHECK-NEXT: i64x2.splat
-; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: v128.select
; CHECK-NEXT: # fallthrough-return
%c = icmp eq i32 %i, 0
%res = select i1 %c, <2 x double> %x, <2 x double> %y
More information about the llvm-commits mailing list