[llvm] b19de81 - [WebAssembly] Improve codegen for v128.bitselect
Thomas Lively via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 3 23:28:43 PDT 2022
Author: Thomas Lively
Date: 2022-08-03T23:28:37-07:00
New Revision: b19de814add2c2d9496c0b2fcc8a47024a58e491
URL: https://github.com/llvm/llvm-project/commit/b19de814add2c2d9496c0b2fcc8a47024a58e491
DIFF: https://github.com/llvm/llvm-project/commit/b19de814add2c2d9496c0b2fcc8a47024a58e491.diff
LOG: [WebAssembly] Improve codegen for v128.bitselect
Add patterns selecting ((v1 ^ v2) & c) ^ v2 and ((v1 ^ v2) & ~c) ^ v2 to
v128.bitselect.
Resolves #56827.
Reviewed By: aheejin
Differential Revision: https://reviews.llvm.org/D131131
Added:
Modified:
llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
llvm/test/CodeGen/WebAssembly/simd-arith.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index ed3cc7ed1c53d..14202a8e1924b 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -811,6 +811,20 @@ def : Pat<(vec.vt (or (and (vec.vt V128:$c), (vec.vt V128:$v1)),
(and (vnot V128:$c), (vec.vt V128:$v2)))),
(BITSELECT $v1, $v2, $c)>;
+// Bitselect is also equivalent to ((v1 ^ v2) & c) ^ v2
+foreach vec = IntVecs in
+def : Pat<(vec.vt (xor (and (xor (vec.vt V128:$v1), (vec.vt V128:$v2)),
+ (vec.vt V128:$c)),
+ (vec.vt V128:$v2))),
+ (BITSELECT $v1, $v2, $c)>;
+
+// Same pattern with `c` negated, so `v1` and `v2` are swapped in the BITSELECT operands.
+foreach vec = IntVecs in
+def : Pat<(vec.vt (xor (and (xor (vec.vt V128:$v1), (vec.vt V128:$v2)),
+ (vnot (vec.vt V128:$c))),
+ (vec.vt V128:$v2))),
+ (BITSELECT $v2, $v1, $c)>;
+
// Also implement vselect in terms of bitselect
foreach vec = AllVecs in
def : Pat<(vec.vt (vselect
diff --git a/llvm/test/CodeGen/WebAssembly/simd-arith.ll b/llvm/test/CodeGen/WebAssembly/simd-arith.ll
index dc33f36b97f2b..78fdccc6e60bb 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-arith.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-arith.ll
@@ -343,6 +343,39 @@ define <16 x i8> @bitselect_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2) {
ret <16 x i8> %a
}
+; CHECK-LABEL: bitselect_xor_v16i8:
+; NO-SIMD128-NOT: v128
+; SIMD128-NEXT: .functype bitselect_xor_v16i8 (v128, v128, v128) -> (v128){{$}}
+; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
+; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
+; SIMD128-FAST-NEXT: v128.xor
+; SIMD128-FAST-NEXT: v128.and
+; SIMD128-FAST-NEXT: v128.xor
+define <16 x i8> @bitselect_xor_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2) {
+ %xor1 = xor <16 x i8> %v1, %v2
+ %and = and <16 x i8> %xor1, %c
+ %a = xor <16 x i8> %and, %v2
+ ret <16 x i8> %a
+}
+
+; CHECK-LABEL: bitselect_xor_reversed_v16i8:
+; NO-SIMD128-NOT: v128
+; SIMD128-NEXT: .functype bitselect_xor_reversed_v16i8 (v128, v128, v128) -> (v128){{$}}
+; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $1, $0{{$}}
+; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
+; SIMD128-FAST-NEXT: v128.xor
+; SIMD128-FAST-NEXT: v128.not
+; SIMD128-FAST-NEXT: v128.and
+; SIMD128-FAST-NEXT: v128.xor
+define <16 x i8> @bitselect_xor_reversed_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2) {
+ %xor1 = xor <16 x i8> %v1, %v2
+ %notc = xor <16 x i8> %c, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+ %and = and <16 x i8> %xor1, %notc
+ %a = xor <16 x i8> %and, %v2
+ ret <16 x i8> %a
+}
+
; ==============================================================================
; 8 x i16
; ==============================================================================
@@ -659,6 +692,39 @@ define <8 x i16> @bitselect_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2) {
ret <8 x i16> %a
}
+; CHECK-LABEL: bitselect_xor_v8i16:
+; NO-SIMD128-NOT: v128
+; SIMD128-NEXT: .functype bitselect_xor_v8i16 (v128, v128, v128) -> (v128){{$}}
+; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
+; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
+; SIMD128-FAST-NEXT: v128.xor
+; SIMD128-FAST-NEXT: v128.and
+; SIMD128-FAST-NEXT: v128.xor
+define <8 x i16> @bitselect_xor_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2) {
+ %xor1 = xor <8 x i16> %v1, %v2
+ %and = and <8 x i16> %xor1, %c
+ %a = xor <8 x i16> %and, %v2
+ ret <8 x i16> %a
+}
+
+; CHECK-LABEL: bitselect_xor_reversed_v8i16:
+; NO-SIMD128-NOT: v128
+; SIMD128-NEXT: .functype bitselect_xor_reversed_v8i16 (v128, v128, v128) -> (v128){{$}}
+; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $1, $0{{$}}
+; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
+; SIMD128-FAST-NEXT: v128.xor
+; SIMD128-FAST-NEXT: v128.not
+; SIMD128-FAST-NEXT: v128.and
+; SIMD128-FAST-NEXT: v128.xor
+define <8 x i16> @bitselect_xor_reversed_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2) {
+ %xor1 = xor <8 x i16> %v1, %v2
+ %notc = xor <8 x i16> %c, <i16 -1, i16 -1, i16 -1, i16 -1,
+ i16 -1, i16 -1, i16 -1, i16 -1>
+ %and = and <8 x i16> %xor1, %notc
+ %a = xor <8 x i16> %and, %v2
+ ret <8 x i16> %a
+}
+
; CHECK-LABEL: extmul_low_s_v8i16:
; NO-SIMD128-NOT: i16x8
; SIMD128-NEXT: .functype extmul_low_s_v8i16 (v128, v128) -> (v128){{$}}
@@ -998,6 +1064,38 @@ define <4 x i32> @bitselect_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2) {
ret <4 x i32> %a
}
+; CHECK-LABEL: bitselect_xor_v4i32:
+; NO-SIMD128-NOT: v128
+; SIMD128-NEXT: .functype bitselect_xor_v4i32 (v128, v128, v128) -> (v128){{$}}
+; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
+; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
+; SIMD128-FAST-NEXT: v128.xor
+; SIMD128-FAST-NEXT: v128.and
+; SIMD128-FAST-NEXT: v128.xor
+define <4 x i32> @bitselect_xor_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2) {
+ %xor1 = xor <4 x i32> %v1, %v2
+ %and = and <4 x i32> %xor1, %c
+ %a = xor <4 x i32> %and, %v2
+ ret <4 x i32> %a
+}
+
+; CHECK-LABEL: bitselect_xor_reversed_v4i32:
+; NO-SIMD128-NOT: v128
+; SIMD128-NEXT: .functype bitselect_xor_reversed_v4i32 (v128, v128, v128) -> (v128){{$}}
+; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $1, $0{{$}}
+; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
+; SIMD128-FAST-NEXT: v128.xor
+; SIMD128-FAST-NEXT: v128.not
+; SIMD128-FAST-NEXT: v128.and
+; SIMD128-FAST-NEXT: v128.xor
+define <4 x i32> @bitselect_xor_reversed_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2) {
+ %xor1 = xor <4 x i32> %v1, %v2
+ %notc = xor <4 x i32> %c, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %and = and <4 x i32> %xor1, %notc
+ %a = xor <4 x i32> %and, %v2
+ ret <4 x i32> %a
+}
+
; CHECK-LABEL: extmul_low_s_v4i32:
; NO-SIMD128-NOT: i32x4
; SIMD128-NEXT: .functype extmul_low_s_v4i32 (v128, v128) -> (v128){{$}}
@@ -1390,6 +1488,38 @@ define <2 x i64> @bitselect_v2i64(<2 x i64> %c, <2 x i64> %v1, <2 x i64> %v2) {
ret <2 x i64> %a
}
+; CHECK-LABEL: bitselect_xor_v2i64:
+; NO-SIMD128-NOT: v128
+; SIMD128-NEXT: .functype bitselect_xor_v2i64 (v128, v128, v128) -> (v128){{$}}
+; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
+; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
+; SIMD128-FAST-NEXT: v128.xor
+; SIMD128-FAST-NEXT: v128.and
+; SIMD128-FAST-NEXT: v128.xor
+define <2 x i64> @bitselect_xor_v2i64(<2 x i64> %c, <2 x i64> %v1, <2 x i64> %v2) {
+ %xor1 = xor <2 x i64> %v1, %v2
+ %and = and <2 x i64> %xor1, %c
+ %a = xor <2 x i64> %and, %v2
+ ret <2 x i64> %a
+}
+
+; CHECK-LABEL: bitselect_xor_reversed_v2i64:
+; NO-SIMD128-NOT: v128
+; SIMD128-NEXT: .functype bitselect_xor_reversed_v2i64 (v128, v128, v128) -> (v128){{$}}
+; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $1, $0{{$}}
+; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
+; SIMD128-FAST-NEXT: v128.xor
+; SIMD128-FAST-NEXT: v128.not
+; SIMD128-FAST-NEXT: v128.and
+; SIMD128-FAST-NEXT: v128.xor
+define <2 x i64> @bitselect_xor_reversed_v2i64(<2 x i64> %c, <2 x i64> %v1, <2 x i64> %v2) {
+ %xor1 = xor <2 x i64> %v1, %v2
+ %notc = xor <2 x i64> %c, <i64 -1, i64 -1>
+ %and = and <2 x i64> %xor1, %notc
+ %a = xor <2 x i64> %and, %v2
+ ret <2 x i64> %a
+}
+
; CHECK-LABEL: extmul_low_s_v2i64:
; NO-SIMD128-NOT: i64x2
; SIMD128-NEXT: .functype extmul_low_s_v2i64 (v128, v128) -> (v128){{$}}
More information about the llvm-commits mailing list