[llvm] b59c6fc - [WebAssembly] Prefer v128.const for constant splats

Fri Jul 10 18:28:00 PDT 2020

Author: Thomas Lively
Date: 2020-07-10T18:27:52-07:00
New Revision: b59c6fcaf3fc8fd4c42daeecf0545e47b37b1aa7

URL: https://github.com/llvm/llvm-project/commit/b59c6fcaf3fc8fd4c42daeecf0545e47b37b1aa7
DIFF: https://github.com/llvm/llvm-project/commit/b59c6fcaf3fc8fd4c42daeecf0545e47b37b1aa7.diff

LOG: [WebAssembly] Prefer v128.const for constant splats

In BUILD_VECTOR lowering, we used to generally prefer using splats
over v128.const instructions because v128.const has a very large
encoding. However, in d5b7a4e2e8 we switched to preferring consts
because they are expected to be more efficient in engines. This patch
updates the ISel patterns to match this current preference by removing
the AddedComplexity bump that made the splat patterns win over
v128.const for constant splats.

Differential Revision: https://reviews.llvm.org/D83581

Added: 
    

Modified: 
    llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
    llvm/test/CodeGen/WebAssembly/simd-arith.ll
    llvm/test/CodeGen/WebAssembly/simd.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 814bb80fb693..4f3da2f35c61 100644

--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -328,8 +328,6 @@ def splat16 : PatFrag<(ops node:$x), (build_vector
 
 multiclass Splat<ValueType vec_t, string vec, WebAssemblyRegClass reg_t,
                  PatFrag splat_pat, bits<32> simdop> {
-  // Prefer splats over v128.const for const splats (65 is lowest that works)
-  let AddedComplexity = 65 in
   defm SPLAT_#vec_t : SIMD_I<(outs V128:$dst), (ins reg_t:$x), (outs), (ins),
                              [(set (vec_t V128:$dst), (splat_pat reg_t:$x))],
                              vec#".splat\t$dst, $x", vec#".splat", simdop>;

diff  --git a/llvm/test/CodeGen/WebAssembly/simd-arith.ll b/llvm/test/CodeGen/WebAssembly/simd-arith.ll
index c56566991a8c..fca4710b582f 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-arith.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-arith.ll
@@ -1278,9 +1278,8 @@ define <4 x float> @abs_v4f32(<4 x float> %x) {
 ; CHECK-LABEL: min_unordered_v4f32:
 ; NO-SIMD128-NOT: f32x4
 ; SIMD128-NEXT: .functype min_unordered_v4f32 (v128) -> (v128){{$}}
-; SIMD128-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.4p2
-; SIMD128-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
-; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
+; SIMD128-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2{{$}}
+; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 define <4 x float> @min_unordered_v4f32(<4 x float> %x) {
   %cmps = fcmp ule <4 x float> %x, <float 5., float 5., float 5., float 5.>
@@ -1292,9 +1291,8 @@ define <4 x float> @min_unordered_v4f32(<4 x float> %x) {
 ; CHECK-LABEL: max_unordered_v4f32:
 ; NO-SIMD128-NOT: f32x4
 ; SIMD128-NEXT: .functype max_unordered_v4f32 (v128) -> (v128){{$}}
-; SIMD128-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.4p2
-; SIMD128-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
-; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
+; SIMD128-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2
+; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 define <4 x float> @max_unordered_v4f32(<4 x float> %x) {
   %cmps = fcmp uge <4 x float> %x, <float 5., float 5., float 5., float 5.>
@@ -1306,9 +1304,8 @@ define <4 x float> @max_unordered_v4f32(<4 x float> %x) {
 ; CHECK-LABEL: min_ordered_v4f32:
 ; NO-SIMD128-NOT: f32x4
 ; SIMD128-NEXT: .functype min_ordered_v4f32 (v128) -> (v128){{$}}
-; SIMD128-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.4p2
-; SIMD128-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
-; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
+; SIMD128-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2{{$}}
+; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 define <4 x float> @min_ordered_v4f32(<4 x float> %x) {
   %cmps = fcmp ole <4 x float> <float 5., float 5., float 5., float 5.>, %x
@@ -1320,9 +1317,8 @@ define <4 x float> @min_ordered_v4f32(<4 x float> %x) {
 ; CHECK-LABEL: max_ordered_v4f32:
 ; NO-SIMD128-NOT: f32x4
 ; SIMD128-NEXT: .functype max_ordered_v4f32 (v128) -> (v128){{$}}
-; SIMD128-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.4p2
-; SIMD128-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
-; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
+; SIMD128-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2{{$}}
+; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 define <4 x float> @max_ordered_v4f32(<4 x float> %x) {
   %cmps = fcmp oge <4 x float> <float 5., float 5., float 5., float 5.>, %x
@@ -1378,8 +1374,7 @@ define <4 x float> @maxnum_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
 ; CHECK-LABEL: min_const_intrinsic_v4f32:
 ; NO-SIMD128-NOT: f32x4
 ; SIMD128-NEXT: .functype min_const_intrinsic_v4f32 () -> (v128){{$}}
-; SIMD128-NEXT: f32.const $push[[L:[0-9]+]]=, 0x1.4p2{{$}}
-; SIMD128-NEXT: f32x4.splat $push[[R:[0-9]+]]=, $pop[[L]]{{$}}
+; SIMD128-NEXT: v128.const $push[[R:[0-9]+]]=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 define <4 x float> @min_const_intrinsic_v4f32() {
   %a = call <4 x float> @llvm.minimum.v4f32(
@@ -1392,8 +1387,7 @@ define <4 x float> @min_const_intrinsic_v4f32() {
 ; CHECK-LABEL: max_const_intrinsic_v4f32:
 ; NO-SIMD128-NOT: f32x4
 ; SIMD128-NEXT: .functype max_const_intrinsic_v4f32 () -> (v128){{$}}
-; SIMD128-NEXT: f32.const $push[[L:[0-9]+]]=, 0x1.5p5{{$}}
-; SIMD128-NEXT: f32x4.splat $push[[R:[0-9]+]]=, $pop[[L]]{{$}}
+; SIMD128-NEXT: v128.const $push[[R:[0-9]+]]=, 0x1.5p5, 0x1.5p5, 0x1.5p5, 0x1.5p5{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 define <4 x float> @max_const_intrinsic_v4f32() {
   %a = call <4 x float> @llvm.maximum.v4f32(
@@ -1482,9 +1476,8 @@ define <2 x double> @abs_v2f64(<2 x double> %x) {
 ; CHECK-LABEL: min_unordered_v2f64:
 ; NO-SIMD128-NOT: f64x2
 ; SIMD128-NEXT: .functype min_unordered_v2f64 (v128) -> (v128){{$}}
-; SIMD128-NEXT: f64.const $push[[L0:[0-9]+]]=, 0x1.4p2
-; SIMD128-NEXT: f64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
-; SIMD128-NEXT: f64x2.min $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
+; SIMD128-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.4p2, 0x1.4p2{{$}}
+; SIMD128-NEXT: f64x2.min $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 define <2 x double> @min_unordered_v2f64(<2 x double> %x) {
   %cmps = fcmp ule <2 x double> %x, <double 5., double 5.>
@@ -1496,9 +1489,8 @@ define <2 x double> @min_unordered_v2f64(<2 x double> %x) {
 ; CHECK-LABEL: max_unordered_v2f64:
 ; NO-SIMD128-NOT: f64x2
 ; SIMD128-NEXT: .functype max_unordered_v2f64 (v128) -> (v128){{$}}
-; SIMD128-NEXT: f64.const $push[[L0:[0-9]+]]=, 0x1.4p2
-; SIMD128-NEXT: f64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
-; SIMD128-NEXT: f64x2.max $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
+; SIMD128-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.4p2, 0x1.4p2{{$}}
+; SIMD128-NEXT: f64x2.max $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 define <2 x double> @max_unordered_v2f64(<2 x double> %x) {
   %cmps = fcmp uge <2 x double> %x, <double 5., double 5.>
@@ -1510,9 +1502,8 @@ define <2 x double> @max_unordered_v2f64(<2 x double> %x) {
 ; CHECK-LABEL: min_ordered_v2f64:
 ; NO-SIMD128-NOT: f64x2
 ; SIMD128-NEXT: .functype min_ordered_v2f64 (v128) -> (v128){{$}}
-; SIMD128-NEXT: f64.const $push[[L0:[0-9]+]]=, 0x1.4p2
-; SIMD128-NEXT: f64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
-; SIMD128-NEXT: f64x2.min $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
+; SIMD128-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.4p2, 0x1.4p2{{$}}
+; SIMD128-NEXT: f64x2.min $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 define <2 x double> @min_ordered_v2f64(<2 x double> %x) {
   %cmps = fcmp ole <2 x double> <double 5., double 5.>, %x
@@ -1524,9 +1515,8 @@ define <2 x double> @min_ordered_v2f64(<2 x double> %x) {
 ; CHECK-LABEL: max_ordered_v2f64:
 ; NO-SIMD128-NOT: f64x2
 ; SIMD128-NEXT: .functype max_ordered_v2f64 (v128) -> (v128){{$}}
-; SIMD128-NEXT: f64.const $push[[L0:[0-9]+]]=, 0x1.4p2
-; SIMD128-NEXT: f64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
-; SIMD128-NEXT: f64x2.max $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
+; SIMD128-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.4p2, 0x1.4p2{{$}}
+; SIMD128-NEXT: f64x2.max $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 define <2 x double> @max_ordered_v2f64(<2 x double> %x) {
   %cmps = fcmp oge <2 x double> <double 5., double 5.>, %x
@@ -1560,8 +1550,7 @@ define <2 x double> @max_intrinsic_v2f64(<2 x double> %x, <2 x double> %y) {
 ; CHECK-LABEL: min_const_intrinsic_v2f64:
 ; NO-SIMD128-NOT: f64x2
 ; SIMD128-NEXT: .functype min_const_intrinsic_v2f64 () -> (v128){{$}}
-; SIMD128-NEXT: f64.const $push[[L:[0-9]+]]=, 0x1.4p2{{$}}
-; SIMD128-NEXT: f64x2.splat $push[[R:[0-9]+]]=, $pop[[L]]{{$}}
+; SIMD128-NEXT: v128.const $push[[R:[0-9]+]]=, 0x1.4p2, 0x1.4p2{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 define <2 x double> @min_const_intrinsic_v2f64() {
   %a = call <2 x double> @llvm.minimum.v2f64(
@@ -1574,8 +1563,7 @@ define <2 x double> @min_const_intrinsic_v2f64() {
 ; CHECK-LABEL: max_const_intrinsic_v2f64:
 ; NO-SIMD128-NOT: f64x2
 ; SIMD128-NEXT: .functype max_const_intrinsic_v2f64 () -> (v128){{$}}
-; SIMD128-NEXT: f64.const $push[[L:[0-9]+]]=, 0x1.5p5{{$}}
-; SIMD128-NEXT: f64x2.splat $push[[R:[0-9]+]]=, $pop[[L]]{{$}}
+; SIMD128-NEXT: v128.const $push[[R:[0-9]+]]=, 0x1.5p5, 0x1.5p5{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 define <2 x double> @max_const_intrinsic_v2f64() {
   %a = call <2 x double> @llvm.maximum.v2f64(

diff  --git a/llvm/test/CodeGen/WebAssembly/simd.ll b/llvm/test/CodeGen/WebAssembly/simd.ll
index 2934d2c9beac..25e647f07230 100644
--- a/llvm/test/CodeGen/WebAssembly/simd.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd.ll
@@ -36,7 +36,7 @@ define <16 x i8> @splat_v16i8(i8 %x) {
 }
 
 ; CHECK-LABEL: const_splat_v16i8:
-; SIMD128: i8x16.splat
+; SIMD128: v128.const $push0=, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42{{$}}
 define <16 x i8> @const_splat_v16i8() {
   ret <16 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42,
                  i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
@@ -299,7 +299,7 @@ define <8 x i16> @splat_v8i16(i16 %x) {
 }
 
 ; CHECK-LABEL: const_splat_v8i16:
-; SIMD128: i16x8.splat
+; SIMD128: v128.const $push0=, 42, 42, 42, 42, 42, 42, 42, 42{{$}}
 define <8 x i16> @const_splat_v8i16() {
   ret <8 x i16> <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
 }
@@ -547,7 +547,7 @@ define <4 x i32> @splat_v4i32(i32 %x) {
 }
 
 ; CHECK-LABEL: const_splat_v4i32:
-; SIMD128: i32x4.splat
+; SIMD128: v128.const $push0=, 42, 42, 42, 42{{$}}
 define <4 x i32> @const_splat_v4i32() {
   ret <4 x i32> <i32 42, i32 42, i32 42, i32 42>
 }
@@ -698,7 +698,7 @@ define <2 x i64> @splat_v2i64(i64 %x) {
 }
 
 ; CHECK-LABEL: const_splat_v2i64:
-; SIMD128: i64x2.splat
+; SIMD128: v128.const $push0=, 42, 42{{$}}
 define <2 x i64> @const_splat_v2i64() {
   ret <2 x i64> <i64 42, i64 42>
 }
@@ -847,7 +847,7 @@ define <4 x float> @splat_v4f32(float %x) {
 }
 
 ; CHECK-LABEL: const_splat_v4f32
-; SIMD128: f32x4.splat
+; SIMD128: v128.const $push0=, 0x1.5p5, 0x1.5p5, 0x1.5p5, 0x1.5p5{{$}}
 define <4 x float> @const_splat_v4f32() {
   ret <4 x float> <float 42., float 42., float 42., float 42.>
 }
@@ -998,7 +998,7 @@ define <2 x double> @splat_v2f64(double %x) {
 }
 
 ; CHECK-LABEL: const_splat_v2f64:
-; SIMD128: f64x2.splat
+; SIMD128: v128.const $push0=, 0x1.5p5, 0x1.5p5{{$}}
 define <2 x double> @const_splat_v2f64() {
   ret <2 x double> <double 42., double 42.>
 }