[llvm] 432e4e5 - Revert "[WebAssembly] Emulate v128.const efficiently"

Fri Oct 2 09:26:48 PDT 2020

Author: Stella Stamenova
Date: 2020-10-02T09:26:21-07:00
New Revision: 432e4e56d3d25c209b3336655aa374095e695956

URL: https://github.com/llvm/llvm-project/commit/432e4e56d3d25c209b3336655aa374095e695956
DIFF: https://github.com/llvm/llvm-project/commit/432e4e56d3d25c209b3336655aa374095e695956.diff

LOG: Revert "[WebAssembly] Emulate v128.const efficiently"

This reverts commit 542523a61a21c13e7f244bcf821b0fdeb8c6bb24.

Added: 
    

Modified: 
    llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
    llvm/test/CodeGen/WebAssembly/simd-build-vector.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 8474e50ea42f..425f8b86c9fb 100644

--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -30,7 +30,6 @@
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/IntrinsicsWebAssembly.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/Endian.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetOptions.h"
@@ -1566,7 +1565,6 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
     };
   } else if (NumConstantLanes >= NumSplatLanes &&
              Subtarget->hasUnimplementedSIMD128()) {
-    // If we support v128.const, emit it directly
     SmallVector<SDValue, 16> ConstLanes;
     for (const SDValue &Lane : Op->op_values()) {
       if (IsConstant(Lane)) {
@@ -1578,67 +1576,11 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
       }
     }
     Result = DAG.getBuildVector(VecT, DL, ConstLanes);
-    IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
+    IsLaneConstructed = [&](size_t _, const SDValue &Lane) {
       return IsConstant(Lane);
     };
-  } else if (NumConstantLanes >= NumSplatLanes && VecT.isInteger()) {
-    // Otherwise, if this is an integer vector, pack the lane values together so
-    // we can construct the 128-bit constant from a pair of i64s using a splat
-    // followed by at most one i64x2.replace_lane. Also keep track of the lanes
-    // that actually matter so we can avoid the replace_lane in more cases.
-    std::array<uint64_t, 2> I64s({0, 0});
-    std::array<uint64_t, 2> ConstLaneMasks({0, 0});
-    uint8_t *I64Bytes = reinterpret_cast<uint8_t *>(I64s.data());
-    uint8_t *MaskBytes = reinterpret_cast<uint8_t *>(ConstLaneMasks.data());
-    unsigned I = 0;
-    size_t ByteStep = VecT.getScalarSizeInBits() / 8;
-    for (const SDValue &Lane : Op->op_values()) {
-      if (IsConstant(Lane)) {
-        using llvm::support::little;
-        using llvm::support::endian::byte_swap;
-        // The endianness of the compiler matters here. We want to enforce
-        // little endianness so that the bytes of a smaller integer type will
-        // occur first in the uint64_t.
-        auto *Const = cast<ConstantSDNode>(Lane.getNode());
-        uint64_t Val = byte_swap(Const->getLimitedValue(), little);
-        uint8_t *ValPtr = reinterpret_cast<uint8_t *>(&Val);
-        std::copy(ValPtr, ValPtr + ByteStep, I64Bytes + I * ByteStep);
-        uint64_t Mask = uint64_t(-1LL);
-        uint8_t *MaskPtr = reinterpret_cast<uint8_t *>(&Mask);
-        std::copy(MaskPtr, MaskPtr + ByteStep, MaskBytes + I * ByteStep);
-      }
-      ++I;
-    }
-    // Check whether all constant lanes in the second half of the vector are
-    // equivalent in the first half or vice versa to determine whether splatting
-    // either side will be sufficient to materialize the constant. As a special
-    // case, if the first and second halves have no constant lanes in common, we
-    // can just combine them.
-    bool FirstHalfSufficient = (I64s[0] & ConstLaneMasks[1]) == I64s[1];
-    bool SecondHalfSufficient = (I64s[1] & ConstLaneMasks[0]) == I64s[0];
-    bool CombinedSufficient = (ConstLaneMasks[0] & ConstLaneMasks[1]) == 0;
-
-    uint64_t Splatted;
-    if (SecondHalfSufficient) {
-      Splatted = I64s[1];
-    } else if (CombinedSufficient) {
-      Splatted = I64s[0] | I64s[1];
-    } else {
-      Splatted = I64s[0];
-    }
-
-    Result = DAG.getSplatBuildVector(MVT::v2i64, DL,
-                                     DAG.getConstant(Splatted, DL, MVT::i64));
-    if (!FirstHalfSufficient && !SecondHalfSufficient && !CombinedSufficient) {
-      Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i64, Result,
-                           DAG.getConstant(I64s[1], DL, MVT::i64),
-                           DAG.getConstant(1, DL, MVT::i32));
-    }
-    Result = DAG.getBitcast(VecT, Result);
-    IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
-      return IsConstant(Lane);
-    };
-  } else {
+  }
+  if (!Result) {
     // Use a splat, but possibly a load_splat
     LoadSDNode *SplattedLoad;
     if ((SplattedLoad = dyn_cast<LoadSDNode>(SplatValue)) &&
@@ -1651,14 +1593,11 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
     } else {
       Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
     }
-    IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) {
+    IsLaneConstructed = [&](size_t _, const SDValue &Lane) {
       return Lane == SplatValue;
     };
   }
 
-  assert(Result);
-  assert(IsLaneConstructed);
-
   // Add replace_lane instructions for any unhandled values
   for (size_t I = 0; I < Lanes; ++I) {
     const SDValue &Lane = Op->getOperand(I);

diff --git a/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll b/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll
index afd7375d146a..43cfa97933f8 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll
@@ -8,73 +8,12 @@
 target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
 target triple = "wasm32-unknown-unknown"
 
-; CHECK-LABEL:  emulated_const_trivial_splat:
-; CHECK-NEXT:   .functype       emulated_const_trivial_splat () -> (v128)
-; SIMD-VM-NEXT: i64.const       $push0=, 8589934593
-; SIMD-VM-NEXT: i64x2.splat     $push1=, $pop0
-; SIMD-VM-NEXT: return  $pop1
-; UNIMP: v128.const
-define <4 x i32> @emulated_const_trivial_splat() {
-  ret <4 x i32> <i32 1, i32 2, i32 1, i32 2>
-}
-
-; CHECK-LABEL:  emulated_const_first_sufficient:
-; CHECK-NEXT:   .functype       emulated_const_first_sufficient () -> (v128)
-; SIMD-VM-NEXT: i64.const       $push0=, 8589934593
-; SIMD-VM-NEXT: i64x2.splat     $push1=, $pop0
-; SIMD-VM-NEXT: return  $pop1
-; UNIMP: v128.const
-define <4 x i32> @emulated_const_first_sufficient() {
-  ret <4 x i32> <i32 1, i32 2, i32 undef, i32 2>
-}
-
-; CHECK-LABEL:  emulated_const_second_sufficient:
-; CHECK-NEXT:   .functype       emulated_const_second_sufficient () -> (v128)
-; SIMD-VM-NEXT: i64.const       $push0=, 8589934593
-; SIMD-VM-NEXT: i64x2.splat     $push1=, $pop0
-; SIMD-VM-NEXT: return  $pop1
-; UNIMP: v128.const
-define <4 x i32> @emulated_const_second_sufficient() {
-  ret <4 x i32> <i32 1, i32 undef, i32 1, i32 2>
-}
-
-; CHECK-LABEL:  emulated_const_combined_sufficient:
-; CHECK-NEXT:   .functype       emulated_const_combined_sufficient () -> (v128)
-; SIMD-VM-NEXT: i64.const       $push0=, 8589934593
-; SIMD-VM-NEXT: i64x2.splat     $push1=, $pop0
-; SIMD-VM-NEXT: return  $pop1
-; UNIMP: v128.const
-define <4 x i32> @emulated_const_combined_sufficient() {
-  ret <4 x i32> <i32 1, i32 undef, i32 undef, i32 2>
-}
-
-; CHECK-LABEL:  emulated_const_either_sufficient:
-; CHECK-NEXT:   .functype       emulated_const_either_sufficient () -> (v128)
-; SIMD-VM-NEXT: i64.const       $push0=, 1
-; SIMD-VM-NEXT: i64x2.splat     $push1=, $pop0
-; SIMD-VM-NEXT: return  $pop1
-; UNIMP: v128.const
-define <4 x i32> @emulated_const_either_sufficient() {
-  ret <4 x i32> <i32 1, i32 undef, i32 1, i32 undef>
-}
-
-; CHECK-LABEL: emulated_const_neither_sufficient:
-; CHECK-NEXT:   .functype       emulated_const_neither_sufficient () -> (v128)
-; SIMD-VM-NEXT: i64.const       $push0=, 8589934593
-; SIMD-VM-NEXT: i64x2.splat     $push1=, $pop0
-; SIMD-VM-NEXT: i64.const       $push2=, 17179869184
-; SIMD-VM-NEXT: i64x2.replace_lane      $push3=, $pop1, 1, $pop2
-; SIMD-VM-NEXT: return  $pop3
-define <4 x i32> @emulated_const_neither_sufficient() {
-  ret <4 x i32> <i32 1, i32 2, i32 undef, i32 4>
-}
-
 ; CHECK-LABEL: same_const_one_replaced_i16x8:
 ; CHECK-NEXT:  .functype       same_const_one_replaced_i16x8 (i32) -> (v128)
 ; UNIMP-NEXT:  v128.const      $push[[L0:[0-9]+]]=, 42, 42, 42, 42, 42, 0, 42, 42
 ; UNIMP-NEXT:  i16x8.replace_lane      $push[[L1:[0-9]+]]=, $pop[[L0]], 5, $0
 ; UNIMP-NEXT:  return          $pop[[L1]]
-; SIMD-VM: i64x2.splat
+; SIMD-VM: i16x8.splat
 define <8 x i16> @same_const_one_replaced_i16x8(i16 %x) {
   %v = insertelement
     <8 x i16> <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>,
@@ -88,7 +27,7 @@ define <8 x i16> @same_const_one_replaced_i16x8(i16 %x) {
 ; UNIMP-NEXT:  v128.const      $push[[L0:[0-9]+]]=, 1, -2, 3, -4, 5, 0, 7, -8
 ; UNIMP-NEXT:  i16x8.replace_lane      $push[[L1:[0-9]+]]=, $pop[[L0]], 5, $0
 ; UNIMP-NEXT:  return          $pop[[L1]]
-; SIMD-VM: i64x2.splat
+; SIMD-VM: i16x8.splat
 define <8 x i16> @different_const_one_replaced_i16x8(i16 %x) {
   %v = insertelement
     <8 x i16> <i16 1, i16 -2, i16 3, i16 -4, i16 5, i16 -6, i16 7, i16 -8>,
@@ -129,7 +68,7 @@ define <4 x float> @different_const_one_replaced_f32x4(float %x) {
 ; CHECK-NEXT:  .functype       splat_common_const_i32x4 () -> (v128)
 ; UNIMP-NEXT:  v128.const      $push[[L0:[0-9]+]]=, 0, 3, 3, 1
 ; UNIMP-NEXT:  return          $pop[[L0]]
-; SIMD-VM: i64x2.splat
+; SIMD-VM: i32x4.splat
 define <4 x i32> @splat_common_const_i32x4() {
   ret <4 x i32> <i32 undef, i32 3, i32 3, i32 1>
 }
@@ -267,7 +206,7 @@ define <16 x i8> @mashup_swizzle_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %spla
 ; UNIMP:       i8x16.replace_lane
 ; UNIMP:       i8x16.replace_lane
 ; UNIMP:       return
-; SIMD-VM: i64x2.splat
+; SIMD-VM: i8x16.splat
 define <16 x i8> @mashup_const_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
   ; swizzle 0
   %m0 = extractelement <16 x i8> %mask, i32 0