[llvm] 043eaa9 - [WebAssembly][NFC] Simplify vector shift lowering and add tests

Fri Jul 10 00:19:06 PDT 2020

Author: Thomas Lively
Date: 2020-07-10T00:18:59-07:00
New Revision: 043eaa9a4a0808fe4e82b2ef1823ccafa491c065

URL: https://github.com/llvm/llvm-project/commit/043eaa9a4a0808fe4e82b2ef1823ccafa491c065
DIFF: https://github.com/llvm/llvm-project/commit/043eaa9a4a0808fe4e82b2ef1823ccafa491c065.diff

LOG: [WebAssembly][NFC] Simplify vector shift lowering and add tests

This patch builds on 0d7286a652 by simplifying the code for detecting
splat values and adding new tests demonstrating the lowering of
splatted absolute value shift amounts, which are common in code
generated by Halide. The current lowering is poor: as the new tests
show, these shifts are fully unrolled into per-lane scalar shifts.
Subsequent patches will improve it considerably, and the tests will be
useful for evaluating the improvements in those patches.

Reviewed By: aheejin

Differential Revision: https://reviews.llvm.org/D83493

Added: 
    

Modified: 
    llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
    llvm/test/CodeGen/WebAssembly/simd-shift-complex-splats.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 3f4ebd501595..a9b9eceb4130 100644

--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -1677,12 +1677,12 @@ SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
   // Only manually lower vector shifts
   assert(Op.getSimpleValueType().isVector());
 
-  auto ShiftVal = Op.getOperand(1);
-  if (!DAG.isSplatValue(ShiftVal, /*AllowUndefs=*/true))
+  auto ShiftVal = DAG.getSplatValue(Op.getOperand(1));
+  if (!ShiftVal)
     return unrollVectorShift(Op, DAG);
 
-  auto SplatVal = DAG.getSplatValue(ShiftVal);
-  assert(SplatVal != SDValue());
+  // Use anyext because none of the high bits can affect the shift
+  ShiftVal = DAG.getAnyExtOrTrunc(ShiftVal, DL, MVT::i32);
 
   unsigned Opcode;
   switch (Op.getOpcode()) {
@@ -1699,10 +1699,7 @@ SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
     llvm_unreachable("unexpected opcode");
   }
 
-  // Use anyext because none of the high bits can affect the shift
-  auto ScalarShift = DAG.getAnyExtOrTrunc(SplatVal, DL, MVT::i32);
-  return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0),
-                     ScalarShift);
+  return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0), ShiftVal);
 }
 
 //===----------------------------------------------------------------------===//

diff  --git a/llvm/test/CodeGen/WebAssembly/simd-shift-complex-splats.ll b/llvm/test/CodeGen/WebAssembly/simd-shift-complex-splats.ll
index ded430f89545..2473f0b27b7e 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-shift-complex-splats.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-shift-complex-splats.ll
@@ -25,3 +25,79 @@ define <16 x i8> @shl_add(<16 x i8> %v, i8 %a, i8 %b) {
   %r = shl <16 x i8> %v, %shift
   ret <16 x i8> %r
 }
+
+; CHECK-LABEL: shl_abs:
+; CHECK-NEXT: .functype shl_abs (v128, i32) -> (v128)
+; CHECK-NEXT: i8x16.extract_lane_u $push8=, $0, 0
+; CHECK-NEXT: i8x16.splat $push0=, $1
+; CHECK-NEXT: i8x16.abs $push98=, $pop0
+; CHECK-NEXT: local.tee $push97=, $2=, $pop98
+; CHECK-NEXT: i8x16.extract_lane_u $push6=, $pop97, 0
+; CHECK-NEXT: i32.const $push2=, 7
+; CHECK-NEXT: i32.and $push7=, $pop6, $pop2
+; CHECK-NEXT: i32.shl $push9=, $pop8, $pop7
+; CHECK-NEXT: i8x16.splat $push10=, $pop9
+; CHECK-NEXT: i8x16.extract_lane_u $push4=, $0, 1
+; CHECK-NEXT: i8x16.extract_lane_u $push1=, $2, 1
+; CHECK-NEXT: i32.const $push96=, 7
+; CHECK-NEXT: i32.and $push3=, $pop1, $pop96
+; CHECK-NEXT: i32.shl $push5=, $pop4, $pop3
+; CHECK-NEXT: i8x16.replace_lane $push11=, $pop10, 1, $pop5
+; ...
+; CHECK:      i8x16.extract_lane_u $push79=, $0, 15
+; CHECK-NEXT: i8x16.extract_lane_u $push77=, $2, 15
+; CHECK-NEXT: i32.const $push82=, 7
+; CHECK-NEXT: i32.and $push78=, $pop77, $pop82
+; CHECK-NEXT: i32.shl $push80=, $pop79, $pop78
+; CHECK-NEXT: i8x16.replace_lane $push81=, $pop76, 15, $pop80
+; CHECK-NEXT: return $pop81
+define <16 x i8> @shl_abs(<16 x i8> %v, i8 %a) {
+  %t1 = insertelement <16 x i8> undef, i8 %a, i32 0
+  %va = shufflevector <16 x i8> %t1, <16 x i8> undef, <16 x i32> zeroinitializer
+  %nva = sub <16 x i8> zeroinitializer, %va
+  %c = icmp sgt <16 x i8> %va, zeroinitializer
+  %shift = select <16 x i1> %c, <16 x i8> %va, <16 x i8> %nva
+  %r = shl <16 x i8> %v, %shift
+  ret <16 x i8> %r
+}
+
+; CHECK-LABEL: shl_abs_add:
+; CHECK-NEXT: .functype shl_abs_add (v128, i32, i32) -> (v128)
+; CHECK-NEXT: i8x16.extract_lane_u $push11=, $0, 0
+; CHECK-NEXT: i8x16.splat $push1=, $1
+; CHECK-NEXT: i8x16.splat $push0=, $2
+; CHECK-NEXT: i8x16.add $push2=, $pop1, $pop0
+; CHECK-NEXT: v8x16.shuffle $push3=, $pop2, $0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.abs $push101=, $pop3
+; CHECK-NEXT: local.tee $push100=, $3=, $pop101
+; CHECK-NEXT: i8x16.extract_lane_u $push9=, $pop100, 0
+; CHECK-NEXT: i32.const $push5=, 7
+; CHECK-NEXT: i32.and $push10=, $pop9, $pop5
+; CHECK-NEXT: i32.shl $push12=, $pop11, $pop10
+; CHECK-NEXT: i8x16.splat $push13=, $pop12
+; CHECK-NEXT: i8x16.extract_lane_u $push7=, $0, 1
+; CHECK-NEXT: i8x16.extract_lane_u $push4=, $3, 1
+; CHECK-NEXT: i32.const $push99=, 7
+; CHECK-NEXT: i32.and $push6=, $pop4, $pop99
+; CHECK-NEXT: i32.shl $push8=, $pop7, $pop6
+; CHECK-NEXT: i8x16.replace_lane $push14=, $pop13, 1, $pop8
+; ...
+; CHECK:      i8x16.extract_lane_u $push82=, $0, 15
+; CHECK-NEXT: i8x16.extract_lane_u $push80=, $3, 15
+; CHECK-NEXT: i32.const $push85=, 7
+; CHECK-NEXT: i32.and $push81=, $pop80, $pop85
+; CHECK-NEXT: i32.shl $push83=, $pop82, $pop81
+; CHECK-NEXT: i8x16.replace_lane $push84=, $pop79, 15, $pop83
+; CHECK-NEXT: return $pop84
+define <16 x i8> @shl_abs_add(<16 x i8> %v, i8 %a, i8 %b) {
+  %t1 = insertelement <16 x i8> undef, i8 %a, i32 0
+  %va = shufflevector <16 x i8> %t1, <16 x i8> undef, <16 x i32> zeroinitializer
+  %t2 = insertelement <16 x i8> undef, i8 %b, i32 0
+  %vb = shufflevector <16 x i8> %t2, <16 x i8> undef, <16 x i32> zeroinitializer
+  %vadd = add <16 x i8> %va, %vb
+  %nvadd = sub <16 x i8> zeroinitializer, %vadd
+  %c = icmp sgt <16 x i8> %vadd, zeroinitializer
+  %shift = select <16 x i1> %c, <16 x i8> %vadd, <16 x i8> %nvadd
+  %r = shl <16 x i8> %v, %shift
+  ret <16 x i8> %r
+}