[llvm] 043eaa9 - [WebAssembly][NFC] Simplify vector shift lowering and add tests
Thomas Lively via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 10 00:19:06 PDT 2020
Author: Thomas Lively
Date: 2020-07-10T00:18:59-07:00
New Revision: 043eaa9a4a0808fe4e82b2ef1823ccafa491c065
URL: https://github.com/llvm/llvm-project/commit/043eaa9a4a0808fe4e82b2ef1823ccafa491c065
DIFF: https://github.com/llvm/llvm-project/commit/043eaa9a4a0808fe4e82b2ef1823ccafa491c065.diff
LOG: [WebAssembly][NFC] Simplify vector shift lowering and add tests
This patch builds on 0d7286a652 by simplifying the code for detecting
splat values and by adding new tests that demonstrate the lowering of
splatted absolute-value shift amounts, which are common in code
generated by Halide. The lowering is currently very poor (the vector
shift is fully unrolled into per-lane scalar shifts), but subsequent
patches will improve it considerably. The tests will be useful for
evaluating the improvements in those patches.
Reviewed By: aheejin
Differential Revision: https://reviews.llvm.org/D83493
Added:
Modified:
llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
llvm/test/CodeGen/WebAssembly/simd-shift-complex-splats.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 3f4ebd501595..a9b9eceb4130 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -1677,12 +1677,12 @@ SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
// Only manually lower vector shifts
assert(Op.getSimpleValueType().isVector());
- auto ShiftVal = Op.getOperand(1);
- if (!DAG.isSplatValue(ShiftVal, /*AllowUndefs=*/true))
+ auto ShiftVal = DAG.getSplatValue(Op.getOperand(1));
+ if (!ShiftVal)
return unrollVectorShift(Op, DAG);
- auto SplatVal = DAG.getSplatValue(ShiftVal);
- assert(SplatVal != SDValue());
+ // Use anyext because none of the high bits can affect the shift
+ ShiftVal = DAG.getAnyExtOrTrunc(ShiftVal, DL, MVT::i32);
unsigned Opcode;
switch (Op.getOpcode()) {
@@ -1699,10 +1699,7 @@ SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
llvm_unreachable("unexpected opcode");
}
- // Use anyext because none of the high bits can affect the shift
- auto ScalarShift = DAG.getAnyExtOrTrunc(SplatVal, DL, MVT::i32);
- return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0),
- ScalarShift);
+ return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0), ShiftVal);
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/WebAssembly/simd-shift-complex-splats.ll b/llvm/test/CodeGen/WebAssembly/simd-shift-complex-splats.ll
index ded430f89545..2473f0b27b7e 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-shift-complex-splats.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-shift-complex-splats.ll
@@ -25,3 +25,79 @@ define <16 x i8> @shl_add(<16 x i8> %v, i8 %a, i8 %b) {
%r = shl <16 x i8> %v, %shift
ret <16 x i8> %r
}
+
+; CHECK-LABEL: shl_abs:
+; CHECK-NEXT: .functype shl_abs (v128, i32) -> (v128)
+; CHECK-NEXT: i8x16.extract_lane_u $push8=, $0, 0
+; CHECK-NEXT: i8x16.splat $push0=, $1
+; CHECK-NEXT: i8x16.abs $push98=, $pop0
+; CHECK-NEXT: local.tee $push97=, $2=, $pop98
+; CHECK-NEXT: i8x16.extract_lane_u $push6=, $pop97, 0
+; CHECK-NEXT: i32.const $push2=, 7
+; CHECK-NEXT: i32.and $push7=, $pop6, $pop2
+; CHECK-NEXT: i32.shl $push9=, $pop8, $pop7
+; CHECK-NEXT: i8x16.splat $push10=, $pop9
+; CHECK-NEXT: i8x16.extract_lane_u $push4=, $0, 1
+; CHECK-NEXT: i8x16.extract_lane_u $push1=, $2, 1
+; CHECK-NEXT: i32.const $push96=, 7
+; CHECK-NEXT: i32.and $push3=, $pop1, $pop96
+; CHECK-NEXT: i32.shl $push5=, $pop4, $pop3
+; CHECK-NEXT: i8x16.replace_lane $push11=, $pop10, 1, $pop5
+; ...
+; CHECK: i8x16.extract_lane_u $push79=, $0, 15
+; CHECK-NEXT: i8x16.extract_lane_u $push77=, $2, 15
+; CHECK-NEXT: i32.const $push82=, 7
+; CHECK-NEXT: i32.and $push78=, $pop77, $pop82
+; CHECK-NEXT: i32.shl $push80=, $pop79, $pop78
+; CHECK-NEXT: i8x16.replace_lane $push81=, $pop76, 15, $pop80
+; CHECK-NEXT: return $pop81
+define <16 x i8> @shl_abs(<16 x i8> %v, i8 %a) {
+ %t1 = insertelement <16 x i8> undef, i8 %a, i32 0
+ %va = shufflevector <16 x i8> %t1, <16 x i8> undef, <16 x i32> zeroinitializer
+ %nva = sub <16 x i8> zeroinitializer, %va
+ %c = icmp sgt <16 x i8> %va, zeroinitializer
+ %shift = select <16 x i1> %c, <16 x i8> %va, <16 x i8> %nva
+ %r = shl <16 x i8> %v, %shift
+ ret <16 x i8> %r
+}
+
+; CHECK-LABEL: shl_abs_add:
+; CHECK-NEXT: .functype shl_abs_add (v128, i32, i32) -> (v128)
+; CHECK-NEXT: i8x16.extract_lane_u $push11=, $0, 0
+; CHECK-NEXT: i8x16.splat $push1=, $1
+; CHECK-NEXT: i8x16.splat $push0=, $2
+; CHECK-NEXT: i8x16.add $push2=, $pop1, $pop0
+; CHECK-NEXT: v8x16.shuffle $push3=, $pop2, $0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.abs $push101=, $pop3
+; CHECK-NEXT: local.tee $push100=, $3=, $pop101
+; CHECK-NEXT: i8x16.extract_lane_u $push9=, $pop100, 0
+; CHECK-NEXT: i32.const $push5=, 7
+; CHECK-NEXT: i32.and $push10=, $pop9, $pop5
+; CHECK-NEXT: i32.shl $push12=, $pop11, $pop10
+; CHECK-NEXT: i8x16.splat $push13=, $pop12
+; CHECK-NEXT: i8x16.extract_lane_u $push7=, $0, 1
+; CHECK-NEXT: i8x16.extract_lane_u $push4=, $3, 1
+; CHECK-NEXT: i32.const $push99=, 7
+; CHECK-NEXT: i32.and $push6=, $pop4, $pop99
+; CHECK-NEXT: i32.shl $push8=, $pop7, $pop6
+; CHECK-NEXT: i8x16.replace_lane $push14=, $pop13, 1, $pop8
+; ...
+; CHECK: i8x16.extract_lane_u $push82=, $0, 15
+; CHECK-NEXT: i8x16.extract_lane_u $push80=, $3, 15
+; CHECK-NEXT: i32.const $push85=, 7
+; CHECK-NEXT: i32.and $push81=, $pop80, $pop85
+; CHECK-NEXT: i32.shl $push83=, $pop82, $pop81
+; CHECK-NEXT: i8x16.replace_lane $push84=, $pop79, 15, $pop83
+; CHECK-NEXT: return $pop84
+define <16 x i8> @shl_abs_add(<16 x i8> %v, i8 %a, i8 %b) {
+ %t1 = insertelement <16 x i8> undef, i8 %a, i32 0
+ %va = shufflevector <16 x i8> %t1, <16 x i8> undef, <16 x i32> zeroinitializer
+ %t2 = insertelement <16 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <16 x i8> %t2, <16 x i8> undef, <16 x i32> zeroinitializer
+ %vadd = add <16 x i8> %va, %vb
+ %nvadd = sub <16 x i8> zeroinitializer, %vadd
+ %c = icmp sgt <16 x i8> %vadd, zeroinitializer
+ %shift = select <16 x i1> %c, <16 x i8> %vadd, <16 x i8> %nvadd
+ %r = shl <16 x i8> %v, %shift
+ ret <16 x i8> %r
+}
More information about the llvm-commits mailing list