[llvm] 3dd75f5 - [WebAssembly] Scalarize extract_vector_elt of binops
Thomas Lively via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 8 14:32:00 PDT 2021
Author: Thomas Lively
Date: 2021-07-08T14:31:53-07:00
New Revision: 3dd75f53710683fe3616eb64a2c1865ade43b3f7
URL: https://github.com/llvm/llvm-project/commit/3dd75f53710683fe3616eb64a2c1865ade43b3f7
DIFF: https://github.com/llvm/llvm-project/commit/3dd75f53710683fe3616eb64a2c1865ade43b3f7.diff
LOG: [WebAssembly] Scalarize extract_vector_elt of binops
Override the `shouldScalarizeBinop` target lowering hook with the same
implementation used in the x86 backend. This causes `extract_vector_elt`s of
vector binary ops to be scalarized whenever the scalarized version would be supported.
Differential Revision: https://reviews.llvm.org/D105646
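
As a rough illustration of the pattern this enables (a hypothetical example,
not taken from the commit), an extract of a single lane of a vector binop with
a constant operand can now be selected as a scalar op on the extracted lane
rather than a full v128 op followed by an extract_lane:

; Hypothetical example. With shouldScalarizeBinop returning true, the DAG
; combiner can rewrite (extract_vector_elt (and %a, splat 31), 0) into
; (and (extract_vector_elt %a, 0), 31), trading a v128.and for an i32.and.
define i32 @extract_of_masked_lane(<4 x i32> %a) {
  %m = and <4 x i32> %a, <i32 31, i32 31, i32 31, i32 31>
  %lane = extractelement <4 x i32> %m, i32 0
  ret i32 %lane
}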
Added:
Modified:
llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
llvm/test/CodeGen/WebAssembly/masked-shifts.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 76427653770f3..bd676d636af61 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -335,6 +335,26 @@ WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
return AtomicExpansionKind::CmpXChg;
}
+bool WebAssemblyTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
+ // Implementation copied from X86TargetLowering.
+ unsigned Opc = VecOp.getOpcode();
+
+ // Assume target opcodes can't be scalarized.
+ // TODO - do we have any exceptions?
+ if (Opc >= ISD::BUILTIN_OP_END)
+ return false;
+
+ // If the vector op is not supported, try to convert to scalar.
+ EVT VecVT = VecOp.getValueType();
+ if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
+ return true;
+
+ // If the vector op is supported, but the scalar op is not, the transform may
+ // not be worthwhile.
+ EVT ScalarVT = VecVT.getScalarType();
+ return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
+}
+
FastISel *WebAssemblyTargetLowering::createFastISel(
FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const {
return WebAssembly::createFastISel(FuncInfo, LibInfo);
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
index 632c039deac09..df9a4ad0280cc 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -51,6 +51,7 @@ class WebAssemblyTargetLowering final : public TargetLowering {
const WebAssemblySubtarget *Subtarget;
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
+ bool shouldScalarizeBinop(SDValue VecOp) const override;
FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo) const override;
MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;
diff --git a/llvm/test/CodeGen/WebAssembly/masked-shifts.ll b/llvm/test/CodeGen/WebAssembly/masked-shifts.ll
index 75db5e190bd22..8feccaa478fe4 100644
--- a/llvm/test/CodeGen/WebAssembly/masked-shifts.ll
+++ b/llvm/test/CodeGen/WebAssembly/masked-shifts.ll
@@ -3,8 +3,6 @@
;; Check that masked shift counts are optimized out.
-;; TODO: optimize the *_late functions.
-
target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
@@ -336,10 +334,6 @@ define <4 x i32> @shl_v4i32_late(<4 x i32> %v, i32 %x) {
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i32x4.splat
-; CHECK-NEXT: v128.const 31, 31, 31, 31
-; CHECK-NEXT: v128.and
-; CHECK-NEXT: i32x4.extract_lane 0
; CHECK-NEXT: i32x4.shl
; CHECK-NEXT: # fallthrough-return
%t = insertelement <4 x i32> undef, i32 %x, i32 0
@@ -372,10 +366,6 @@ define <4 x i32> @ashr_v4i32_late(<4 x i32> %v, i32 %x) {
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i32x4.splat
-; CHECK-NEXT: v128.const 31, 31, 31, 31
-; CHECK-NEXT: v128.and
-; CHECK-NEXT: i32x4.extract_lane 0
; CHECK-NEXT: i32x4.shr_s
; CHECK-NEXT: # fallthrough-return
%t = insertelement <4 x i32> undef, i32 %x, i32 0
@@ -408,10 +398,6 @@ define <4 x i32> @lshr_v4i32_late(<4 x i32> %v, i32 %x) {
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i32x4.splat
-; CHECK-NEXT: v128.const 31, 31, 31, 31
-; CHECK-NEXT: v128.and
-; CHECK-NEXT: i32x4.extract_lane 0
; CHECK-NEXT: i32x4.shr_u
; CHECK-NEXT: # fallthrough-return
%t = insertelement <4 x i32> undef, i32 %x, i32 0
@@ -444,10 +430,6 @@ define <2 x i64> @shl_v2i64_late(<2 x i64> %v, i64 %x) {
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i64x2.splat
-; CHECK-NEXT: v128.const 63, 63
-; CHECK-NEXT: v128.and
-; CHECK-NEXT: i64x2.extract_lane 0
; CHECK-NEXT: i32.wrap_i64
; CHECK-NEXT: i64x2.shl
; CHECK-NEXT: # fallthrough-return
@@ -480,10 +462,6 @@ define <2 x i64> @ashr_v2i64_late(<2 x i64> %v, i64 %x) {
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i64x2.splat
-; CHECK-NEXT: v128.const 63, 63
-; CHECK-NEXT: v128.and
-; CHECK-NEXT: i64x2.extract_lane 0
; CHECK-NEXT: i32.wrap_i64
; CHECK-NEXT: i64x2.shr_s
; CHECK-NEXT: # fallthrough-return
@@ -516,10 +494,6 @@ define <2 x i64> @lshr_v2i64_late(<2 x i64> %v, i64 %x) {
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i64x2.splat
-; CHECK-NEXT: v128.const 63, 63
-; CHECK-NEXT: v128.and
-; CHECK-NEXT: i64x2.extract_lane 0
; CHECK-NEXT: i32.wrap_i64
; CHECK-NEXT: i64x2.shr_u
; CHECK-NEXT: # fallthrough-return
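
For context, the *_late functions in this test mask the shift count after
splatting it into a vector, roughly as in the hypothetical sketch below (not
verbatim from masked-shifts.ll). WebAssembly shift instructions already take
the count modulo the lane width, so once the extract of the `and` is
scalarized to a scalar `and x, 31`, the redundant mask folds away, which is
why the splat/v128.const/v128.and/extract_lane sequences disappear from the
CHECK lines above.

; Hypothetical sketch of the *_late pattern (not copied from the test file).
; The count %x is splatted, masked, and used for a v128 shift; the backend
; extracts the splatted lane back out to feed the scalar count of i32x4.shl.
define <4 x i32> @shl_v4i32_late_sketch(<4 x i32> %v, i32 %x) {
  %t = insertelement <4 x i32> undef, i32 %x, i32 0
  %s = shufflevector <4 x i32> %t, <4 x i32> undef, <4 x i32> zeroinitializer
  %m = and <4 x i32> %s, <i32 31, i32 31, i32 31, i32 31>
  %r = shl <4 x i32> %v, %m
  ret <4 x i32> %r
}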