[llvm] fb66026 - [WebAssembly] Explicitly add {z,s}ext so extends are selected
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 6 04:30:29 PST 2023
Author: Luke Lau
Date: 2023-01-06T12:28:29Z
New Revision: fb6602616caa6d917543cdf0a429c9e2063a4a78
URL: https://github.com/llvm/llvm-project/commit/fb6602616caa6d917543cdf0a429c9e2063a4a78
DIFF: https://github.com/llvm/llvm-project/commit/fb6602616caa6d917543cdf0a429c9e2063a4a78.diff
LOG: [WebAssembly] Explicitly add {z,s}ext so extends are selected
During DAG legalization, {u,s}itofp instructions on v2i8, v2i16, v4i8
and v4i16 types ended up being legalized into scalar instructions, when
they could just be extended to v2i32/v4i32 instead.
Fixes https://github.com/llvm/llvm-project/issues/57182
Differential Revision: https://reviews.llvm.org/D140916
Added:
Modified:
llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
llvm/test/CodeGen/WebAssembly/simd-extending-convert.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index f32efde459024..94544800a6fba 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -2356,6 +2356,32 @@ performVECTOR_SHUFFLECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
return DAG.getBitcast(DstType, NewShuffle);
}
+/// Convert ({u,s}itofp vec) --> ({u,s}itofp ({z,s}ext vec)) so it doesn't get
+/// split up into scalar instructions during legalization, and the vector
+/// extending instructions are selected in performVectorExtendCombine below.
+static SDValue
+performVectorExtendToFPCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ auto &DAG = DCI.DAG;
+ assert(N->getOpcode() == ISD::UINT_TO_FP ||
+ N->getOpcode() == ISD::SINT_TO_FP);
+
+ EVT InVT = N->getOperand(0)->getValueType(0);
+ EVT ResVT = N->getValueType(0);
+ MVT ExtVT;
+ if (ResVT == MVT::v4f32 && (InVT == MVT::v4i16 || InVT == MVT::v4i8))
+ ExtVT = MVT::v4i32;
+ else if (ResVT == MVT::v2f64 && (InVT == MVT::v2i16 || InVT == MVT::v2i8))
+ ExtVT = MVT::v2i32;
+ else
+ return SDValue(); // Any other type pair: return an empty SDValue.
+
+ unsigned Op =
+ N->getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
+ SDValue Conv = DAG.getNode(Op, SDLoc(N), ExtVT, N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), SDLoc(N), ResVT, Conv);
+}
+
static SDValue
performVectorExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
auto &DAG = DCI.DAG;
@@ -2641,6 +2667,9 @@ WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
return performVectorExtendCombine(N, DCI);
+ case ISD::UINT_TO_FP:
+ case ISD::SINT_TO_FP:
+ return performVectorExtendToFPCombine(N, DCI);
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT:
case ISD::FP_ROUND:
diff --git a/llvm/test/CodeGen/WebAssembly/simd-extending-convert.ll b/llvm/test/CodeGen/WebAssembly/simd-extending-convert.ll
index b42754ad65172..2b871f1e84adc 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-extending-convert.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-extending-convert.ll
@@ -11,21 +11,8 @@ define <4 x float> @extend_to_float_low_i16x8_u(<8 x i16> %x) {
; CHECK: .functype extend_to_float_low_i16x8_u (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i16x8.extract_lane_u 0
-; CHECK-NEXT: f32.convert_i32_u
-; CHECK-NEXT: f32x4.splat
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i16x8.extract_lane_u 1
-; CHECK-NEXT: f32.convert_i32_u
-; CHECK-NEXT: f32x4.replace_lane 1
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i16x8.extract_lane_u 2
-; CHECK-NEXT: f32.convert_i32_u
-; CHECK-NEXT: f32x4.replace_lane 2
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i16x8.extract_lane_u 3
-; CHECK-NEXT: f32.convert_i32_u
-; CHECK-NEXT: f32x4.replace_lane 3
+; CHECK-NEXT: i32x4.extend_low_i16x8_u
+; CHECK-NEXT: f32x4.convert_i32x4_u
; CHECK-NEXT: # fallthrough-return
%low = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%extended = uitofp <4 x i16> %low to <4 x float>
@@ -37,21 +24,8 @@ define <4 x float> @extend_to_float_high_i16x8_u(<8 x i16> %x) {
; CHECK: .functype extend_to_float_high_i16x8_u (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i16x8.extract_lane_u 4
-; CHECK-NEXT: f32.convert_i32_u
-; CHECK-NEXT: f32x4.splat
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i16x8.extract_lane_u 5
-; CHECK-NEXT: f32.convert_i32_u
-; CHECK-NEXT: f32x4.replace_lane 1
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i16x8.extract_lane_u 6
-; CHECK-NEXT: f32.convert_i32_u
-; CHECK-NEXT: f32x4.replace_lane 2
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i16x8.extract_lane_u 7
-; CHECK-NEXT: f32.convert_i32_u
-; CHECK-NEXT: f32x4.replace_lane 3
+; CHECK-NEXT: i32x4.extend_high_i16x8_u
+; CHECK-NEXT: f32x4.convert_i32x4_u
; CHECK-NEXT: # fallthrough-return
%high = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%extended = uitofp <4 x i16> %high to <4 x float>
@@ -62,22 +36,10 @@ define <4 x float> @extend_to_float_low_i8x16_u(<8 x i8> %x) {
; CHECK-LABEL: extend_to_float_low_i8x16_u:
; CHECK: .functype extend_to_float_low_i8x16_u (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.extract_lane_u 0
-; CHECK-NEXT: f32.convert_i32_u
-; CHECK-NEXT: f32x4.splat
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.extract_lane_u 1
-; CHECK-NEXT: f32.convert_i32_u
-; CHECK-NEXT: f32x4.replace_lane 1
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.extract_lane_u 2
-; CHECK-NEXT: f32.convert_i32_u
-; CHECK-NEXT: f32x4.replace_lane 2
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.extract_lane_u 3
-; CHECK-NEXT: f32.convert_i32_u
-; CHECK-NEXT: f32x4.replace_lane 3
+; CHECK-NEXT: i8x16.shuffle 16, 1, 2, 3, 17, 5, 6, 7, 18, 9, 10, 11, 19, 13, 14, 15
+; CHECK-NEXT: f32x4.convert_i32x4_u
; CHECK-NEXT: # fallthrough-return
%low = shufflevector <8 x i8> %x, <8 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%extended = uitofp <4 x i8> %low to <4 x float>
@@ -89,21 +51,9 @@ define <4 x float> @extend_to_float_high_i8x16_u(<8 x i8> %x) {
; CHECK: .functype extend_to_float_high_i8x16_u (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.extract_lane_u 4
-; CHECK-NEXT: f32.convert_i32_u
-; CHECK-NEXT: f32x4.splat
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.extract_lane_u 5
-; CHECK-NEXT: f32.convert_i32_u
-; CHECK-NEXT: f32x4.replace_lane 1
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.extract_lane_u 6
-; CHECK-NEXT: f32.convert_i32_u
-; CHECK-NEXT: f32x4.replace_lane 2
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.extract_lane_u 7
-; CHECK-NEXT: f32.convert_i32_u
-; CHECK-NEXT: f32x4.replace_lane 3
+; CHECK-NEXT: v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 4, 17, 18, 19, 5, 21, 22, 23, 6, 25, 26, 27, 7, 29, 30, 31
+; CHECK-NEXT: f32x4.convert_i32x4_u
; CHECK-NEXT: # fallthrough-return
%high = shufflevector <8 x i8> %x, <8 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%extended = uitofp <4 x i8> %high to <4 x float>
@@ -115,21 +65,8 @@ define <4 x float> @extend_to_float_low_i16x8_s(<8 x i16> %x) {
; CHECK: .functype extend_to_float_low_i16x8_s (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i16x8.extract_lane_s 0
-; CHECK-NEXT: f32.convert_i32_s
-; CHECK-NEXT: f32x4.splat
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i16x8.extract_lane_s 1
-; CHECK-NEXT: f32.convert_i32_s
-; CHECK-NEXT: f32x4.replace_lane 1
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i16x8.extract_lane_s 2
-; CHECK-NEXT: f32.convert_i32_s
-; CHECK-NEXT: f32x4.replace_lane 2
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i16x8.extract_lane_s 3
-; CHECK-NEXT: f32.convert_i32_s
-; CHECK-NEXT: f32x4.replace_lane 3
+; CHECK-NEXT: i32x4.extend_low_i16x8_s
+; CHECK-NEXT: f32x4.convert_i32x4_s
; CHECK-NEXT: # fallthrough-return
%low = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%extended = sitofp <4 x i16> %low to <4 x float>
@@ -141,21 +78,8 @@ define <4 x float> @extend_to_float_high_i16x8_s(<8 x i16> %x) {
; CHECK: .functype extend_to_float_high_i16x8_s (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i16x8.extract_lane_s 4
-; CHECK-NEXT: f32.convert_i32_s
-; CHECK-NEXT: f32x4.splat
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i16x8.extract_lane_s 5
-; CHECK-NEXT: f32.convert_i32_s
-; CHECK-NEXT: f32x4.replace_lane 1
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i16x8.extract_lane_s 6
-; CHECK-NEXT: f32.convert_i32_s
-; CHECK-NEXT: f32x4.replace_lane 2
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i16x8.extract_lane_s 7
-; CHECK-NEXT: f32.convert_i32_s
-; CHECK-NEXT: f32x4.replace_lane 3
+; CHECK-NEXT: i32x4.extend_high_i16x8_s
+; CHECK-NEXT: f32x4.convert_i32x4_s
; CHECK-NEXT: # fallthrough-return
%high = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%extended = sitofp <4 x i16> %high to <4 x float>
@@ -167,21 +91,13 @@ define <4 x float> @extend_to_float_low_i8x16_s(<8 x i8> %x) {
; CHECK: .functype extend_to_float_low_i8x16_s (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.extract_lane_s 0
-; CHECK-NEXT: f32.convert_i32_s
-; CHECK-NEXT: f32x4.splat
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.extract_lane_s 1
-; CHECK-NEXT: f32.convert_i32_s
-; CHECK-NEXT: f32x4.replace_lane 1
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.extract_lane_s 2
-; CHECK-NEXT: f32.convert_i32_s
-; CHECK-NEXT: f32x4.replace_lane 2
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.extract_lane_s 3
-; CHECK-NEXT: f32.convert_i32_s
-; CHECK-NEXT: f32x4.replace_lane 3
+; CHECK-NEXT: i8x16.shuffle 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0
+; CHECK-NEXT: i32.const 24
+; CHECK-NEXT: i32x4.shl
+; CHECK-NEXT: i32.const 24
+; CHECK-NEXT: i32x4.shr_s
+; CHECK-NEXT: f32x4.convert_i32x4_s
; CHECK-NEXT: # fallthrough-return
%low = shufflevector <8 x i8> %x, <8 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%extended = sitofp <4 x i8> %low to <4 x float>
@@ -193,21 +109,13 @@ define <4 x float> @extend_to_float_high_i8x16_s(<8 x i8> %x) {
; CHECK: .functype extend_to_float_high_i8x16_s (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.extract_lane_s 4
-; CHECK-NEXT: f32.convert_i32_s
-; CHECK-NEXT: f32x4.splat
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.extract_lane_s 5
-; CHECK-NEXT: f32.convert_i32_s
-; CHECK-NEXT: f32x4.replace_lane 1
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.extract_lane_s 6
-; CHECK-NEXT: f32.convert_i32_s
-; CHECK-NEXT: f32x4.replace_lane 2
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.extract_lane_s 7
-; CHECK-NEXT: f32.convert_i32_s
-; CHECK-NEXT: f32x4.replace_lane 3
+; CHECK-NEXT: i8x16.shuffle 4, 0, 0, 0, 5, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0
+; CHECK-NEXT: i32.const 24
+; CHECK-NEXT: i32x4.shl
+; CHECK-NEXT: i32.const 24
+; CHECK-NEXT: i32x4.shr_s
+; CHECK-NEXT: f32x4.convert_i32x4_s
; CHECK-NEXT: # fallthrough-return
%high = shufflevector <8 x i8> %x, <8 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%extended = sitofp <4 x i8> %high to <4 x float>
@@ -230,14 +138,10 @@ define <2 x double> @extend_to_double_low_i16x4_u(<4 x i16> %x) {
; CHECK-LABEL: extend_to_double_low_i16x4_u:
; CHECK: .functype extend_to_double_low_i16x4_u (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: v128.const 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i16x8.extract_lane_u 0
-; CHECK-NEXT: f64.convert_i32_u
-; CHECK-NEXT: f64x2.splat
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i16x8.extract_lane_u 1
-; CHECK-NEXT: f64.convert_i32_u
-; CHECK-NEXT: f64x2.replace_lane 1
+; CHECK-NEXT: i8x16.shuffle 16, 17, 2, 3, 18, 19, 6, 7, 20, 21, 10, 11, 22, 23, 14, 15
+; CHECK-NEXT: f64x2.convert_low_i32x4_u
; CHECK-NEXT: # fallthrough-return
%low = shufflevector <4 x i16> %x, <4 x i16> undef, <2 x i32> <i32 0, i32 1>
%extended = uitofp <2 x i16> %low to <2 x double>
More information about the llvm-commits
mailing list