[llvm] fd0557d - [WebAssembly] More convert_low and promote_low codegen
Thomas Lively via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 19 15:37:24 PDT 2021
Author: Thomas Lively
Date: 2021-08-19T15:37:12-07:00
New Revision: fd0557dbf13198ddd6a17bd120a7b2bb97bfbd0a
URL: https://github.com/llvm/llvm-project/commit/fd0557dbf13198ddd6a17bd120a7b2bb97bfbd0a
DIFF: https://github.com/llvm/llvm-project/commit/fd0557dbf13198ddd6a17bd120a7b2bb97bfbd0a.diff
LOG: [WebAssembly] More convert_low and promote_low codegen
The convert_low and promote_low instructions can widen the lower two lanes of a
four-lane vector, but we were previously scalarizing patterns that widened lanes
besides the low two lanes. This commit adds a shuffle to move the widened lanes
into the low lane positions so the convert_low and promote_low instructions can
be used instead of scalarizing.
Depends on D108266.
Differential Revision: https://reviews.llvm.org/D108341
Added:
Modified:
llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
llvm/test/CodeGen/WebAssembly/simd-conversions.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index c418a3811e5e..59b4812dc056 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -1779,6 +1779,7 @@ WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
}
static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) {
+ SDLoc DL(Op);
if (Op.getValueType() != MVT::v2f64)
return SDValue();
@@ -1816,10 +1817,7 @@ static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) {
!GetConvertedLane(Op.getOperand(1), RHSOpcode, RHSSrcVec, RHSIndex))
return SDValue();
- if (LHSOpcode != RHSOpcode || LHSSrcVec != RHSSrcVec)
- return SDValue();
-
- if (LHSIndex != 0 || RHSIndex != 1)
+ if (LHSOpcode != RHSOpcode)
return SDValue();
MVT ExpectedSrcVT;
@@ -1835,7 +1833,13 @@ static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) {
if (LHSSrcVec.getValueType() != ExpectedSrcVT)
return SDValue();
- return DAG.getNode(LHSOpcode, SDLoc(Op), MVT::v2f64, LHSSrcVec);
+ auto Src = LHSSrcVec;
+ if (LHSIndex != 0 || RHSIndex != 1 || LHSSrcVec != RHSSrcVec) {
+ // Shuffle the source vector so that the converted lanes are the low lanes.
+ Src = DAG.getVectorShuffle(ExpectedSrcVT, DL, LHSSrcVec, RHSSrcVec,
+ {LHSIndex, RHSIndex + 4, -1, -1});
+ }
+ return DAG.getNode(LHSOpcode, DL, MVT::v2f64, Src);
}
SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
diff --git a/llvm/test/CodeGen/WebAssembly/simd-conversions.ll b/llvm/test/CodeGen/WebAssembly/simd-conversions.ll
index c624058944a8..aacbce01f382 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-conversions.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-conversions.ll
@@ -317,13 +317,9 @@ define <4 x double> @convert_low_s_v4f64(<8 x i32> %x) {
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i32x4.extract_lane 2
-; CHECK-NEXT: f64.convert_i32_s
-; CHECK-NEXT: f64x2.splat
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i32x4.extract_lane 3
-; CHECK-NEXT: f64.convert_i32_s
-; CHECK-NEXT: f64x2.replace_lane 1
+; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: f64x2.convert_low_i32x4_s
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: # fallthrough-return
%v = shufflevector <8 x i32> %x, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -341,13 +337,9 @@ define <4 x double> @convert_low_u_v4f64(<8 x i32> %x) {
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i32x4.extract_lane 2
-; CHECK-NEXT: f64.convert_i32_u
-; CHECK-NEXT: f64x2.splat
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i32x4.extract_lane 3
-; CHECK-NEXT: f64.convert_i32_u
-; CHECK-NEXT: f64x2.replace_lane 1
+; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: f64x2.convert_low_i32x4_u
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: # fallthrough-return
%v = shufflevector <8 x i32> %x, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -366,13 +358,9 @@ define <4 x double> @convert_low_s_v4f64_2(<8 x i32> %x) {
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i32x4.extract_lane 2
-; CHECK-NEXT: f64.convert_i32_s
-; CHECK-NEXT: f64x2.splat
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i32x4.extract_lane 3
-; CHECK-NEXT: f64.convert_i32_s
-; CHECK-NEXT: f64x2.replace_lane 1
+; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: f64x2.convert_low_i32x4_s
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: # fallthrough-return
%v = sitofp <8 x i32> %x to <8 x double>
@@ -390,13 +378,9 @@ define <4 x double> @convert_low_u_v4f64_2(<8 x i32> %x) {
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i32x4.extract_lane 2
-; CHECK-NEXT: f64.convert_i32_u
-; CHECK-NEXT: f64x2.splat
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i32x4.extract_lane 3
-; CHECK-NEXT: f64.convert_i32_u
-; CHECK-NEXT: f64x2.replace_lane 1
+; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: f64x2.convert_low_i32x4_u
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: # fallthrough-return
%v = uitofp <8 x i32> %x to <8 x double>
@@ -414,13 +398,9 @@ define <4 x double> @promote_low_v4f64(<8 x float> %x) {
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: f32x4.extract_lane 2
-; CHECK-NEXT: f64.promote_f32
-; CHECK-NEXT: f64x2.splat
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: f32x4.extract_lane 3
-; CHECK-NEXT: f64.promote_f32
-; CHECK-NEXT: f64x2.replace_lane 1
+; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: f64x2.promote_low_f32x4
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: # fallthrough-return
%v = shufflevector <8 x float> %x, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -438,16 +418,26 @@ define <4 x double> @promote_low_v4f64_2(<8 x float> %x) {
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: f32x4.extract_lane 2
-; CHECK-NEXT: f64.promote_f32
-; CHECK-NEXT: f64x2.splat
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: f32x4.extract_lane 3
-; CHECK-NEXT: f64.promote_f32
-; CHECK-NEXT: f64x2.replace_lane 1
+; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: f64x2.promote_low_f32x4
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: # fallthrough-return
%v = fpext <8 x float> %x to <8 x double>
%a = shufflevector <8 x double> %v, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x double> %a
}
+
+define <2 x double> @promote_mixed_v2f64(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: promote_mixed_v2f64:
+; CHECK: .functype promote_mixed_v2f64 (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 28, 29, 30, 31, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: f64x2.promote_low_f32x4
+; CHECK-NEXT: # fallthrough-return
+ %v = shufflevector <4 x float> %x, <4 x float> %y, <2 x i32> <i32 2, i32 7>
+ %a = fpext <2 x float> %v to <2 x double>
+ ret <2 x double> %a
+}
More information about the llvm-commits
mailing list