[llvm] 509b397 - [AArch64] Combine vector fptoi.sat(fmul) to fixed point fcvtz
David Green via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 10 08:12:52 PST 2021
Author: David Green
Date: 2021-11-10T16:12:48Z
New Revision: 509b397dd5586dd504864cd958ef36e6c751733c
URL: https://github.com/llvm/llvm-project/commit/509b397dd5586dd504864cd958ef36e6c751733c
DIFF: https://github.com/llvm/llvm-project/commit/509b397dd5586dd504864cd958ef36e6c751733c.diff
LOG: [AArch64] Combine vector fptoi.sat(fmul) to fixed point fcvtz
Similar to D113199 but dealing with the vector size, this extends the
fptosi+fmul to fixed-point fold to also handle the saturating fptosi.sat
and fptoui.sat nodes, which are equally viable so long as the saturation
width matches the output width.
Differential Revision: https://reviews.llvm.org/D113200
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/fcvt_combine.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 07011ab02070b..11dd3a6f9e6dc 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -895,9 +895,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::UINT_TO_FP);
- // TODO: Do the same for FP_TO_*INT_SAT.
setTargetDAGCombine(ISD::FP_TO_SINT);
setTargetDAGCombine(ISD::FP_TO_UINT);
+ setTargetDAGCombine(ISD::FP_TO_SINT_SAT);
+ setTargetDAGCombine(ISD::FP_TO_UINT_SAT);
setTargetDAGCombine(ISD::FDIV);
// Try and combine setcc with csel
@@ -13432,8 +13433,16 @@ static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
assert((ResTy != MVT::v4i64 || DCI.isBeforeLegalizeOps()) &&
"Illegal vector type after legalization");
+ if (N->getOpcode() == ISD::FP_TO_SINT_SAT ||
+ N->getOpcode() == ISD::FP_TO_UINT_SAT) {
+ EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ if (SatVT.getScalarSizeInBits() != IntBits)
+ return SDValue();
+ }
+
SDLoc DL(N);
- bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
+ bool IsSigned = (N->getOpcode() == ISD::FP_TO_SINT ||
+ N->getOpcode() == ISD::FP_TO_SINT_SAT);
unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfp2fxs
: Intrinsic::aarch64_neon_vcvtfp2fxu;
SDValue FixConv =
@@ -17241,6 +17250,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performIntToFpCombine(N, DAG, Subtarget);
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
return performFpToIntCombine(N, DAG, DCI, Subtarget);
case ISD::FDIV:
return performFDivCombine(N, DAG, DCI, Subtarget);
diff --git a/llvm/test/CodeGen/AArch64/fcvt_combine.ll b/llvm/test/CodeGen/AArch64/fcvt_combine.ll
index f65ea72cb842a..16c72465f3e2d 100644
--- a/llvm/test/CodeGen/AArch64/fcvt_combine.ll
+++ b/llvm/test/CodeGen/AArch64/fcvt_combine.ll
@@ -300,9 +300,7 @@ declare <4 x i24> @llvm.fptoui.sat.v4i24.v4f32(<4 x float>)
define <2 x i32> @test1_sat(<2 x float> %f) {
; CHECK-LABEL: test1_sat:
; CHECK: // %bb.0:
-; CHECK-NEXT: fmov v1.2s, #16.00000000
-; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
-; CHECK-NEXT: fcvtzs v0.2s, v0.2s
+; CHECK-NEXT: fcvtzs v0.2s, v0.2s, #4
; CHECK-NEXT: ret
%mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
%vcvt.i = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %mul.i)
@@ -312,9 +310,7 @@ define <2 x i32> @test1_sat(<2 x float> %f) {
define <4 x i32> @test2_sat(<4 x float> %f) {
; CHECK-LABEL: test2_sat:
; CHECK: // %bb.0:
-; CHECK-NEXT: movi v1.4s, #65, lsl #24
-; CHECK-NEXT: fmul v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NEXT: fcvtzs v0.4s, v0.4s, #3
; CHECK-NEXT: ret
%mul.i = fmul <4 x float> %f, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00>
%vcvt.i = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %mul.i)
@@ -324,10 +320,7 @@ define <4 x i32> @test2_sat(<4 x float> %f) {
define <2 x i64> @test3_sat(<2 x double> %d) {
; CHECK-LABEL: test3_sat:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #4629700416936869888
-; CHECK-NEXT: dup v1.2d, x8
-; CHECK-NEXT: fmul v0.2d, v0.2d, v1.2d
-; CHECK-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d, #5
; CHECK-NEXT: ret
%mul.i = fmul <2 x double> %d, <double 32.000000e+00, double 32.000000e+00>
%vcvt.i = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> %mul.i)
@@ -338,14 +331,8 @@ define <2 x i64> @test3_sat(<2 x double> %d) {
define <2 x i32> @test4_sat(<2 x double> %d) {
; CHECK-LABEL: test4_sat:
; CHECK: // %bb.0:
-; CHECK-NEXT: fmov v1.2d, #16.00000000
-; CHECK-NEXT: fmul v0.2d, v0.2d, v1.2d
-; CHECK-NEXT: mov d1, v0.d[1]
-; CHECK-NEXT: fcvtzs w8, d0
-; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: fcvtzs w8, d1
-; CHECK-NEXT: mov v0.s[1], w8
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d, #4
+; CHECK-NEXT: xtn v0.2s, v0.2d
; CHECK-NEXT: ret
%mul.i = fmul <2 x double> %d, <double 16.000000e+00, double 16.000000e+00>
%vcvt.i = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> %mul.i)
@@ -356,13 +343,7 @@ define <2 x i32> @test4_sat(<2 x double> %d) {
define <2 x i16> @test5_sat(<2 x float> %f) {
; CHECK-LABEL: test5_sat:
; CHECK: // %bb.0:
-; CHECK-NEXT: fmov v1.2s, #16.00000000
-; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
-; CHECK-NEXT: movi v1.2s, #127, msl #8
-; CHECK-NEXT: fcvtzs v0.2s, v0.2s
-; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s
-; CHECK-NEXT: mvni v1.2s, #127, msl #8
-; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: fcvtzs v0.2s, v0.2s, #4
; CHECK-NEXT: ret
%mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
%vcvt.i = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f32(<2 x float> %mul.i)
@@ -389,9 +370,7 @@ define <2 x i64> @test6_sat(<2 x float> %f) {
define <2 x i32> @test7_sat(<2 x float> %f) {
; CHECK-LABEL: test7_sat:
; CHECK: // %bb.0:
-; CHECK-NEXT: fmov v1.2s, #16.00000000
-; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
-; CHECK-NEXT: fcvtzu v0.2s, v0.2s
+; CHECK-NEXT: fcvtzu v0.2s, v0.2s, #4
; CHECK-NEXT: ret
%mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
%vcvt.i = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %mul.i)
@@ -442,9 +421,7 @@ define <2 x i32> @test10_sat(<2 x float> %f) {
define <2 x i32> @test11_sat(<2 x float> %f) {
; CHECK-LABEL: test11_sat:
; CHECK: // %bb.0:
-; CHECK-NEXT: movi v1.2s, #65, lsl #24
-; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
-; CHECK-NEXT: fcvtzu v0.2s, v0.2s
+; CHECK-NEXT: fcvtzu v0.2s, v0.2s, #3
; CHECK-NEXT: ret
%mul.i = fmul <2 x float> %f, <float undef, float 8.000000e+00>
%vcvt.i = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %mul.i)
@@ -481,10 +458,7 @@ define <2 x i32> @test13_sat(<2 x float> %f) {
define <2 x i32> @test14_sat(<2 x float> %f) {
; CHECK-LABEL: test14_sat:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #1333788672
-; CHECK-NEXT: dup v1.2s, w8
-; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
-; CHECK-NEXT: fcvtzs v0.2s, v0.2s
+; CHECK-NEXT: fcvtzs v0.2s, v0.2s, #32
; CHECK-NEXT: ret
%mul.i = fmul <2 x float> %f, <float 0x41F0000000000000, float 0x41F0000000000000>
%vcvt.i = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %mul.i)
@@ -494,9 +468,7 @@ define <2 x i32> @test14_sat(<2 x float> %f) {
define <3 x i32> @test_illegal_fp_to_int_sat_sat(<3 x float> %in) {
; CHECK-LABEL: test_illegal_fp_to_int_sat_sat:
; CHECK: // %bb.0:
-; CHECK-NEXT: fmov v1.4s, #4.00000000
-; CHECK-NEXT: fmul v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NEXT: fcvtzs v0.4s, v0.4s, #2
; CHECK-NEXT: ret
%mul.i = fmul <3 x float> %in, <float 4.0, float 4.0, float 4.0>
%vcvt.i = call <3 x i32> @llvm.fptosi.sat.v3i32.v3f32(<3 x float> %mul.i)
@@ -666,10 +638,8 @@ define <4 x i32> @test_v4f16_i32_sat(<4 x half> %in) {
define <4 x i32> @test_extrasat(<4 x float> %f) {
; CHECK-LABEL: test_extrasat:
; CHECK: // %bb.0:
-; CHECK-NEXT: movi v1.4s, #65, lsl #24
-; CHECK-NEXT: fmul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: movi v1.2d, #0xffffff00ffffff
-; CHECK-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s, #3
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: bic v0.4s, #255, lsl #24
; CHECK-NEXT: ret
More information about the llvm-commits mailing list