[llvm] fbb61ad - [ARM] Convert fptoi.sat to fixed point multiply
David Green via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 25 07:43:51 PST 2021
Author: David Green
Date: 2021-11-25T15:43:45Z
New Revision: fbb61adb70a36c95568dd9d6ebf464c432767eff
URL: https://github.com/llvm/llvm-project/commit/fbb61adb70a36c95568dd9d6ebf464c432767eff
DIFF: https://github.com/llvm/llvm-project/commit/fbb61adb70a36c95568dd9d6ebf464c432767eff.diff
LOG: [ARM] Convert fptoi.sat to fixed point multiply
This is a very small addition to the existing MVE fixed-point vcvt code
so that it also creates fixed-point vcvts from FP_TO_SINT_SAT and
FP_TO_UINT_SAT nodes, which should be equally valid for native saturating
converts under MVE.
Differential Revision: https://reviews.llvm.org/D114360
Added:
Modified:
llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
llvm/test/CodeGen/Thumb2/mve-vcvt-float-to-fixed.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 2b83a292db768..bb2859c766c2e 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -3274,7 +3274,8 @@ bool ARMDAGToDAGISel::tryFP_TO_INT(SDNode *N, SDLoc dl) {
return false;
unsigned int ScalarBits = Type.getScalarSizeInBits();
- bool IsUnsigned = N->getOpcode() == ISD::FP_TO_UINT;
+ bool IsUnsigned = N->getOpcode() == ISD::FP_TO_UINT ||
+ N->getOpcode() == ISD::FP_TO_UINT_SAT;
SDNode *Node = N->getOperand(0).getNode();
// floating-point to fixed-point with one fractional bit gets turned into an
@@ -3764,6 +3765,8 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
break;
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT_SAT:
+ case ISD::FP_TO_SINT_SAT:
if (tryFP_TO_INT(N, dl))
return;
break;
diff --git a/llvm/test/CodeGen/Thumb2/mve-vcvt-float-to-fixed.ll b/llvm/test/CodeGen/Thumb2/mve-vcvt-float-to-fixed.ll
index c0dc38c8b9a45..2237d1b93628b 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vcvt-float-to-fixed.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vcvt-float-to-fixed.ll
@@ -1030,8 +1030,7 @@ define arm_aapcs_vfpcc <4 x i32> @vcvt_negative2(<4 x float> %0) {
define arm_aapcs_vfpcc <8 x i16> @vcvt_sat_s16_1(<8 x half> %0) {
; CHECK-LABEL: vcvt_sat_s16_1:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vadd.f16 q0, q0, q0
-; CHECK-NEXT: vcvt.s16.f16 q0, q0
+; CHECK-NEXT: vcvt.s16.f16 q0, q0, #1
; CHECK-NEXT: bx lr
%2 = fmul fast <8 x half> %0, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
%3 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> %2)
@@ -1041,8 +1040,7 @@ define arm_aapcs_vfpcc <8 x i16> @vcvt_sat_s16_1(<8 x half> %0) {
define arm_aapcs_vfpcc <8 x i16> @vcvt_sat_u16_1(<8 x half> %0) {
; CHECK-LABEL: vcvt_sat_u16_1:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vadd.f16 q0, q0, q0
-; CHECK-NEXT: vcvt.u16.f16 q0, q0
+; CHECK-NEXT: vcvt.u16.f16 q0, q0, #1
; CHECK-NEXT: bx lr
%2 = fmul fast <8 x half> %0, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
%3 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> %2)
@@ -1052,9 +1050,7 @@ define arm_aapcs_vfpcc <8 x i16> @vcvt_sat_u16_1(<8 x half> %0) {
define arm_aapcs_vfpcc <8 x i16> @vcvt_sat_s16_6(<8 x half> %0) {
; CHECK-LABEL: vcvt_sat_s16_6:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov.i16 q1, #0x5400
-; CHECK-NEXT: vmul.f16 q0, q0, q1
-; CHECK-NEXT: vcvt.s16.f16 q0, q0
+; CHECK-NEXT: vcvt.s16.f16 q0, q0, #6
; CHECK-NEXT: bx lr
%2 = fmul fast <8 x half> %0, <half 0xH5400, half 0xH5400, half 0xH5400, half 0xH5400, half 0xH5400, half 0xH5400, half 0xH5400, half 0xH5400>
%3 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> %2)
@@ -1064,9 +1060,7 @@ define arm_aapcs_vfpcc <8 x i16> @vcvt_sat_s16_6(<8 x half> %0) {
define arm_aapcs_vfpcc <8 x i16> @vcvt_sat_u16_7(<8 x half> %0) {
; CHECK-LABEL: vcvt_sat_u16_7:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov.i16 q1, #0x5800
-; CHECK-NEXT: vmul.f16 q0, q0, q1
-; CHECK-NEXT: vcvt.u16.f16 q0, q0
+; CHECK-NEXT: vcvt.u16.f16 q0, q0, #7
; CHECK-NEXT: bx lr
%2 = fmul fast <8 x half> %0, <half 0xH5800, half 0xH5800, half 0xH5800, half 0xH5800, half 0xH5800, half 0xH5800, half 0xH5800, half 0xH5800>
%3 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> %2)
@@ -1077,8 +1071,7 @@ define arm_aapcs_vfpcc <8 x i16> @vcvt_sat_u16_7(<8 x half> %0) {
define arm_aapcs_vfpcc <4 x i32> @vcvt_sat_s32_1(<4 x float> %0) {
; CHECK-LABEL: vcvt_sat_s32_1:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vadd.f32 q0, q0, q0
-; CHECK-NEXT: vcvt.s32.f32 q0, q0
+; CHECK-NEXT: vcvt.s32.f32 q0, q0, #1
; CHECK-NEXT: bx lr
%2 = fmul fast <4 x float> %0, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
%3 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %2)
@@ -1088,8 +1081,7 @@ define arm_aapcs_vfpcc <4 x i32> @vcvt_sat_s32_1(<4 x float> %0) {
define arm_aapcs_vfpcc <4 x i32> @vcvt_sat_u32_1(<4 x float> %0) {
; CHECK-LABEL: vcvt_sat_u32_1:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vadd.f32 q0, q0, q0
-; CHECK-NEXT: vcvt.u32.f32 q0, q0
+; CHECK-NEXT: vcvt.u32.f32 q0, q0, #1
; CHECK-NEXT: bx lr
%2 = fmul fast <4 x float> %0, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
%3 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> %2)
@@ -1099,9 +1091,7 @@ define arm_aapcs_vfpcc <4 x i32> @vcvt_sat_u32_1(<4 x float> %0) {
define arm_aapcs_vfpcc <4 x i32> @vcvt_sat_u32_11(<4 x float> %0) {
; CHECK-LABEL: vcvt_sat_u32_11:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov.i32 q1, #0x45000000
-; CHECK-NEXT: vmul.f32 q0, q0, q1
-; CHECK-NEXT: vcvt.s32.f32 q0, q0
+; CHECK-NEXT: vcvt.s32.f32 q0, q0, #11
; CHECK-NEXT: bx lr
%2 = fmul fast <4 x float> %0, <float 2.048000e+03, float 2.048000e+03, float 2.048000e+03, float 2.048000e+03>
%3 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %2)
@@ -1111,9 +1101,7 @@ define arm_aapcs_vfpcc <4 x i32> @vcvt_sat_u32_11(<4 x float> %0) {
define arm_aapcs_vfpcc <4 x i32> @vcvt_sat_u32_7(<4 x float> %0) {
; CHECK-LABEL: vcvt_sat_u32_7:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov.i32 q1, #0x4b000000
-; CHECK-NEXT: vmul.f32 q0, q0, q1
-; CHECK-NEXT: vcvt.u32.f32 q0, q0
+; CHECK-NEXT: vcvt.u32.f32 q0, q0, #23
; CHECK-NEXT: bx lr
%2 = fmul fast <4 x float> %0, <float 0x4160000000000000, float 0x4160000000000000, float 0x4160000000000000, float 0x4160000000000000>
%3 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> %2)
@@ -1123,10 +1111,8 @@ define arm_aapcs_vfpcc <4 x i32> @vcvt_sat_u32_7(<4 x float> %0) {
define arm_aapcs_vfpcc <4 x i32> @vcvt_sat_u32_7_24(<4 x float> %0) {
; CHECK-LABEL: vcvt_sat_u32_7_24:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov.i32 q2, #0x4b000000
; CHECK-NEXT: vmov.i32 q1, #0xffffff
-; CHECK-NEXT: vmul.f32 q0, q0, q2
-; CHECK-NEXT: vcvt.u32.f32 q0, q0
+; CHECK-NEXT: vcvt.u32.f32 q0, q0, #23
; CHECK-NEXT: vmin.u32 q0, q0, q1
; CHECK-NEXT: vbic.i32 q0, #0xff000000
; CHECK-NEXT: bx lr
More information about the llvm-commits mailing list