[llvm] r367191 - [ARM] Better patterns for fp <> predicate vectors
David Green via llvm-commits
llvm-commits at lists.llvm.org
Sun Jul 28 06:53:39 PDT 2019
Author: dmgreen
Date: Sun Jul 28 06:53:39 2019
New Revision: 367191
URL: http://llvm.org/viewvc/llvm-project?rev=367191&view=rev
Log:
[ARM] Better patterns for fp <> predicate vectors
These are some better patterns for converting between predicates and
floating-point vectors. Much like the extends, we select "1"/"-1" or "0" depending
on the predicate value, and we perform a compare against 0 to convert to a predicate.
Differential Revision: https://reviews.llvm.org/D65103
Modified:
llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
llvm/trunk/lib/Target/ARM/ARMInstrMVE.td
llvm/trunk/test/CodeGen/Thumb2/mve-pred-ext.ll
Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=367191&r1=367190&r2=367191&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Sun Jul 28 06:53:39 2019
@@ -349,10 +349,6 @@ void ARMTargetLowering::addMVEVectorType
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
- setOperationAction(ISD::FP_TO_SINT, VT, Expand);
- setOperationAction(ISD::FP_TO_UINT, VT, Expand);
- setOperationAction(ISD::SINT_TO_FP, VT, Expand);
- setOperationAction(ISD::UINT_TO_FP, VT, Expand);
}
}
Modified: llvm/trunk/lib/Target/ARM/ARMInstrMVE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrMVE.td?rev=367191&r1=367190&r2=367191&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrMVE.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrMVE.td Sun Jul 28 06:53:39 2019
@@ -4561,6 +4561,7 @@ let Predicates = [HasMVEInt] in {
(v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0,
(MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), 1)))>;
+ // Pred <-> Int
def : Pat<(v16i8 (zext (v16i1 VCCR:$pred))),
(v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), 0, VCCR:$pred))>;
def : Pat<(v8i16 (zext (v8i1 VCCR:$pred))),
@@ -4583,6 +4584,31 @@ let Predicates = [HasMVEInt] in {
(v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), 0, VCCR:$pred))>;
}
+let Predicates = [HasMVEFloat] in {
+ // Pred <-> Float
+ // 112 is 1.0 in float
+ def : Pat<(v4f32 (uint_to_fp (v4i1 VCCR:$pred))),
+ (v4f32 (MVE_VPSEL (v4f32 (MVE_VMOVimmf32 112)), (v4f32 (MVE_VMOVimmi32 0)), 0, VCCR:$pred))>;
+ // 2620 is 1.0 in half
+ def : Pat<(v8f16 (uint_to_fp (v8i1 VCCR:$pred))),
+ (v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2620)), (v8f16 (MVE_VMOVimmi16 0)), 0, VCCR:$pred))>;
+ // 240 is -1.0 in float
+ def : Pat<(v4f32 (sint_to_fp (v4i1 VCCR:$pred))),
+ (v4f32 (MVE_VPSEL (v4f32 (MVE_VMOVimmf32 240)), (v4f32 (MVE_VMOVimmi32 0)), 0, VCCR:$pred))>;
+ // 2748 is -1.0 in half
+ def : Pat<(v8f16 (sint_to_fp (v8i1 VCCR:$pred))),
+ (v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2748)), (v8f16 (MVE_VMOVimmi16 0)), 0, VCCR:$pred))>;
+
+ def : Pat<(v4i1 (fp_to_uint (v4f32 MQPR:$v1))),
+ (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, 1))>;
+ def : Pat<(v8i1 (fp_to_uint (v8f16 MQPR:$v1))),
+ (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, 1))>;
+ def : Pat<(v4i1 (fp_to_sint (v4f32 MQPR:$v1))),
+ (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, 1))>;
+ def : Pat<(v8i1 (fp_to_sint (v8f16 MQPR:$v1))),
+ (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, 1))>;
+}
+
def MVE_VPNOT : MVE_p<(outs), (ins), NoItinerary,
"vpnot", "", "", vpred_n, "", []> {
let Inst{31-0} = 0b11111110001100010000111101001101;
Modified: llvm/trunk/test/CodeGen/Thumb2/mve-pred-ext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/mve-pred-ext.ll?rev=367191&r1=367190&r2=367191&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-pred-ext.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/mve-pred-ext.ll Sun Jul 28 06:53:39 2019
@@ -166,20 +166,10 @@ entry:
define arm_aapcs_vfpcc <4 x float> @uitofp_v4i1_v4f32(<4 x i32> %src) {
; CHECK-LABEL: uitofp_v4i1_v4f32:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.i32 q1, #0x0
+; CHECK-NEXT: vmov.f32 q2, #1.000000e+00
; CHECK-NEXT: vcmp.s32 gt, q0, zr
-; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: ubfx r1, r0, #8, #1
-; CHECK-NEXT: ubfx r2, r0, #12, #1
-; CHECK-NEXT: vmov s0, r2
-; CHECK-NEXT: vmov s4, r1
-; CHECK-NEXT: vcvt.f32.u32 s3, s0
-; CHECK-NEXT: ubfx r2, r0, #4, #1
-; CHECK-NEXT: vcvt.f32.u32 s2, s4
-; CHECK-NEXT: and r0, r0, #1
-; CHECK-NEXT: vmov s4, r2
-; CHECK-NEXT: vcvt.f32.u32 s1, s4
-; CHECK-NEXT: vmov s4, r0
-; CHECK-NEXT: vcvt.f32.u32 s0, s4
+; CHECK-NEXT: vpsel q0, q2, q1
; CHECK-NEXT: bx lr
entry:
%c = icmp sgt <4 x i32> %src, zeroinitializer
@@ -190,24 +180,10 @@ entry:
define arm_aapcs_vfpcc <4 x float> @sitofp_v4i1_v4f32(<4 x i32> %src) {
; CHECK-LABEL: sitofp_v4i1_v4f32:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.i32 q1, #0x0
+; CHECK-NEXT: vmov.f32 q2, #-1.000000e+00
; CHECK-NEXT: vcmp.s32 gt, q0, zr
-; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: and r1, r0, #1
-; CHECK-NEXT: ubfx r2, r0, #8, #1
-; CHECK-NEXT: ubfx r3, r0, #4, #1
-; CHECK-NEXT: ubfx r0, r0, #12, #1
-; CHECK-NEXT: rsbs r2, r2, #0
-; CHECK-NEXT: rsbs r0, r0, #0
-; CHECK-NEXT: vmov s4, r2
-; CHECK-NEXT: vmov s0, r0
-; CHECK-NEXT: rsbs r0, r3, #0
-; CHECK-NEXT: vcvt.f32.s32 s3, s0
-; CHECK-NEXT: vcvt.f32.s32 s2, s4
-; CHECK-NEXT: vmov s4, r0
-; CHECK-NEXT: rsbs r0, r1, #0
-; CHECK-NEXT: vcvt.f32.s32 s1, s4
-; CHECK-NEXT: vmov s4, r0
-; CHECK-NEXT: vcvt.f32.s32 s0, s4
+; CHECK-NEXT: vpsel q0, q2, q1
; CHECK-NEXT: bx lr
entry:
%c = icmp sgt <4 x i32> %src, zeroinitializer
@@ -218,26 +194,9 @@ entry:
define arm_aapcs_vfpcc <4 x float> @fptoui_v4i1_v4f32(<4 x float> %src) {
; CHECK-LABEL: fptoui_v4i1_v4f32:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vcvt.s32.f32 s4, s0
-; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: vmov.i32 q1, #0x0
; CHECK-NEXT: vmov.f32 q2, #1.000000e+00
-; CHECK-NEXT: vmov r1, s4
-; CHECK-NEXT: vcvt.s32.f32 s4, s1
-; CHECK-NEXT: rsbs r1, r1, #0
-; CHECK-NEXT: bfi r0, r1, #0, #4
-; CHECK-NEXT: vmov r1, s4
-; CHECK-NEXT: vcvt.s32.f32 s4, s2
-; CHECK-NEXT: vcvt.s32.f32 s0, s3
-; CHECK-NEXT: rsbs r1, r1, #0
-; CHECK-NEXT: bfi r0, r1, #4, #4
-; CHECK-NEXT: vmov r1, s4
-; CHECK-NEXT: vmov.i32 q1, #0x0
-; CHECK-NEXT: rsbs r1, r1, #0
-; CHECK-NEXT: bfi r0, r1, #8, #4
-; CHECK-NEXT: vmov r1, s0
-; CHECK-NEXT: rsbs r1, r1, #0
-; CHECK-NEXT: bfi r0, r1, #12, #4
-; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vcmp.f32 ne, q0, zr
; CHECK-NEXT: vpsel q0, q2, q1
; CHECK-NEXT: bx lr
entry:
@@ -249,22 +208,9 @@ entry:
define arm_aapcs_vfpcc <4 x float> @fptosi_v4i1_v4f32(<4 x float> %src) {
; CHECK-LABEL: fptosi_v4i1_v4f32:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vcvt.s32.f32 s4, s0
-; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: vmov.i32 q1, #0x0
; CHECK-NEXT: vmov.f32 q2, #1.000000e+00
-; CHECK-NEXT: vmov r1, s4
-; CHECK-NEXT: vcvt.s32.f32 s4, s1
-; CHECK-NEXT: bfi r0, r1, #0, #4
-; CHECK-NEXT: vmov r1, s4
-; CHECK-NEXT: vcvt.s32.f32 s4, s2
-; CHECK-NEXT: bfi r0, r1, #4, #4
-; CHECK-NEXT: vcvt.s32.f32 s0, s3
-; CHECK-NEXT: vmov r1, s4
-; CHECK-NEXT: vmov.i32 q1, #0x0
-; CHECK-NEXT: bfi r0, r1, #8, #4
-; CHECK-NEXT: vmov r1, s0
-; CHECK-NEXT: bfi r0, r1, #12, #4
-; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vcmp.f32 ne, q0, zr
; CHECK-NEXT: vpsel q0, q2, q1
; CHECK-NEXT: bx lr
entry:
@@ -273,3 +219,60 @@ entry:
ret <4 x float> %s
}
+
+
+define arm_aapcs_vfpcc <8 x half> @uitofp_v8i1_v8f16(<8 x i16> %src) {
+; CHECK-LABEL: uitofp_v8i1_v8f16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.i16 q1, #0x0
+; CHECK-NEXT: vmov.i16 q2, #0x3c00
+; CHECK-NEXT: vcmp.s16 gt, q0, zr
+; CHECK-NEXT: vpsel q0, q2, q1
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp sgt <8 x i16> %src, zeroinitializer
+ %0 = uitofp <8 x i1> %c to <8 x half>
+ ret <8 x half> %0
+}
+
+define arm_aapcs_vfpcc <8 x half> @sitofp_v8i1_v8f16(<8 x i16> %src) {
+; CHECK-LABEL: sitofp_v8i1_v8f16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.i16 q1, #0x0
+; CHECK-NEXT: vmov.i16 q2, #0xbc00
+; CHECK-NEXT: vcmp.s16 gt, q0, zr
+; CHECK-NEXT: vpsel q0, q2, q1
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp sgt <8 x i16> %src, zeroinitializer
+ %0 = sitofp <8 x i1> %c to <8 x half>
+ ret <8 x half> %0
+}
+
+define arm_aapcs_vfpcc <8 x half> @fptoui_v8i1_v8f16(<8 x half> %src) {
+; CHECK-LABEL: fptoui_v8i1_v8f16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.i32 q1, #0x0
+; CHECK-NEXT: vmov.i16 q2, #0x3c00
+; CHECK-NEXT: vcmp.f16 ne, q0, zr
+; CHECK-NEXT: vpsel q0, q2, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = fptoui <8 x half> %src to <8 x i1>
+ %s = select <8 x i1> %0, <8 x half> <half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0>, <8 x half> zeroinitializer
+ ret <8 x half> %s
+}
+
+define arm_aapcs_vfpcc <8 x half> @fptosi_v8i1_v8f16(<8 x half> %src) {
+; CHECK-LABEL: fptosi_v8i1_v8f16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.i32 q1, #0x0
+; CHECK-NEXT: vmov.i16 q2, #0x3c00
+; CHECK-NEXT: vcmp.f16 ne, q0, zr
+; CHECK-NEXT: vpsel q0, q2, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = fptosi <8 x half> %src to <8 x i1>
+ %s = select <8 x i1> %0, <8 x half> <half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0>, <8 x half> zeroinitializer
+ ret <8 x half> %s
+}
More information about the llvm-commits
mailing list