[llvm] adfd12e - [ARM] Add patterns for store(fptosisat(..))

David Green via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 3 11:22:21 PDT 2021


Author: David Green
Date: 2021-09-03T19:22:11+01:00
New Revision: adfd12e6d17c09b81c1e5784940a0dbb00fe0945

URL: https://github.com/llvm/llvm-project/commit/adfd12e6d17c09b81c1e5784940a0dbb00fe0945
DIFF: https://github.com/llvm/llvm-project/commit/adfd12e6d17c09b81c1e5784940a0dbb00fe0945.diff

LOG: [ARM] Add patterns for store(fptosisat(..))

As an extension to D107866, this adds store(fptosisat(..)) and
store(fptouisat(..)) patterns, similar to the existing fptosi/fptoui
store patterns, to avoid unnecessarily moving the converted value into
GPRs where we can store it directly from the FP register.

Differential Revision: https://reviews.llvm.org/D108378

Added: 
    

Modified: 
    llvm/lib/Target/ARM/ARMInstrVFP.td
    llvm/test/CodeGen/ARM/fptoi-sat-store.ll
    llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll
    llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td
index 30ded00ff0b4..9d1bfa414dff 100644
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -1605,6 +1605,8 @@ let Predicates=[HasVFP2, HasDPVFP] in {
 
   def : VFPPat<(alignedstore32 (i32 (fp_to_sint (f64 DPR:$a))), addrmode5:$ptr),
                (VSTRS (VTOSIZD DPR:$a), addrmode5:$ptr)>;
+  def : VFPPat<(alignedstore32 (i32 (fp_to_sint_sat (f64 DPR:$a), i32)), addrmode5:$ptr),
+               (VSTRS (VTOSIZD DPR:$a), addrmode5:$ptr)>;
 }
 
 def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
@@ -1627,6 +1629,9 @@ def : VFPPat<(i32 (fp_to_sint_sat SPR:$a, i32)),
 def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_sint (f32 SPR:$a))),
                                    addrmode5:$ptr),
                    (VSTRS (VTOSIZS SPR:$a), addrmode5:$ptr)>;
+def : VFPPat<(alignedstore32 (i32 (fp_to_sint_sat (f32 SPR:$a), i32)),
+                                   addrmode5:$ptr),
+             (VSTRS (VTOSIZS SPR:$a), addrmode5:$ptr)>;
 
 def VTOSIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001,
                                  (outs SPR:$Sd), (ins HPR:$Sm),
@@ -1658,6 +1663,8 @@ let Predicates=[HasVFP2, HasDPVFP] in {
 
   def : VFPPat<(alignedstore32 (i32 (fp_to_uint (f64 DPR:$a))), addrmode5:$ptr),
                (VSTRS (VTOUIZD DPR:$a), addrmode5:$ptr)>;
+  def : VFPPat<(alignedstore32 (i32 (fp_to_uint_sat (f64 DPR:$a), i32)), addrmode5:$ptr),
+               (VSTRS (VTOUIZD DPR:$a), addrmode5:$ptr)>;
 }
 
 def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
@@ -1680,6 +1687,9 @@ def : VFPPat<(i32 (fp_to_uint_sat SPR:$a, i32)),
 def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_uint (f32 SPR:$a))),
                                    addrmode5:$ptr),
                   (VSTRS (VTOUIZS SPR:$a), addrmode5:$ptr)>;
+def : VFPPat<(alignedstore32 (i32 (fp_to_uint_sat (f32 SPR:$a), i32)),
+                                   addrmode5:$ptr),
+             (VSTRS (VTOUIZS SPR:$a), addrmode5:$ptr)>;
 
 def VTOUIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001,
                                  (outs SPR:$Sd), (ins HPR:$Sm),

diff  --git a/llvm/test/CodeGen/ARM/fptoi-sat-store.ll b/llvm/test/CodeGen/ARM/fptoi-sat-store.ll
index a17aafb4155f..b007115130b9 100644
--- a/llvm/test/CodeGen/ARM/fptoi-sat-store.ll
+++ b/llvm/test/CodeGen/ARM/fptoi-sat-store.ll
@@ -63,8 +63,7 @@ define void @test_signed_i32_f32(i32* %d, float %f) nounwind {
 ; VFP:       @ %bb.0:
 ; VFP-NEXT:    vmov s0, r1
 ; VFP-NEXT:    vcvt.s32.f32 s0, s0
-; VFP-NEXT:    vmov r1, s0
-; VFP-NEXT:    str r1, [r0]
+; VFP-NEXT:    vstr s0, [r0]
 ; VFP-NEXT:    bx lr
     %r = call i32 @llvm.fptosi.sat.i32.f32(float %f)
     store i32 %r, i32* %d, align 4
@@ -141,16 +140,14 @@ define void @test_signed_i32_f64(i32* %d, double %f) nounwind {
 ; VFP2:       @ %bb.0:
 ; VFP2-NEXT:    vmov d16, r2, r3
 ; VFP2-NEXT:    vcvt.s32.f64 s0, d16
-; VFP2-NEXT:    vmov r1, s0
-; VFP2-NEXT:    str r1, [r0]
+; VFP2-NEXT:    vstr s0, [r0]
 ; VFP2-NEXT:    bx lr
 ;
 ; FP16-LABEL: test_signed_i32_f64:
 ; FP16:       @ %bb.0:
 ; FP16-NEXT:    vmov d0, r2, r3
 ; FP16-NEXT:    vcvt.s32.f64 s0, d0
-; FP16-NEXT:    vmov r1, s0
-; FP16-NEXT:    str r1, [r0]
+; FP16-NEXT:    vstr s0, [r0]
 ; FP16-NEXT:    bx lr
     %r = call i32 @llvm.fptosi.sat.i32.f64(double %f)
     store i32 %r, i32* %d, align 4
@@ -200,8 +197,7 @@ define void @test_unsigned_i32_f32(i32* %d, float %f) nounwind {
 ; VFP:       @ %bb.0:
 ; VFP-NEXT:    vmov s0, r1
 ; VFP-NEXT:    vcvt.u32.f32 s0, s0
-; VFP-NEXT:    vmov r1, s0
-; VFP-NEXT:    str r1, [r0]
+; VFP-NEXT:    vstr s0, [r0]
 ; VFP-NEXT:    bx lr
     %r = call i32 @llvm.fptoui.sat.i32.f32(float %f)
     store i32 %r, i32* %d, align 4
@@ -260,16 +256,14 @@ define void @test_unsigned_i32_f64(i32* %d, double %f) nounwind {
 ; VFP2:       @ %bb.0:
 ; VFP2-NEXT:    vmov d16, r2, r3
 ; VFP2-NEXT:    vcvt.u32.f64 s0, d16
-; VFP2-NEXT:    vmov r1, s0
-; VFP2-NEXT:    str r1, [r0]
+; VFP2-NEXT:    vstr s0, [r0]
 ; VFP2-NEXT:    bx lr
 ;
 ; FP16-LABEL: test_unsigned_i32_f64:
 ; FP16:       @ %bb.0:
 ; FP16-NEXT:    vmov d0, r2, r3
 ; FP16-NEXT:    vcvt.u32.f64 s0, d0
-; FP16-NEXT:    vmov r1, s0
-; FP16-NEXT:    str r1, [r0]
+; FP16-NEXT:    vstr s0, [r0]
 ; FP16-NEXT:    bx lr
     %r = call i32 @llvm.fptoui.sat.i32.f64(double %f)
     store i32 %r, i32* %d, align 4

diff  --git a/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll b/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll
index 1c2f64e06905..7e9904a73523 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll
@@ -152,20 +152,19 @@ define arm_aapcs_vfpcc <4 x i32> @test_signed_v4f32_v4i32(<4 x float> %f) {
 define arm_aapcs_vfpcc <5 x i32> @test_signed_v5f32_v5i32(<5 x float> %f) {
 ; CHECK-MVE-LABEL: test_signed_v5f32_v5i32:
 ; CHECK-MVE:       @ %bb.0:
-; CHECK-MVE-NEXT:    vcvt.s32.f32 s4, s4
-; CHECK-MVE-NEXT:    vcvt.s32.f32 s0, s0
 ; CHECK-MVE-NEXT:    vcvt.s32.f32 s2, s2
+; CHECK-MVE-NEXT:    vcvt.s32.f32 s0, s0
 ; CHECK-MVE-NEXT:    vcvt.s32.f32 s6, s3
 ; CHECK-MVE-NEXT:    vcvt.s32.f32 s8, s1
-; CHECK-MVE-NEXT:    vmov r1, s4
-; CHECK-MVE-NEXT:    vmov r2, s0
-; CHECK-MVE-NEXT:    str r1, [r0, #16]
+; CHECK-MVE-NEXT:    vcvt.s32.f32 s4, s4
 ; CHECK-MVE-NEXT:    vmov r1, s2
+; CHECK-MVE-NEXT:    vmov r2, s0
 ; CHECK-MVE-NEXT:    vmov q0[2], q0[0], r2, r1
 ; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    vmov r2, s8
 ; CHECK-MVE-NEXT:    vmov q0[3], q0[1], r2, r1
 ; CHECK-MVE-NEXT:    vstrw.32 q0, [r0]
+; CHECK-MVE-NEXT:    vstr s4, [r0, #16]
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: test_signed_v5f32_v5i32:
@@ -183,22 +182,21 @@ define arm_aapcs_vfpcc <5 x i32> @test_signed_v5f32_v5i32(<5 x float> %f) {
 define arm_aapcs_vfpcc <6 x i32> @test_signed_v6f32_v6i32(<6 x float> %f) {
 ; CHECK-MVE-LABEL: test_signed_v6f32_v6i32:
 ; CHECK-MVE:       @ %bb.0:
-; CHECK-MVE-NEXT:    vcvt.s32.f32 s6, s5
-; CHECK-MVE-NEXT:    vcvt.s32.f32 s4, s4
 ; CHECK-MVE-NEXT:    vcvt.s32.f32 s2, s2
 ; CHECK-MVE-NEXT:    vcvt.s32.f32 s0, s0
 ; CHECK-MVE-NEXT:    vcvt.s32.f32 s8, s3
 ; CHECK-MVE-NEXT:    vcvt.s32.f32 s10, s1
-; CHECK-MVE-NEXT:    vmov r1, s6
-; CHECK-MVE-NEXT:    vmov r2, s4
-; CHECK-MVE-NEXT:    strd r2, r1, [r0, #16]
+; CHECK-MVE-NEXT:    vcvt.s32.f32 s6, s5
+; CHECK-MVE-NEXT:    vcvt.s32.f32 s4, s4
 ; CHECK-MVE-NEXT:    vmov r1, s2
 ; CHECK-MVE-NEXT:    vmov r2, s0
 ; CHECK-MVE-NEXT:    vmov q0[2], q0[0], r2, r1
 ; CHECK-MVE-NEXT:    vmov r1, s8
 ; CHECK-MVE-NEXT:    vmov r2, s10
 ; CHECK-MVE-NEXT:    vmov q0[3], q0[1], r2, r1
+; CHECK-MVE-NEXT:    vstr s6, [r0, #20]
 ; CHECK-MVE-NEXT:    vstrw.32 q0, [r0]
+; CHECK-MVE-NEXT:    vstr s4, [r0, #16]
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: test_signed_v6f32_v6i32:
@@ -218,25 +216,23 @@ define arm_aapcs_vfpcc <6 x i32> @test_signed_v6f32_v6i32(<6 x float> %f) {
 define arm_aapcs_vfpcc <7 x i32> @test_signed_v7f32_v7i32(<7 x float> %f) {
 ; CHECK-MVE-LABEL: test_signed_v7f32_v7i32:
 ; CHECK-MVE:       @ %bb.0:
-; CHECK-MVE-NEXT:    vcvt.s32.f32 s8, s5
-; CHECK-MVE-NEXT:    vcvt.s32.f32 s4, s4
-; CHECK-MVE-NEXT:    vcvt.s32.f32 s6, s6
 ; CHECK-MVE-NEXT:    vcvt.s32.f32 s2, s2
 ; CHECK-MVE-NEXT:    vcvt.s32.f32 s0, s0
 ; CHECK-MVE-NEXT:    vcvt.s32.f32 s10, s3
 ; CHECK-MVE-NEXT:    vcvt.s32.f32 s12, s1
-; CHECK-MVE-NEXT:    vmov r1, s8
-; CHECK-MVE-NEXT:    vmov r2, s4
-; CHECK-MVE-NEXT:    vmov r3, s6
-; CHECK-MVE-NEXT:    strd r2, r1, [r0, #16]
+; CHECK-MVE-NEXT:    vcvt.s32.f32 s8, s5
+; CHECK-MVE-NEXT:    vcvt.s32.f32 s4, s4
+; CHECK-MVE-NEXT:    vcvt.s32.f32 s6, s6
 ; CHECK-MVE-NEXT:    vmov r1, s2
 ; CHECK-MVE-NEXT:    vmov r2, s0
-; CHECK-MVE-NEXT:    str r3, [r0, #24]
 ; CHECK-MVE-NEXT:    vmov q0[2], q0[0], r2, r1
 ; CHECK-MVE-NEXT:    vmov r1, s10
 ; CHECK-MVE-NEXT:    vmov r2, s12
 ; CHECK-MVE-NEXT:    vmov q0[3], q0[1], r2, r1
+; CHECK-MVE-NEXT:    vstr s8, [r0, #20]
+; CHECK-MVE-NEXT:    vstr s4, [r0, #16]
 ; CHECK-MVE-NEXT:    vstrw.32 q0, [r0]
+; CHECK-MVE-NEXT:    vstr s6, [r0, #24]
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: test_signed_v7f32_v7i32:

diff  --git a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll
index e36674f6bfcb..9c8176925870 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll
@@ -133,20 +133,19 @@ define arm_aapcs_vfpcc <4 x i32> @test_unsigned_v4f32_v4i32(<4 x float> %f) {
 define arm_aapcs_vfpcc <5 x i32> @test_unsigned_v5f32_v5i32(<5 x float> %f) {
 ; CHECK-MVE-LABEL: test_unsigned_v5f32_v5i32:
 ; CHECK-MVE:       @ %bb.0:
-; CHECK-MVE-NEXT:    vcvt.u32.f32 s4, s4
-; CHECK-MVE-NEXT:    vcvt.u32.f32 s0, s0
 ; CHECK-MVE-NEXT:    vcvt.u32.f32 s2, s2
+; CHECK-MVE-NEXT:    vcvt.u32.f32 s0, s0
 ; CHECK-MVE-NEXT:    vcvt.u32.f32 s6, s3
 ; CHECK-MVE-NEXT:    vcvt.u32.f32 s8, s1
-; CHECK-MVE-NEXT:    vmov r1, s4
-; CHECK-MVE-NEXT:    vmov r2, s0
-; CHECK-MVE-NEXT:    str r1, [r0, #16]
+; CHECK-MVE-NEXT:    vcvt.u32.f32 s4, s4
 ; CHECK-MVE-NEXT:    vmov r1, s2
+; CHECK-MVE-NEXT:    vmov r2, s0
 ; CHECK-MVE-NEXT:    vmov q0[2], q0[0], r2, r1
 ; CHECK-MVE-NEXT:    vmov r1, s6
 ; CHECK-MVE-NEXT:    vmov r2, s8
 ; CHECK-MVE-NEXT:    vmov q0[3], q0[1], r2, r1
 ; CHECK-MVE-NEXT:    vstrw.32 q0, [r0]
+; CHECK-MVE-NEXT:    vstr s4, [r0, #16]
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: test_unsigned_v5f32_v5i32:
@@ -164,22 +163,21 @@ define arm_aapcs_vfpcc <5 x i32> @test_unsigned_v5f32_v5i32(<5 x float> %f) {
 define arm_aapcs_vfpcc <6 x i32> @test_unsigned_v6f32_v6i32(<6 x float> %f) {
 ; CHECK-MVE-LABEL: test_unsigned_v6f32_v6i32:
 ; CHECK-MVE:       @ %bb.0:
-; CHECK-MVE-NEXT:    vcvt.u32.f32 s6, s5
-; CHECK-MVE-NEXT:    vcvt.u32.f32 s4, s4
 ; CHECK-MVE-NEXT:    vcvt.u32.f32 s2, s2
 ; CHECK-MVE-NEXT:    vcvt.u32.f32 s0, s0
 ; CHECK-MVE-NEXT:    vcvt.u32.f32 s8, s3
 ; CHECK-MVE-NEXT:    vcvt.u32.f32 s10, s1
-; CHECK-MVE-NEXT:    vmov r1, s6
-; CHECK-MVE-NEXT:    vmov r2, s4
-; CHECK-MVE-NEXT:    strd r2, r1, [r0, #16]
+; CHECK-MVE-NEXT:    vcvt.u32.f32 s6, s5
+; CHECK-MVE-NEXT:    vcvt.u32.f32 s4, s4
 ; CHECK-MVE-NEXT:    vmov r1, s2
 ; CHECK-MVE-NEXT:    vmov r2, s0
 ; CHECK-MVE-NEXT:    vmov q0[2], q0[0], r2, r1
 ; CHECK-MVE-NEXT:    vmov r1, s8
 ; CHECK-MVE-NEXT:    vmov r2, s10
 ; CHECK-MVE-NEXT:    vmov q0[3], q0[1], r2, r1
+; CHECK-MVE-NEXT:    vstr s6, [r0, #20]
 ; CHECK-MVE-NEXT:    vstrw.32 q0, [r0]
+; CHECK-MVE-NEXT:    vstr s4, [r0, #16]
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: test_unsigned_v6f32_v6i32:
@@ -199,25 +197,23 @@ define arm_aapcs_vfpcc <6 x i32> @test_unsigned_v6f32_v6i32(<6 x float> %f) {
 define arm_aapcs_vfpcc <7 x i32> @test_unsigned_v7f32_v7i32(<7 x float> %f) {
 ; CHECK-MVE-LABEL: test_unsigned_v7f32_v7i32:
 ; CHECK-MVE:       @ %bb.0:
-; CHECK-MVE-NEXT:    vcvt.u32.f32 s8, s5
-; CHECK-MVE-NEXT:    vcvt.u32.f32 s4, s4
-; CHECK-MVE-NEXT:    vcvt.u32.f32 s6, s6
 ; CHECK-MVE-NEXT:    vcvt.u32.f32 s2, s2
 ; CHECK-MVE-NEXT:    vcvt.u32.f32 s0, s0
 ; CHECK-MVE-NEXT:    vcvt.u32.f32 s10, s3
 ; CHECK-MVE-NEXT:    vcvt.u32.f32 s12, s1
-; CHECK-MVE-NEXT:    vmov r1, s8
-; CHECK-MVE-NEXT:    vmov r2, s4
-; CHECK-MVE-NEXT:    vmov r3, s6
-; CHECK-MVE-NEXT:    strd r2, r1, [r0, #16]
+; CHECK-MVE-NEXT:    vcvt.u32.f32 s8, s5
+; CHECK-MVE-NEXT:    vcvt.u32.f32 s4, s4
+; CHECK-MVE-NEXT:    vcvt.u32.f32 s6, s6
 ; CHECK-MVE-NEXT:    vmov r1, s2
 ; CHECK-MVE-NEXT:    vmov r2, s0
-; CHECK-MVE-NEXT:    str r3, [r0, #24]
 ; CHECK-MVE-NEXT:    vmov q0[2], q0[0], r2, r1
 ; CHECK-MVE-NEXT:    vmov r1, s10
 ; CHECK-MVE-NEXT:    vmov r2, s12
 ; CHECK-MVE-NEXT:    vmov q0[3], q0[1], r2, r1
+; CHECK-MVE-NEXT:    vstr s8, [r0, #20]
+; CHECK-MVE-NEXT:    vstr s4, [r0, #16]
 ; CHECK-MVE-NEXT:    vstrw.32 q0, [r0]
+; CHECK-MVE-NEXT:    vstr s6, [r0, #24]
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: test_unsigned_v7f32_v7i32:


        


More information about the llvm-commits mailing list