[llvm] 9d68ed0 - [AArch64] Allow strict opcodes in fp->int->fp patterns

Thu Feb 17 05:12:30 PST 2022

Author: John Brawn
Date: 2022-02-17T13:11:54Z
New Revision: 9d68ed08178d590e294761bde2ef471fbf3fe14d

URL: https://github.com/llvm/llvm-project/commit/9d68ed08178d590e294761bde2ef471fbf3fe14d
DIFF: https://github.com/llvm/llvm-project/commit/9d68ed08178d590e294761bde2ef471fbf3fe14d.diff

LOG: [AArch64] Allow strict opcodes in fp->int->fp patterns

These patterns don't change which fundamental instructions are used,
only which variants of them are selected (to remove some extra MOVs),
so they can match the strict opcodes as well.

Differential Revision: https://reviews.llvm.org/D118485

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64InstrInfo.td
    llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 45f8abc4585f..0f88fc950eb4 100644

--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4983,19 +4983,19 @@ def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
 // int values in FP registers using the corresponding NEON instructions to
 // avoid more costly int <-> fp register transfers.
 let Predicates = [HasNEON] in {
-def : Pat<(f64 (sint_to_fp (i64 (fp_to_sint f64:$Rn)))),
+def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint f64:$Rn)))),
           (SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>;
-def : Pat<(f32 (sint_to_fp (i32 (fp_to_sint f32:$Rn)))),
+def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint f32:$Rn)))),
           (SCVTFv1i32 (i32 (FCVTZSv1i32 f32:$Rn)))>;
-def : Pat<(f64 (uint_to_fp (i64 (fp_to_uint f64:$Rn)))),
+def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint f64:$Rn)))),
           (UCVTFv1i64 (i64 (FCVTZUv1i64 f64:$Rn)))>;
-def : Pat<(f32 (uint_to_fp (i32 (fp_to_uint f32:$Rn)))),
+def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint f32:$Rn)))),
           (UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>;
 
 let Predicates = [HasFullFP16] in {
-def : Pat<(f16 (sint_to_fp (i32 (fp_to_sint f16:$Rn)))),
+def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint f16:$Rn)))),
           (SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>;
-def : Pat<(f16 (uint_to_fp (i32 (fp_to_uint f16:$Rn)))),
+def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))),
           (UCVTFv1i16 (f16 (FCVTZUv1f16 f16:$Rn)))>;
 }
 }

diff  --git a/llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll b/llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll
index 981818f68348..d7bdf2d264c4 100644
--- a/llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll
+++ b/llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll
@@ -72,3 +72,90 @@ entry:
   %conv1 = uitofp i32 %conv to half
   ret half %conv1
 }
+
+define double @t1_strict(double %x) #0 {
+; CHECK-LABEL: t1_strict:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzs d0, d0
+; CHECK-NEXT:    scvtf d0, d0
+; CHECK-NEXT:    ret
+entry:
+  %conv = call i64 @llvm.experimental.constrained.fptosi.i64.f64(double %x, metadata !"fpexcept.strict") #0
+  %conv1 = call double @llvm.experimental.constrained.sitofp.i64.f64(i64 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret double %conv1
+}
+
+define float @t2_strict(float %x) #0 {
+; CHECK-LABEL: t2_strict:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzs s0, s0
+; CHECK-NEXT:    scvtf s0, s0
+; CHECK-NEXT:    ret
+entry:
+  %conv = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %x, metadata !"fpexcept.strict") #0
+  %conv1 = call float @llvm.experimental.constrained.sitofp.i32.f32(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret float %conv1
+}
+
+define half @t3_strict(half %x) #0 {
+; CHECK-LABEL: t3_strict:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzs h0, h0
+; CHECK-NEXT:    scvtf h0, h0
+; CHECK-NEXT:    ret
+entry:
+  %conv = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %x, metadata !"fpexcept.strict") #0
+  %conv1 = call half @llvm.experimental.constrained.sitofp.i32.f16(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret half %conv1
+}
+
+define double @t4_strict(double %x) #0 {
+; CHECK-LABEL: t4_strict:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzu d0, d0
+; CHECK-NEXT:    ucvtf d0, d0
+; CHECK-NEXT:    ret
+entry:
+  %conv = call i64 @llvm.experimental.constrained.fptoui.i64.f64(double %x, metadata !"fpexcept.strict") #0
+  %conv1 = call double @llvm.experimental.constrained.uitofp.i64.f64(i64 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret double %conv1
+}
+
+define float @t5_strict(float %x) #0 {
+; CHECK-LABEL: t5_strict:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzu s0, s0
+; CHECK-NEXT:    ucvtf s0, s0
+; CHECK-NEXT:    ret
+entry:
+  %conv = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %x, metadata !"fpexcept.strict") #0
+  %conv1 = call float @llvm.experimental.constrained.uitofp.i32.f32(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret float %conv1
+}
+
+define half @t6_strict(half %x) #0 {
+; CHECK-LABEL: t6_strict:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzu h0, h0
+; CHECK-NEXT:    ucvtf h0, h0
+; CHECK-NEXT:    ret
+entry:
+  %conv = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %x, metadata !"fpexcept.strict") #0
+  %conv1 = call half @llvm.experimental.constrained.uitofp.i32.f16(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret half %conv1
+}
+
+attributes #0 = { strictfp }
+
+declare i32 @llvm.experimental.constrained.fptosi.i32.f16(half, metadata)
+declare i32 @llvm.experimental.constrained.fptoui.i32.f16(half, metadata)
+declare i32 @llvm.experimental.constrained.fptosi.i32.f32(float, metadata)
+declare i32 @llvm.experimental.constrained.fptoui.i32.f32(float, metadata)
+declare i64 @llvm.experimental.constrained.fptosi.i64.f64(double, metadata)
+declare i64 @llvm.experimental.constrained.fptoui.i64.f64(double, metadata)
+declare half @llvm.experimental.constrained.sitofp.i32.f16(i32, metadata, metadata)
+declare half @llvm.experimental.constrained.uitofp.i32.f16(i32, metadata, metadata)
+declare float @llvm.experimental.constrained.sitofp.i32.f32(i32, metadata, metadata)
+declare float @llvm.experimental.constrained.uitofp.i32.f32(i32, metadata, metadata)
+declare double @llvm.experimental.constrained.sitofp.i64.f64(i64, metadata, metadata)
+declare double @llvm.experimental.constrained.uitofp.i64.f64(i64, metadata, metadata)