[llvm] 875e014 - [AArch64] Codegen for new SCVTF/UCVTF variants (FEAT_FPRCVT) (#123767)

via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 6 09:12:27 PST 2025


Author: Virginia Cangelosi
Date: 2025-02-06T17:12:22Z
New Revision: 875e014471badfd0535f33551256b2fe24b4654e

URL: https://github.com/llvm/llvm-project/commit/875e014471badfd0535f33551256b2fe24b4654e
DIFF: https://github.com/llvm/llvm-project/commit/875e014471badfd0535f33551256b2fe24b4654e.diff

LOG: [AArch64] Codegen for new SCVTF/UCVTF variants (FEAT_FPRCVT) (#123767)

Adds patterns of new SCVTF/UCVTF instructions to tablegen, with
associated test .ll file.

Added: 
    llvm/test/CodeGen/AArch64/fprcvt-cvtf.ll

Modified: 
    llvm/lib/Target/AArch64/AArch64InstrFormats.td
    llvm/lib/Target/AArch64/AArch64InstrInfo.td

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 3bb5d3cb4d09de..d13bb908df78e1 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -5511,6 +5511,15 @@ multiclass IntegerToFPSIMDScalar<bits<2> rmode, bits<3> opcode, string asm, SDPa
     let Inst{31} = 1; // 64-bit FPR flag
     let Inst{23-22} = 0b00; // 32-bit FPR flag
   }
+
+  def : Pat<(f16 (node (i32 (extractelt (v4i32 V128:$Rn), (i64 0))))),
+          (!cast<Instruction>(NAME # HSr) (EXTRACT_SUBREG $Rn, ssub))>;
+  def : Pat<(f64 (node (i32 (extractelt (v4i32 V128:$Rn), (i64 0))))),
+          (!cast<Instruction>(NAME # DSr) (EXTRACT_SUBREG $Rn, ssub))>;
+  def : Pat<(f16 (node (i64 (extractelt (v2i64 V128:$Rn), (i64 0))))),
+          (!cast<Instruction>(NAME # HDr) (EXTRACT_SUBREG $Rn, dsub))>;
+  def : Pat<(f32 (node (i64 (extractelt (v2i64 V128:$Rn), (i64 0))))),
+          (!cast<Instruction>(NAME # SDr) (EXTRACT_SUBREG $Rn, dsub))>;
 }
 
 //---
@@ -13319,4 +13328,3 @@ multiclass SIMDThreeSameVectorFP8MatrixMul<string asm>{
       let Predicates = [HasNEON, HasF8F32MM];
     }
 }
-

diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index a0928b9095fccd..ce0c260b78410f 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5073,8 +5073,8 @@ defm SCVTF : IntegerToFP<0b00, 0b010, "scvtf", any_sint_to_fp>;
 defm UCVTF : IntegerToFP<0b00, 0b011, "ucvtf", any_uint_to_fp>;
 
 let Predicates = [HasNEON, HasFPRCVT] in {
-  defm SCVTF : IntegerToFPSIMDScalar<0b11, 0b100, "scvtf">;
-  defm UCVTF : IntegerToFPSIMDScalar<0b11, 0b101, "ucvtf">;
+  defm SCVTF : IntegerToFPSIMDScalar<0b11, 0b100, "scvtf", any_sint_to_fp>;
+  defm UCVTF : IntegerToFPSIMDScalar<0b11, 0b101, "ucvtf", any_uint_to_fp>;
 }
 
 def : Pat<(f16 (fdiv (f16 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f16_i32:$scale)),

diff  --git a/llvm/test/CodeGen/AArch64/fprcvt-cvtf.ll b/llvm/test/CodeGen/AArch64/fprcvt-cvtf.ll
new file mode 100644
index 00000000000000..0a7319b9ce11e1
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fprcvt-cvtf.ll
@@ -0,0 +1,441 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mattr=+neon,+fullfp16,+fprcvt -verify-machineinstrs %s -o - | FileCheck %s
+; RUN: llc -mattr=+neon -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-NO-FPRCVT
+
+target triple = "aarch64-unknown-linux-gnu"
+
+
+; To demonstrate what we have implemented, we'll want a scalar integer value in a SIMD/FP register.
+; A common case for this setup is when using the result of an integer reduction intrinsic.
+
+; SCVTF
+
+define half @scvtf_f16i32(<4 x i32> %x) {
+; CHECK-LABEL: scvtf_f16i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    scvtf h0, s0
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FPRCVT-LABEL: scvtf_f16i32:
+; CHECK-NO-FPRCVT:       // %bb.0:
+; CHECK-NO-FPRCVT-NEXT:    scvtf s0, s0
+; CHECK-NO-FPRCVT-NEXT:    fcvt h0, s0
+; CHECK-NO-FPRCVT-NEXT:    ret
+ %extract = extractelement <4 x i32> %x, i64 0
+ %conv = sitofp i32 %extract to half
+ ret half %conv
+}
+
+define half @scvtf_f16i32_neg(<4 x i32> %x) {
+; CHECK-LABEL: scvtf_f16i32_neg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, v0.s[1]
+; CHECK-NEXT:    scvtf h0, w8
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FPRCVT-LABEL: scvtf_f16i32_neg:
+; CHECK-NO-FPRCVT:       // %bb.0:
+; CHECK-NO-FPRCVT-NEXT:    mov w8, v0.s[1]
+; CHECK-NO-FPRCVT-NEXT:    scvtf s0, w8
+; CHECK-NO-FPRCVT-NEXT:    fcvt h0, s0
+; CHECK-NO-FPRCVT-NEXT:    ret
+ %extract = extractelement <4 x i32> %x, i64 1
+ %conv = sitofp i32 %extract to half
+ ret half %conv
+}
+
+define <1 x half> @scvtf_f16i32_simple(<1 x i32> %x) {
+; CHECK-LABEL: scvtf_f16i32_simple:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    scvtf h0, s0
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FPRCVT-LABEL: scvtf_f16i32_simple:
+; CHECK-NO-FPRCVT:       // %bb.0:
+; CHECK-NO-FPRCVT-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NO-FPRCVT-NEXT:    scvtf s0, s0
+; CHECK-NO-FPRCVT-NEXT:    fcvt h0, s0
+; CHECK-NO-FPRCVT-NEXT:    ret
+ %conv = sitofp <1 x i32> %x to <1 x half>
+ ret <1 x half> %conv
+}
+
+define double @scvtf_f64i32(<4 x i32> %x) {
+; CHECK-LABEL: scvtf_f64i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    scvtf d0, s0
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FPRCVT-LABEL: scvtf_f64i32:
+; CHECK-NO-FPRCVT:       // %bb.0:
+; CHECK-NO-FPRCVT-NEXT:    fmov w8, s0
+; CHECK-NO-FPRCVT-NEXT:    scvtf d0, w8
+; CHECK-NO-FPRCVT-NEXT:    ret
+ %extract = extractelement <4 x i32> %x, i64 0
+ %conv = sitofp i32 %extract to double
+ ret double %conv
+}
+
+define double @scvtf_f64i32_neg(<4 x i32> %x) {
+; CHECK-LABEL: scvtf_f64i32_neg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, v0.s[1]
+; CHECK-NEXT:    scvtf d0, w8
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FPRCVT-LABEL: scvtf_f64i32_neg:
+; CHECK-NO-FPRCVT:       // %bb.0:
+; CHECK-NO-FPRCVT-NEXT:    mov w8, v0.s[1]
+; CHECK-NO-FPRCVT-NEXT:    scvtf d0, w8
+; CHECK-NO-FPRCVT-NEXT:    ret
+ %extract = extractelement <4 x i32> %x, i64 1
+ %conv = sitofp i32 %extract to double
+ ret double %conv
+}
+
+; This test does not give the intended result of scvtf d0, s0.
+; This is due to the input being loaded as a 2-element vector and
+; therefore using vector inputs that do not match the pattern.
+; This test will be fixed in a future revision.
+define <1 x double> @scvtf_f64i32_simple(<1 x i32> %x) {
+; CHECK-LABEL: scvtf_f64i32_simple:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-NEXT:    scvtf v0.2d, v0.2d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FPRCVT-LABEL: scvtf_f64i32_simple:
+; CHECK-NO-FPRCVT:       // %bb.0:
+; CHECK-NO-FPRCVT-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-NO-FPRCVT-NEXT:    scvtf v0.2d, v0.2d
+; CHECK-NO-FPRCVT-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NO-FPRCVT-NEXT:    ret
+ %conv = sitofp <1 x i32> %x to <1 x double>
+ ret <1 x double> %conv
+}
+
+define half @scvtf_f16i64(<2 x i64> %x) {
+; CHECK-LABEL: scvtf_f16i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    scvtf h0, d0
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FPRCVT-LABEL: scvtf_f16i64:
+; CHECK-NO-FPRCVT:       // %bb.0:
+; CHECK-NO-FPRCVT-NEXT:    fmov x8, d0
+; CHECK-NO-FPRCVT-NEXT:    scvtf s0, x8
+; CHECK-NO-FPRCVT-NEXT:    fcvt h0, s0
+; CHECK-NO-FPRCVT-NEXT:    ret
+ %extract = extractelement <2 x i64> %x, i64 0
+ %conv = sitofp i64 %extract to half
+ ret half %conv
+}
+
+define half @scvtf_f16i64_neg(<2 x i64> %x) {
+; CHECK-LABEL: scvtf_f16i64_neg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, v0.d[1]
+; CHECK-NEXT:    scvtf h0, x8
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FPRCVT-LABEL: scvtf_f16i64_neg:
+; CHECK-NO-FPRCVT:       // %bb.0:
+; CHECK-NO-FPRCVT-NEXT:    mov x8, v0.d[1]
+; CHECK-NO-FPRCVT-NEXT:    scvtf s0, x8
+; CHECK-NO-FPRCVT-NEXT:    fcvt h0, s0
+; CHECK-NO-FPRCVT-NEXT:    ret
+ %extract = extractelement <2 x i64> %x, i64 1
+ %conv = sitofp i64 %extract to half
+ ret half %conv
+}
+
+define <1 x half> @scvtf_f16i64_simple(<1 x i64> %x) {
+; CHECK-LABEL: scvtf_f16i64_simple:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    scvtf h0, d0
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FPRCVT-LABEL: scvtf_f16i64_simple:
+; CHECK-NO-FPRCVT:       // %bb.0:
+; CHECK-NO-FPRCVT-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NO-FPRCVT-NEXT:    fmov x8, d0
+; CHECK-NO-FPRCVT-NEXT:    scvtf s0, x8
+; CHECK-NO-FPRCVT-NEXT:    fcvt h0, s0
+; CHECK-NO-FPRCVT-NEXT:    ret
+ %conv = sitofp <1 x i64> %x to <1 x half>
+ ret <1 x half> %conv
+}
+
+define float @scvtf_f32i64(<2 x i64> %x) {
+; CHECK-LABEL: scvtf_f32i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    scvtf s0, d0
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FPRCVT-LABEL: scvtf_f32i64:
+; CHECK-NO-FPRCVT:       // %bb.0:
+; CHECK-NO-FPRCVT-NEXT:    fmov x8, d0
+; CHECK-NO-FPRCVT-NEXT:    scvtf s0, x8
+; CHECK-NO-FPRCVT-NEXT:    ret
+ %extract = extractelement <2 x i64> %x, i64 0
+ %conv = sitofp i64 %extract to float
+ ret float %conv
+}
+
+define float @scvtf_f32i64_neg(<2 x i64> %x) {
+; CHECK-LABEL: scvtf_f32i64_neg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, v0.d[1]
+; CHECK-NEXT:    scvtf s0, x8
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FPRCVT-LABEL: scvtf_f32i64_neg:
+; CHECK-NO-FPRCVT:       // %bb.0:
+; CHECK-NO-FPRCVT-NEXT:    mov x8, v0.d[1]
+; CHECK-NO-FPRCVT-NEXT:    scvtf s0, x8
+; CHECK-NO-FPRCVT-NEXT:    ret
+ %extract = extractelement <2 x i64> %x, i64 1
+ %conv = sitofp i64 %extract to float
+ ret float %conv
+}
+
+; This test does not give the intended result of scvtf s0, d0.
+; This is due to the input being loaded as a 2-element vector and
+; therefore using vector inputs that do not match the pattern.
+; This test will be fixed in a future revision.
+define <1 x float> @scvtf_f32i64_simple(<1 x i64> %x) {
+; CHECK-LABEL: scvtf_f32i64_simple:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    scvtf v0.2d, v0.2d
+; CHECK-NEXT:    fcvtn v0.2s, v0.2d
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FPRCVT-LABEL: scvtf_f32i64_simple:
+; CHECK-NO-FPRCVT:       // %bb.0:
+; CHECK-NO-FPRCVT-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NO-FPRCVT-NEXT:    scvtf v0.2d, v0.2d
+; CHECK-NO-FPRCVT-NEXT:    fcvtn v0.2s, v0.2d
+; CHECK-NO-FPRCVT-NEXT:    ret
+ %conv = sitofp <1 x i64> %x to <1 x float>
+ ret <1 x float> %conv
+}
+
+; UCVTF
+
+define half @ucvtf_f16i32(<4 x i32> %x) {
+; CHECK-LABEL: ucvtf_f16i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ucvtf h0, s0
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FPRCVT-LABEL: ucvtf_f16i32:
+; CHECK-NO-FPRCVT:       // %bb.0:
+; CHECK-NO-FPRCVT-NEXT:    ucvtf s0, s0
+; CHECK-NO-FPRCVT-NEXT:    fcvt h0, s0
+; CHECK-NO-FPRCVT-NEXT:    ret
+ %extract = extractelement <4 x i32> %x, i64 0
+ %conv = uitofp i32 %extract to half
+ ret half %conv
+}
+
+define half @ucvtf_f16i32_neg(<4 x i32> %x) {
+; CHECK-LABEL: ucvtf_f16i32_neg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, v0.s[1]
+; CHECK-NEXT:    ucvtf h0, w8
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FPRCVT-LABEL: ucvtf_f16i32_neg:
+; CHECK-NO-FPRCVT:       // %bb.0:
+; CHECK-NO-FPRCVT-NEXT:    mov w8, v0.s[1]
+; CHECK-NO-FPRCVT-NEXT:    ucvtf s0, w8
+; CHECK-NO-FPRCVT-NEXT:    fcvt h0, s0
+; CHECK-NO-FPRCVT-NEXT:    ret
+ %extract = extractelement <4 x i32> %x, i64 1
+ %conv = uitofp i32 %extract to half
+ ret half %conv
+}
+
+define <1 x half> @ucvtf_f16i32_simple(<1 x i32> %x) {
+; CHECK-LABEL: ucvtf_f16i32_simple:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    ucvtf h0, s0
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FPRCVT-LABEL: ucvtf_f16i32_simple:
+; CHECK-NO-FPRCVT:       // %bb.0:
+; CHECK-NO-FPRCVT-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NO-FPRCVT-NEXT:    ucvtf s0, s0
+; CHECK-NO-FPRCVT-NEXT:    fcvt h0, s0
+; CHECK-NO-FPRCVT-NEXT:    ret
+ %conv = uitofp <1 x i32> %x to <1 x half>
+ ret <1 x half> %conv
+}
+
+define double @ucvtf_f64i32(<4 x i32> %x) {
+; CHECK-LABEL: ucvtf_f64i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ucvtf d0, s0
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FPRCVT-LABEL: ucvtf_f64i32:
+; CHECK-NO-FPRCVT:       // %bb.0:
+; CHECK-NO-FPRCVT-NEXT:    fmov w8, s0
+; CHECK-NO-FPRCVT-NEXT:    ucvtf d0, w8
+; CHECK-NO-FPRCVT-NEXT:    ret
+ %extract = extractelement <4 x i32> %x, i64 0
+ %conv = uitofp i32 %extract to double
+ ret double %conv
+}
+
+define double @ucvtf_f64i32_neg(<4 x i32> %x) {
+; CHECK-LABEL: ucvtf_f64i32_neg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, v0.s[1]
+; CHECK-NEXT:    ucvtf d0, w8
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FPRCVT-LABEL: ucvtf_f64i32_neg:
+; CHECK-NO-FPRCVT:       // %bb.0:
+; CHECK-NO-FPRCVT-NEXT:    mov w8, v0.s[1]
+; CHECK-NO-FPRCVT-NEXT:    ucvtf d0, w8
+; CHECK-NO-FPRCVT-NEXT:    ret
+ %extract = extractelement <4 x i32> %x, i64 1
+ %conv = uitofp i32 %extract to double
+ ret double %conv
+}
+
+; This test does not give the intended result of ucvtf d0, s0.
+; This is due to the input being loaded as a 2-element vector and
+; therefore using vector inputs that do not match the pattern.
+; This test will be fixed in a future revision.
+define <1 x double> @ucvtf_f64i32_simple(<1 x i32> %x) {
+; CHECK-LABEL: ucvtf_f64i32_simple:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ucvtf v0.2d, v0.2d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FPRCVT-LABEL: ucvtf_f64i32_simple:
+; CHECK-NO-FPRCVT:       // %bb.0:
+; CHECK-NO-FPRCVT-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NO-FPRCVT-NEXT:    ucvtf v0.2d, v0.2d
+; CHECK-NO-FPRCVT-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NO-FPRCVT-NEXT:    ret
+ %conv = uitofp <1 x i32> %x to <1 x double>
+ ret <1 x double> %conv
+}
+
+define half @ucvtf_f16i64(<2 x i64> %x) {
+; CHECK-LABEL: ucvtf_f16i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ucvtf h0, d0
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FPRCVT-LABEL: ucvtf_f16i64:
+; CHECK-NO-FPRCVT:       // %bb.0:
+; CHECK-NO-FPRCVT-NEXT:    fmov x8, d0
+; CHECK-NO-FPRCVT-NEXT:    ucvtf s0, x8
+; CHECK-NO-FPRCVT-NEXT:    fcvt h0, s0
+; CHECK-NO-FPRCVT-NEXT:    ret
+ %extract = extractelement <2 x i64> %x, i64 0
+ %conv = uitofp i64 %extract to half
+ ret half %conv
+}
+
+define half @ucvtf_f16i64_neg(<2 x i64> %x) {
+; CHECK-LABEL: ucvtf_f16i64_neg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, v0.d[1]
+; CHECK-NEXT:    ucvtf h0, x8
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FPRCVT-LABEL: ucvtf_f16i64_neg:
+; CHECK-NO-FPRCVT:       // %bb.0:
+; CHECK-NO-FPRCVT-NEXT:    mov x8, v0.d[1]
+; CHECK-NO-FPRCVT-NEXT:    ucvtf s0, x8
+; CHECK-NO-FPRCVT-NEXT:    fcvt h0, s0
+; CHECK-NO-FPRCVT-NEXT:    ret
+ %extract = extractelement <2 x i64> %x, i64 1
+ %conv = uitofp i64 %extract to half
+ ret half %conv
+}
+
+define <1 x half> @ucvtf_f16i64_simple(<1 x i64> %x) {
+; CHECK-LABEL: ucvtf_f16i64_simple:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    ucvtf h0, d0
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FPRCVT-LABEL: ucvtf_f16i64_simple:
+; CHECK-NO-FPRCVT:       // %bb.0:
+; CHECK-NO-FPRCVT-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NO-FPRCVT-NEXT:    fmov x8, d0
+; CHECK-NO-FPRCVT-NEXT:    ucvtf s0, x8
+; CHECK-NO-FPRCVT-NEXT:    fcvt h0, s0
+; CHECK-NO-FPRCVT-NEXT:    ret
+ %conv = uitofp <1 x i64> %x to <1 x half>
+ ret <1 x half> %conv
+}
+
+define float @ucvtf_f32i64(<2 x i64> %x) {
+; CHECK-LABEL: ucvtf_f32i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ucvtf s0, d0
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FPRCVT-LABEL: ucvtf_f32i64:
+; CHECK-NO-FPRCVT:       // %bb.0:
+; CHECK-NO-FPRCVT-NEXT:    fmov x8, d0
+; CHECK-NO-FPRCVT-NEXT:    ucvtf s0, x8
+; CHECK-NO-FPRCVT-NEXT:    ret
+ %extract = extractelement <2 x i64> %x, i64 0
+ %conv = uitofp i64 %extract to float
+ ret float %conv
+}
+
+define float @ucvtf_f32i64_neg(<2 x i64> %x) {
+; CHECK-LABEL: ucvtf_f32i64_neg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, v0.d[1]
+; CHECK-NEXT:    ucvtf s0, x8
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FPRCVT-LABEL: ucvtf_f32i64_neg:
+; CHECK-NO-FPRCVT:       // %bb.0:
+; CHECK-NO-FPRCVT-NEXT:    mov x8, v0.d[1]
+; CHECK-NO-FPRCVT-NEXT:    ucvtf s0, x8
+; CHECK-NO-FPRCVT-NEXT:    ret
+ %extract = extractelement <2 x i64> %x, i64 1
+ %conv = uitofp i64 %extract to float
+ ret float %conv
+}
+
+; This test does not give the intended result of ucvtf s0, d0.
+; This is due to the input being loaded as a 2-element vector and
+; therefore using vector inputs that do not match the pattern.
+; This test will be fixed in a future revision.
+define <1 x float> @ucvtf_f32i64_simple(<1 x i64> %x) {
+; CHECK-LABEL: ucvtf_f32i64_simple:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    ucvtf v0.2d, v0.2d
+; CHECK-NEXT:    fcvtn v0.2s, v0.2d
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FPRCVT-LABEL: ucvtf_f32i64_simple:
+; CHECK-NO-FPRCVT:       // %bb.0:
+; CHECK-NO-FPRCVT-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NO-FPRCVT-NEXT:    ucvtf v0.2d, v0.2d
+; CHECK-NO-FPRCVT-NEXT:    fcvtn v0.2s, v0.2d
+; CHECK-NO-FPRCVT-NEXT:    ret
+ %conv = uitofp <1 x i64> %x to <1 x float>
+ ret <1 x float> %conv
+}


        


More information about the llvm-commits mailing list