[llvm] [AArch64] Correct SCVTF/UCVTF instructions for vector input (PR #152974)

Mon Sep 8 02:52:26 PDT 2025

https://github.com/Amichaxx updated https://github.com/llvm/llvm-project/pull/152974

>From e82a22197d9823a75541808fbf6e176bbd077a0f Mon Sep 17 00:00:00 2001
From: Amina Chabane <amina.chabane at arm.com>
Date: Mon, 11 Aug 2025 08:47:24 +0000
Subject: [PATCH 1/2] [AArch64] Correct SCVTF/UCVTF instructions for vector
 inputs

---
 .../lib/Target/AArch64/AArch64InstrFormats.td |  5 +++++
 llvm/test/CodeGen/AArch64/fprcvt-cvtf.ll      | 22 ++++---------------
 2 files changed, 9 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index ba7cbccc0bcd6..3b5643893bda3 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -5520,6 +5520,11 @@ multiclass IntegerToFPSIMDScalar<bits<2> rmode, bits<3> opcode, string asm, SDPa
     let Inst{31} = 1; // 64-bit FPR flag
     let Inst{23-22} = 0b00; // 32-bit FPR flag
   }
+  def : Pat<(v1f64 (extract_subvector (v2f64 (node (v2i64 (sext (v2i32 V64:$Rn))))), (i64 0))),
+        (!cast<Instruction>(NAME # DSr) (EXTRACT_SUBREG V64:$Rn, ssub))>;
+
+  def : Pat<(v1f64 (extract_subvector (v2f64 (node (v2i64 (zext (v2i32 V64:$Rn))))), (i64 0))),
+          (!cast<Instruction>(NAME # DSr) (EXTRACT_SUBREG V64:$Rn, ssub))>;
 
   def : Pat<(f16 (node (i32 (extractelt (v4i32 V128:$Rn), (i64 0))))),
           (!cast<Instruction>(NAME # HSr) (EXTRACT_SUBREG V128:$Rn, ssub))>;
diff --git a/llvm/test/CodeGen/AArch64/fprcvt-cvtf.ll b/llvm/test/CodeGen/AArch64/fprcvt-cvtf.ll
index 9da6f583cec01..8ccc81b732896 100644
--- a/llvm/test/CodeGen/AArch64/fprcvt-cvtf.ll
+++ b/llvm/test/CodeGen/AArch64/fprcvt-cvtf.ll
@@ -94,16 +94,10 @@ define double @scvtf_f64i32_neg(<4 x i32> %x) {
  ret double %conv
 }
 
-; This test does not give the indended result of scvtf d0, s0
-; This is due to the input being loaded as a 2 item vector and
-; therefore using vector inputs that do not match the pattern
-; This test will be fixed in a future revision
 define <1 x double> @scvtf_f64i32_simple(<1 x i32> %x) {
 ; CHECK-LABEL: scvtf_f64i32_simple:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
-; CHECK-NEXT:    scvtf v0.2d, v0.2d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    scvtf d0, s0
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-NO-FPRCVT-LABEL: scvtf_f64i32_simple:
@@ -202,10 +196,8 @@ define float @scvtf_f32i64_neg(<2 x i64> %x) {
  ret float %conv
 }
 
-; This test does not give the indended result of scvtf s0, d0
-; This is due to the input being loaded as a 2 item vector and
-; therefore using vector inputs that do not match the pattern
-; This test will be fixed in a future revision
+; <1 x float> is illegal on AArch64 and is widened to <2 x float>.
+; This widening introduces the extra insert/extract/zeroing instructions.
 define <1 x float> @scvtf_f32i64_simple(<1 x i64> %x) {
 ; CHECK-LABEL: scvtf_f32i64_simple:
 ; CHECK:       // %bb.0:
@@ -315,16 +307,10 @@ define double @ucvtf_f64i32_neg(<4 x i32> %x) {
  ret double %conv
 }
 
-; This test does not give the indended result of ucvtf d0, s0
-; This is due to the input being loaded as a 2 item vector and
-; therefore using vector inputs that do not match the pattern
-; This test will be fixed in a future revision
 define <1 x double> @ucvtf_f64i32_simple(<1 x i32> %x) {
 ; CHECK-LABEL: ucvtf_f64i32_simple:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
-; CHECK-NEXT:    ucvtf v0.2d, v0.2d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ucvtf d0, s0
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-NO-FPRCVT-LABEL: ucvtf_f64i32_simple:

>From b2bb04339b1c53743ea85f3ba9a634bda39bd842 Mon Sep 17 00:00:00 2001
From: Amina Chabane <amina.chabane at arm.com>
Date: Wed, 20 Aug 2025 16:27:07 +0000
Subject: [PATCH 2/2] Add isel patterns for SCVTF/UCVTF

Amended patterns to match the signed path (sitofp(sext(...))) to SCVTFDSr and
the unsigned path (uitofp(zext(...))) to UCVTFDSr. Moved to AArch64InstrInfo.td.
Added test in fprcvt-cvtf.ll to verify.
---
 .../lib/Target/AArch64/AArch64InstrFormats.td |  7 +-----
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   |  5 ++++
 llvm/test/CodeGen/AArch64/fprcvt-cvtf.ll      | 23 +++++++++++++++++++
 3 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 3b5643893bda3..dd48b564a0c53 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -5520,12 +5520,7 @@ multiclass IntegerToFPSIMDScalar<bits<2> rmode, bits<3> opcode, string asm, SDPa
     let Inst{31} = 1; // 64-bit FPR flag
     let Inst{23-22} = 0b00; // 32-bit FPR flag
   }
-  def : Pat<(v1f64 (extract_subvector (v2f64 (node (v2i64 (sext (v2i32 V64:$Rn))))), (i64 0))),
-        (!cast<Instruction>(NAME # DSr) (EXTRACT_SUBREG V64:$Rn, ssub))>;
-
-  def : Pat<(v1f64 (extract_subvector (v2f64 (node (v2i64 (zext (v2i32 V64:$Rn))))), (i64 0))),
-          (!cast<Instruction>(NAME # DSr) (EXTRACT_SUBREG V64:$Rn, ssub))>;
-
+  
   def : Pat<(f16 (node (i32 (extractelt (v4i32 V128:$Rn), (i64 0))))),
           (!cast<Instruction>(NAME # HSr) (EXTRACT_SUBREG V128:$Rn, ssub))>;
   def : Pat<(f64 (node (i32 (extractelt (v4i32 V128:$Rn), (i64 0))))),
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 0cb7b02d84a6e..4bc3581451d03 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5287,6 +5287,11 @@ defm UCVTF : IntegerToFP<0b00, 0b011, "ucvtf", any_uint_to_fp>;
 let Predicates = [HasNEON, HasFPRCVT] in {
   defm SCVTF : IntegerToFPSIMDScalar<0b11, 0b100, "scvtf", any_sint_to_fp>;
   defm UCVTF : IntegerToFPSIMDScalar<0b11, 0b101, "ucvtf", any_uint_to_fp>;
+
+  def : Pat<(v1f64 (extract_subvector (v2f64 (sint_to_fp (v2i64 (sext (v2i32 V64:$Rn))))), (i64 0))),
+          (SCVTFDSr (EXTRACT_SUBREG V64:$Rn, ssub))>;
+  def : Pat< (v1f64 (extract_subvector (v2f64 (uint_to_fp (v2i64 (zext (v2i32 V64:$Rn))))), (i64 0))),
+          (UCVTFDSr (EXTRACT_SUBREG V64:$Rn, ssub))>;
 }
 
 def : Pat<(f16 (fdiv (f16 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f16_i32:$scale)),
diff --git a/llvm/test/CodeGen/AArch64/fprcvt-cvtf.ll b/llvm/test/CodeGen/AArch64/fprcvt-cvtf.ll
index 8ccc81b732896..827e3a499a66c 100644
--- a/llvm/test/CodeGen/AArch64/fprcvt-cvtf.ll
+++ b/llvm/test/CodeGen/AArch64/fprcvt-cvtf.ll
@@ -435,3 +435,26 @@ define <1 x float> @ucvtf_f32i64_simple(<1 x i64> %x) {
  %conv = uitofp <1 x i64> %x to <1 x float>
  ret <1 x float> %conv
 }
+
+define <1 x double> @uitofp_sext_v2i32_extract_lane0(<2 x i32> %x) {
+; CHECK-LABEL: uitofp_sext_v2i32_extract_lane0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ucvtf v0.2d, v0.2d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NOT:     ucvtf d0, s0
+; CHECK-NOT:     scvtf d0, s0
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FPRCVT-LABEL: uitofp_sext_v2i32_extract_lane0:
+; CHECK-NO-FPRCVT:       // %bb.0:
+; CHECK-NO-FPRCVT-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-NO-FPRCVT-NEXT:    ucvtf v0.2d, v0.2d
+; CHECK-NO-FPRCVT-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NO-FPRCVT-NEXT:    ret
+  %wide  = sext <2 x i32> %x to <2 x i64>
+  %fpv2  = uitofp <2 x i64> %wide to <2 x double>
+  %lane0 = shufflevector <2 x double> %fpv2, <2 x double> poison, <1 x i32> zeroinitializer
+  ret <1 x double> %lane0
+}
+