[llvm] 248ad71 - [AArch64] Correct SCVTF/UCVTF instructions for vector input (#152974)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 12 06:53:57 PDT 2025
Author: Amina Chabane
Date: 2025-09-12T14:53:54+01:00
New Revision: 248ad71747e0e0f49876e7d95905219822f400a6
URL: https://github.com/llvm/llvm-project/commit/248ad71747e0e0f49876e7d95905219822f400a6
DIFF: https://github.com/llvm/llvm-project/commit/248ad71747e0e0f49876e7d95905219822f400a6.diff
LOG: [AArch64] Correct SCVTF/UCVTF instructions for vector input (#152974)
This pull request improves support for scalar floating-point conversions
from integer vectors on AArch64, specifically for the `scvtf` and
`ucvtf` instructions. It fixes pattern matching so that single-element
conversions from vectors now generate the expected scalar instructions
and adds a new test to verify correct behavior for extracting a lane
from a widened vector.
**Pattern matching and code generation improvements:**
* Added new patterns in `AArch64InstrInfo.td` that match lane 0
(`extract_subvector` at index 0) of a `v2f64` `sint_to_fp`/`uint_to_fp`
whose operand is a sign-/zero-extended `v2i32`, ensuring the scalar
instructions (`scvtf d0, s0` and `ucvtf d0, s0`) are generated for the
resulting `v1f64` instead of a widen-then-convert vector sequence.
**Test updates and additions:**
* Updated `scvtf_f64i32_simple` and `ucvtf_f64i32_simple` tests in
`fprcvt-cvtf.ll` to reflect the correct generation of scalar
instructions, removing previous comments about incorrect codegen and
showing the expected output.
* Added a new test `uitofp_sext_v2i32_extract_lane0` to verify that a
mismatched extension/conversion pair (`sext` followed by `uitofp`) is
not folded into the scalar form: extracting lane 0 must still produce
the vector `sshll` + `ucvtf v0.2d` sequence.
Added:
Modified:
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/test/CodeGen/AArch64/fprcvt-cvtf.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index f0020a9a3c91d..3fcafc6d35090 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5409,6 +5409,11 @@ defm UCVTF : IntegerToFP<0b00, 0b011, "ucvtf", any_uint_to_fp>;
let Predicates = [HasNEON, HasFPRCVT] in {
defm SCVTF : IntegerToFPSIMDScalar<0b11, 0b100, "scvtf", any_sint_to_fp>;
defm UCVTF : IntegerToFPSIMDScalar<0b11, 0b101, "ucvtf", any_uint_to_fp>;
+
+ def : Pat<(v1f64 (extract_subvector (v2f64 (sint_to_fp (v2i64 (sext (v2i32 V64:$Rn))))), (i64 0))),
+ (SCVTFDSr (EXTRACT_SUBREG V64:$Rn, ssub))>;
+ def : Pat<(v1f64 (extract_subvector (v2f64 (uint_to_fp (v2i64 (zext (v2i32 V64:$Rn))))), (i64 0))),
+ (UCVTFDSr (EXTRACT_SUBREG V64:$Rn, ssub))>;
}
def : Pat<(f16 (fdiv (f16 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f16_i32:$scale)),
diff --git a/llvm/test/CodeGen/AArch64/fprcvt-cvtf.ll b/llvm/test/CodeGen/AArch64/fprcvt-cvtf.ll
index 9da6f583cec01..3ea1a01cfc977 100644
--- a/llvm/test/CodeGen/AArch64/fprcvt-cvtf.ll
+++ b/llvm/test/CodeGen/AArch64/fprcvt-cvtf.ll
@@ -94,16 +94,10 @@ define double @scvtf_f64i32_neg(<4 x i32> %x) {
ret double %conv
}
-; This test does not give the indended result of scvtf d0, s0
-; This is due to the input being loaded as a 2 item vector and
-; therefore using vector inputs that do not match the pattern
-; This test will be fixed in a future revision
define <1 x double> @scvtf_f64i32_simple(<1 x i32> %x) {
; CHECK-LABEL: scvtf_f64i32_simple:
; CHECK: // %bb.0:
-; CHECK-NEXT: sshll v0.2d, v0.2s, #0
-; CHECK-NEXT: scvtf v0.2d, v0.2d
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: scvtf d0, s0
; CHECK-NEXT: ret
;
; CHECK-NO-FPRCVT-LABEL: scvtf_f64i32_simple:
@@ -315,16 +309,10 @@ define double @ucvtf_f64i32_neg(<4 x i32> %x) {
ret double %conv
}
-; This test does not give the indended result of ucvtf d0, s0
-; This is due to the input being loaded as a 2 item vector and
-; therefore using vector inputs that do not match the pattern
-; This test will be fixed in a future revision
define <1 x double> @ucvtf_f64i32_simple(<1 x i32> %x) {
; CHECK-LABEL: ucvtf_f64i32_simple:
; CHECK: // %bb.0:
-; CHECK-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-NEXT: ucvtf v0.2d, v0.2d
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ucvtf d0, s0
; CHECK-NEXT: ret
;
; CHECK-NO-FPRCVT-LABEL: ucvtf_f64i32_simple:
@@ -449,3 +437,24 @@ define <1 x float> @ucvtf_f32i64_simple(<1 x i64> %x) {
%conv = uitofp <1 x i64> %x to <1 x float>
ret <1 x float> %conv
}
+
+define <1 x double> @uitofp_sext_v2i32_extract_lane0(<2 x i32> %x) {
+; CHECK-LABEL: uitofp_sext_v2i32_extract_lane0:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sshll v0.2d, v0.2s, #0
+; CHECK-NEXT: ucvtf v0.2d, v0.2d
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FPRCVT-LABEL: uitofp_sext_v2i32_extract_lane0:
+; CHECK-NO-FPRCVT: // %bb.0:
+; CHECK-NO-FPRCVT-NEXT: sshll v0.2d, v0.2s, #0
+; CHECK-NO-FPRCVT-NEXT: ucvtf v0.2d, v0.2d
+; CHECK-NO-FPRCVT-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NO-FPRCVT-NEXT: ret
+ %wide = sext <2 x i32> %x to <2 x i64>
+ %fpv2 = uitofp <2 x i64> %wide to <2 x double>
+ %lane0 = shufflevector <2 x double> %fpv2, <2 x double> poison, <1 x i32> zeroinitializer
+ ret <1 x double> %lane0
+}
+
More information about the llvm-commits mailing list