[llvm] [AArch64] Optimize more floating-point round+convert combinations into fcvt instructions (PR #170018)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Nov 29 17:39:56 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: None (valadaptive)
<details>
<summary>Changes</summary>
Resolves https://github.com/llvm/llvm-project/issues/170010.
This PR adds more instruction selection patterns for lowering floating-point rounding operations (ceil, floor, trunc, round, etc.) to `FCVT[AMNPZ][SU]` instructions. There are two optimizations added here, which are somewhat related:
- A `roundeven` operation followed by a float-to-int conversion can be lowered to a `FCVTNS`/`FCVTNU`.
- These optimizations, which were previously only performed on *single* floats, are now done on vectors as well.
An open question is whether it's legal to optimize a `rint` or `nearbyint` + conversion into a `FCVTNS`/`FCVTNU`; https://github.com/llvm/llvm-project/issues/77561 will need to be resolved first, so I've not implemented those optimizations for now.
---
Patch is 43.35 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/170018.diff
4 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+37-8)
- (modified) llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll (+332)
- (added) llvm/test/CodeGen/AArch64/arm64-vcvt-fused-round.ll (+832)
- (modified) llvm/test/CodeGen/AArch64/shuffle-tbl34.ll (+9-17)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index da93a2b13fc11..02ae9546f7ccf 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5830,6 +5830,33 @@ multiclass SIMDTwoVectorFPToIntSatPats<SDNode to_int_sat, SDNode to_int_sat_gi,
defm : SIMDTwoVectorFPToIntSatPats<fp_to_sint_sat, fp_to_sint_sat_gi, "FCVTZS">;
defm : SIMDTwoVectorFPToIntSatPats<fp_to_uint_sat, fp_to_uint_sat_gi, "FCVTZU">;
+// Fused round + convert to int patterns for vectors
+multiclass SIMDTwoVectorFPToIntRoundPats<SDNode to_int, SDNode round, string INST> {
+ let Predicates = [HasFullFP16] in {
+ def : Pat<(v4i16 (to_int (round v4f16:$Rn))),
+ (!cast<Instruction>(INST # v4f16) v4f16:$Rn)>;
+ def : Pat<(v8i16 (to_int (round v8f16:$Rn))),
+ (!cast<Instruction>(INST # v8f16) v8f16:$Rn)>;
+ }
+ def : Pat<(v2i32 (to_int (round v2f32:$Rn))),
+ (!cast<Instruction>(INST # v2f32) v2f32:$Rn)>;
+ def : Pat<(v4i32 (to_int (round v4f32:$Rn))),
+ (!cast<Instruction>(INST # v4f32) v4f32:$Rn)>;
+ def : Pat<(v2i64 (to_int (round v2f64:$Rn))),
+ (!cast<Instruction>(INST # v2f64) v2f64:$Rn)>;
+}
+
+defm : SIMDTwoVectorFPToIntRoundPats<fp_to_sint, fceil, "FCVTPS">;
+defm : SIMDTwoVectorFPToIntRoundPats<fp_to_uint, fceil, "FCVTPU">;
+defm : SIMDTwoVectorFPToIntRoundPats<fp_to_sint, ffloor, "FCVTMS">;
+defm : SIMDTwoVectorFPToIntRoundPats<fp_to_uint, ffloor, "FCVTMU">;
+defm : SIMDTwoVectorFPToIntRoundPats<fp_to_sint, ftrunc, "FCVTZS">;
+defm : SIMDTwoVectorFPToIntRoundPats<fp_to_uint, ftrunc, "FCVTZU">;
+defm : SIMDTwoVectorFPToIntRoundPats<fp_to_sint, fround, "FCVTAS">;
+defm : SIMDTwoVectorFPToIntRoundPats<fp_to_uint, fround, "FCVTAU">;
+defm : SIMDTwoVectorFPToIntRoundPats<fp_to_sint, froundeven, "FCVTNS">;
+defm : SIMDTwoVectorFPToIntRoundPats<fp_to_uint, froundeven, "FCVTNU">;
+
def : Pat<(v4i16 (int_aarch64_neon_fcvtzs v4f16:$Rn)), (FCVTZSv4f16 $Rn)>;
def : Pat<(v8i16 (int_aarch64_neon_fcvtzs v8f16:$Rn)), (FCVTZSv8f16 $Rn)>;
def : Pat<(v2i32 (int_aarch64_neon_fcvtzs v2f32:$Rn)), (FCVTZSv2f32 $Rn)>;
@@ -6817,14 +6844,16 @@ multiclass FPToIntegerPats<SDNode to_int, SDNode to_int_sat, SDNode to_int_sat_g
(!cast<Instruction>(INST # v1i64) f64:$Rn)>;
}
-defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, fceil, "FCVTPS">;
-defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, fceil, "FCVTPU">;
-defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, ffloor, "FCVTMS">;
-defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, ffloor, "FCVTMU">;
-defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, ftrunc, "FCVTZS">;
-defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, ftrunc, "FCVTZU">;
-defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, fround, "FCVTAS">;
-defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, fround, "FCVTAU">;
+defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, fceil, "FCVTPS">;
+defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, fceil, "FCVTPU">;
+defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, ffloor, "FCVTMS">;
+defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, ffloor, "FCVTMU">;
+defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, ftrunc, "FCVTZS">;
+defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, ftrunc, "FCVTZU">;
+defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, fround, "FCVTAS">;
+defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, fround, "FCVTAU">;
+defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, froundeven, "FCVTNS">;
+defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, froundeven, "FCVTNU">;
// f16 -> s16 conversions
let Predicates = [HasFullFP16] in {
diff --git a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll
index a729772f2897a..48e7972b04a6c 100644
--- a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll
@@ -543,6 +543,138 @@ define double @fcvtau_dd_round_simd(double %a) {
ret double %bc
}
+define double @fcvtns_ds_roundeven_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtns_ds_roundeven_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtns x8, s0
+; CHECK-NOFPRCVT-NEXT: fmov d0, x8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtns_ds_roundeven_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtns d0, s0
+; CHECK-NEXT: ret
+ %r = call float @llvm.roundeven.f32(float %a)
+ %i = fptosi float %r to i64
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define float @fcvtns_sd_roundeven_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtns_sd_roundeven_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtns w8, d0
+; CHECK-NOFPRCVT-NEXT: fmov s0, w8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtns_sd_roundeven_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtns s0, d0
+; CHECK-NEXT: ret
+ %r = call double @llvm.roundeven.f64(double %a)
+ %i = fptosi double %r to i32
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define float @fcvtns_ss_roundeven_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtns_ss_roundeven_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtns s0, s0
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtns_ss_roundeven_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtns s0, s0
+; CHECK-NEXT: ret
+ %r = call float @llvm.roundeven.f32(float %a)
+ %i = fptosi float %r to i32
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define double @fcvtns_dd_roundeven_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtns_dd_roundeven_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtns d0, d0
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtns_dd_roundeven_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtns d0, d0
+; CHECK-NEXT: ret
+ %r = call double @llvm.roundeven.f64(double %a)
+ %i = fptosi double %r to i64
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+
+define double @fcvtnu_ds_roundeven_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtnu_ds_roundeven_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtnu x8, s0
+; CHECK-NOFPRCVT-NEXT: fmov d0, x8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtnu_ds_roundeven_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtnu d0, s0
+; CHECK-NEXT: ret
+ %r = call float @llvm.roundeven.f32(float %a)
+ %i = fptoui float %r to i64
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define float @fcvtnu_sd_roundeven_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtnu_sd_roundeven_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtnu w8, d0
+; CHECK-NOFPRCVT-NEXT: fmov s0, w8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtnu_sd_roundeven_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtnu s0, d0
+; CHECK-NEXT: ret
+ %r = call double @llvm.roundeven.f64(double %a)
+ %i = fptoui double %r to i32
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define float @fcvtnu_ss_roundeven_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtnu_ss_roundeven_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtnu s0, s0
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtnu_ss_roundeven_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtnu s0, s0
+; CHECK-NEXT: ret
+ %r = call float @llvm.roundeven.f32(float %a)
+ %i = fptoui float %r to i32
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define double @fcvtnu_dd_roundeven_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtnu_dd_roundeven_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtnu d0, d0
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtnu_dd_roundeven_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtnu d0, d0
+; CHECK-NEXT: ret
+ %r = call double @llvm.roundeven.f64(double %a)
+ %i = fptoui double %r to i64
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
define double @fcvtms_ds_round_simd(float %a) {
; CHECK-NOFPRCVT-LABEL: fcvtms_ds_round_simd:
@@ -1342,6 +1474,206 @@ define double @fcvtau_dd_simd(double %a) {
ret double %bc
}
+define float @fcvtns_sh_simd(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtns_sh_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtns w8, h0
+; CHECK-NOFPRCVT-NEXT: fmov s0, w8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtns_sh_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtns s0, h0
+; CHECK-NEXT: ret
+ %r = call half @llvm.roundeven.f16(half %a) nounwind readnone
+ %i = call i32 @llvm.fptosi.sat.i32.f16(half %r)
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define double @fcvtns_dh_simd(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtns_dh_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtns x8, h0
+; CHECK-NOFPRCVT-NEXT: fmov d0, x8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtns_dh_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtns d0, h0
+; CHECK-NEXT: ret
+ %r = call half @llvm.roundeven.f16(half %a) nounwind readnone
+ %i = call i64 @llvm.fptosi.sat.i64.f16(half %r)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define double @fcvtns_ds_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtns_ds_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtns x8, s0
+; CHECK-NOFPRCVT-NEXT: fmov d0, x8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtns_ds_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtns d0, s0
+; CHECK-NEXT: ret
+ %r = call float @llvm.roundeven.f32(float %a)
+ %i = call i64 @llvm.fptosi.sat.i64.f32(float %r)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define float @fcvtns_sd_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtns_sd_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtns w8, d0
+; CHECK-NOFPRCVT-NEXT: fmov s0, w8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtns_sd_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtns s0, d0
+; CHECK-NEXT: ret
+ %r = call double @llvm.roundeven.f64(double %a)
+ %i = call i32 @llvm.fptosi.sat.i32.f64(double %r)
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define float @fcvtns_ss_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtns_ss_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtns s0, s0
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtns_ss_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtns s0, s0
+; CHECK-NEXT: ret
+ %r = call float @llvm.roundeven.f32(float %a)
+ %i = call i32 @llvm.fptosi.sat.i32.f32(float %r)
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define double @fcvtns_dd_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtns_dd_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtns d0, d0
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtns_dd_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtns d0, d0
+; CHECK-NEXT: ret
+ %r = call double @llvm.roundeven.f64(double %a)
+ %i = call i64 @llvm.fptosi.sat.i64.f64(double %r)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define float @fcvtnu_sh_simd(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtnu_sh_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtnu w8, h0
+; CHECK-NOFPRCVT-NEXT: fmov s0, w8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtnu_sh_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtnu s0, h0
+; CHECK-NEXT: ret
+ %r = call half @llvm.roundeven.f16(half %a) nounwind readnone
+ %i = call i32 @llvm.fptoui.sat.i32.f16(half %r)
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define double @fcvtnu_dh_simd(half %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtnu_dh_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtnu x8, h0
+; CHECK-NOFPRCVT-NEXT: fmov d0, x8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtnu_dh_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtnu d0, h0
+; CHECK-NEXT: ret
+ %r = call half @llvm.roundeven.f16(half %a) nounwind readnone
+ %i = call i64 @llvm.fptoui.sat.i64.f16(half %r)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define double @fcvtnu_ds_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtnu_ds_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtnu x8, s0
+; CHECK-NOFPRCVT-NEXT: fmov d0, x8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtnu_ds_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtnu d0, s0
+; CHECK-NEXT: ret
+ %r = call float @llvm.roundeven.f32(float %a)
+ %i = call i64 @llvm.fptoui.sat.i64.f32(float %r)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define float @fcvtnu_sd_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtnu_sd_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtnu w8, d0
+; CHECK-NOFPRCVT-NEXT: fmov s0, w8
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtnu_sd_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtnu s0, d0
+; CHECK-NEXT: ret
+ %r = call double @llvm.roundeven.f64(double %a)
+ %i = call i32 @llvm.fptoui.sat.i32.f64(double %r)
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define float @fcvtnu_ss_simd(float %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtnu_ss_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtnu s0, s0
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtnu_ss_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtnu s0, s0
+; CHECK-NEXT: ret
+ %r = call float @llvm.roundeven.f32(float %a)
+ %i = call i32 @llvm.fptoui.sat.i32.f32(float %r)
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define double @fcvtnu_dd_simd(double %a) {
+; CHECK-NOFPRCVT-LABEL: fcvtnu_dd_simd:
+; CHECK-NOFPRCVT: // %bb.0:
+; CHECK-NOFPRCVT-NEXT: fcvtnu d0, d0
+; CHECK-NOFPRCVT-NEXT: ret
+;
+; CHECK-LABEL: fcvtnu_dd_simd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtnu d0, d0
+; CHECK-NEXT: ret
+ %r = call double @llvm.roundeven.f64(double %a)
+ %i = call i64 @llvm.fptoui.sat.i64.f64(double %r)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
define float @fcvtms_sh_simd(half %a) {
; CHECK-NOFPRCVT-LABEL: fcvtms_sh_simd:
; CHECK-NOFPRCVT: // %bb.0:
diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt-fused-round.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt-fused-round.ll
new file mode 100644
index 0000000000000..5a2bc3bb80225
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-vcvt-fused-round.ll
@@ -0,0 +1,832 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s --check-prefixes=CHECK,CHECK-NO16
+; RUN: llc < %s -mtriple=arm64-eabi -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
+
+;
+; Tests for fused round + convert to int patterns (FCVTAS, FCVTAU, FCVTMS, FCVTMU, etc.)
+;
+
+;
+; round + signed -> fcvtas
+;
+
+define <2 x i32> @fcvtas_2s(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtas_2s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtas v0.2s, v0.2s
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x float> @llvm.round.v2f32(<2 x float> %A)
+ %tmp2 = fptosi <2 x float> %tmp1 to <2 x i32>
+ ret <2 x i32> %tmp2
+}
+
+define <4 x i32> @fcvtas_4s(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtas_4s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtas v0.4s, v0.4s
+; CHECK-NEXT: ret
+ %tmp1 = call <4 x float> @llvm.round.v4f32(<4 x float> %A)
+ %tmp2 = fptosi <4 x float> %tmp1 to <4 x i32>
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @fcvtas_2d(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtas_2d:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtas v0.2d, v0.2d
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x double> @llvm.round.v2f64(<2 x double> %A)
+ %tmp2 = fptosi <2 x double> %tmp1 to <2 x i64>
+ ret <2 x i64> %tmp2
+}
+
+;
+; round + unsigned -> fcvtau
+;
+
+define <2 x i32> @fcvtau_2s(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtau_2s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtau v0.2s, v0.2s
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x float> @llvm.round.v2f32(<2 x float> %A)
+ %tmp2 = fptoui <2 x float> %tmp1 to <2 x i32>
+ ret <2 x i32> %tmp2
+}
+
+define <4 x i32> @fcvtau_4s(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtau_4s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtau v0.4s, v0.4s
+; CHECK-NEXT: ret
+ %tmp1 = call <4 x float> @llvm.round.v4f32(<4 x float> %A)
+ %tmp2 = fptoui <4 x float> %tmp1 to <4 x i32>
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @fcvtau_2d(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtau_2d:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtau v0.2d, v0.2d
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x double> @llvm.round.v2f64(<2 x double> %A)
+ %tmp2 = fptoui <2 x double> %tmp1 to <2 x i64>
+ ret <2 x i64> %tmp2
+}
+
+;
+; roundeven + signed -> fcvtns
+;
+
+define <2 x i32> @fcvtns_2s(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtns_2s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtns v0.2s, v0.2s
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %A)
+ %tmp2 = fptosi <2 x float> %tmp1 to <2 x i32>
+ ret <2 x i32> %tmp2
+}
+
+define <4 x i32> @fcvtns_4s(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtns_4s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtns v0.4s, v0.4s
+; CHECK-NEXT: ret
+ %tmp1 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %A)
+ %tmp2 = fptosi <4 x float> %tmp1 to <4 x i32>
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @fcvtns_2d(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtns_2d:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtns v0.2d, v0.2d
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %A)
+ %tmp2 = fptosi <2 x double> %tmp1 to <2 x i64>
+ ret <2 x i64> %tmp2
+}
+
+;
+; roundeven + unsigned -> fcvtnu
+;
+
+define <2 x i32> @fcvtnu_2s(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtnu_2s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtnu v0.2s, v0.2s
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %A)
+ %tmp2 = fptoui <2 x float> %tmp1 to <2 x i32>
+ ret <2 x i32> %tmp2
+}
+
+define <4 x i32> @fcvtnu_4s(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtnu_4s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtnu v0.4s, v0.4s
+; CHECK-NEXT: ret
+ %tmp1 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %A)
+ %tmp2 = fptoui <4 x float> %tmp1 to <4 x i32>
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @fcvtnu_2d(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtnu_2d:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtnu v0.2d, v0.2d
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %A)
+ %tmp2 = fptoui <2 x double> %tmp1 to <2 x i64>
+ ret <2 x i64> %tmp2
+}
+
+;
+; floor + signed -> fcvtms
+;
+
+define <2 x i32> @fcvtms_2s(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtms_2s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtms v0.2s, v0.2s
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x float> @llvm.floor.v2f32(<2 x float> %A)
+ %tmp2 = fptosi <2 x float> %tmp1 to <2 x i32>
+ ret <2 x i32> %tmp2
+}
+
+define <4 x i32> @fcvtms_4s(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtms_4s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtms v0.4s, v0.4s
+; CHECK-NEXT: ret
+ %tmp1 = call <4 x float> @llvm.floor.v4f32(<4 x float> %A)
+ %tmp2 = fptosi <4 x float> %tmp1 to <4 x i32>
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @fcvtms_2d(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtms_2d:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtms v0.2d, v0.2d
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x double> @llvm.floor.v2f64(<2 x double> %A)
+ %tmp2 = fptosi <2 x double> %tmp1 to <2 x i64>
+ ret <2 x i64> %tmp2
+}
+
+;
+; floor + unsigned -> fcvtmu
+;
+
+define <2 x i32> @fcvtmu_2s(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtmu_2s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtmu v0.2s, v0.2s
+; CHECK-NEXT: ret
+ %tmp1 = call <2 x float> @llvm.floor.v2f32(<2 x float> %A)
+ %tmp2 = fptoui <2 x float> %tmp1 to <2 x i32>
+ ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/170018
More information about the llvm-commits
mailing list