[llvm] [AArch64] Extend v2i64 fptosi.sat to v2f64 (PR #91714)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Fri May 10 01:32:44 PDT 2024
https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/91714
This lets the backend produce a single vector instruction for the saturating conversion, as opposed to having to scalarize.
From 78f286c8de97004f1f117d2b9f7b8d29d1d00e9b Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Fri, 10 May 2024 09:28:36 +0100
Subject: [PATCH] [AArch64] Extend v2i64 fptosi.sat to v2f64
This helps it produce a single instruction for the saturate, as opposed to
having to scalarize.
---
.../Target/AArch64/AArch64ISelLowering.cpp | 9 ++++++++
llvm/test/CodeGen/AArch64/fcvt_combine.ll | 7 ++----
llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll | 16 ++++---------
.../test/CodeGen/AArch64/fptosi-sat-vector.ll | 23 +++++--------------
.../test/CodeGen/AArch64/fptoui-sat-vector.ll | 23 +++++--------------
5 files changed, 27 insertions(+), 51 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 7344387ffe552..09de09195048d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4273,6 +4273,15 @@ AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(SDValue Op,
return SDValue();
SDLoc DL(Op);
+ // Extend to f64 if we are saturating to i64, to help keep the lanes the
+ // same width and produce a vector fcvtzs/fcvtzu.
+ if (SatWidth == 64 && SrcElementWidth < 64) {
+ MVT F64VT = MVT::getVectorVT(MVT::f64, SrcVT.getVectorNumElements());
+ SrcVal = DAG.getNode(ISD::FP_EXTEND, DL, F64VT, SrcVal);
+ SrcVT = F64VT;
+ SrcElementVT = MVT::f64;
+ SrcElementWidth = 64;
+ }
// Cases that we can emit directly.
if (SrcElementWidth == DstElementWidth && SrcElementWidth == SatWidth)
return DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal,
diff --git a/llvm/test/CodeGen/AArch64/fcvt_combine.ll b/llvm/test/CodeGen/AArch64/fcvt_combine.ll
index 29170aab96566..62669a6d99eae 100644
--- a/llvm/test/CodeGen/AArch64/fcvt_combine.ll
+++ b/llvm/test/CodeGen/AArch64/fcvt_combine.ll
@@ -345,11 +345,8 @@ define <2 x i64> @test6_sat(<2 x float> %f) {
; CHECK: // %bb.0:
; CHECK-NEXT: fmov v1.2s, #16.00000000
; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
-; CHECK-NEXT: mov s1, v0.s[1]
-; CHECK-NEXT: fcvtzs x8, s0
-; CHECK-NEXT: fcvtzs x9, s1
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: mov v0.d[1], x9
+; CHECK-NEXT: fcvtl v0.2d, v0.2s
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d
; CHECK-NEXT: ret
%mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
%vcvt.i = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> %mul.i)
diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
index 2ea581359af6f..4e8bfcd9d7516 100644
--- a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
@@ -436,12 +436,8 @@ entry:
define <2 x i64> @stest_f32i64(<2 x float> %x) {
; CHECK-LABEL: stest_f32i64:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov s1, v0.s[1]
-; CHECK-NEXT: fcvtzs x8, s0
-; CHECK-NEXT: fcvtzs x9, s1
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: mov v0.d[1], x9
+; CHECK-NEXT: fcvtl v0.2d, v0.2s
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d
; CHECK-NEXT: ret
entry:
%conv = fptosi <2 x float> %x to <2 x i128>
@@ -1056,12 +1052,8 @@ entry:
define <2 x i64> @stest_f32i64_mm(<2 x float> %x) {
; CHECK-LABEL: stest_f32i64_mm:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov s1, v0.s[1]
-; CHECK-NEXT: fcvtzs x8, s0
-; CHECK-NEXT: fcvtzs x9, s1
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: mov v0.d[1], x9
+; CHECK-NEXT: fcvtl v0.2d, v0.2s
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d
; CHECK-NEXT: ret
entry:
%conv = fptosi <2 x float> %x to <2 x i128>
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
index c45885a38f159..d620a8851ee44 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
@@ -793,12 +793,8 @@ define <2 x i50> @test_signed_v2f32_v2i50(<2 x float> %f) {
define <2 x i64> @test_signed_v2f32_v2i64(<2 x float> %f) {
; CHECK-LABEL: test_signed_v2f32_v2i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov s1, v0.s[1]
-; CHECK-NEXT: fcvtzs x8, s0
-; CHECK-NEXT: fcvtzs x9, s1
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: mov v0.d[1], x9
+; CHECK-NEXT: fcvtl v0.2d, v0.2s
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d
; CHECK-NEXT: ret
%x = call <2 x i64> @llvm.fptosi.sat.v2f32.v2i64(<2 x float> %f)
ret <2 x i64> %x
@@ -1060,17 +1056,10 @@ define <4 x i50> @test_signed_v4f32_v4i50(<4 x float> %f) {
define <4 x i64> @test_signed_v4f32_v4i64(<4 x float> %f) {
; CHECK-LABEL: test_signed_v4f32_v4i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: mov s3, v0.s[1]
-; CHECK-NEXT: fcvtzs x9, s0
-; CHECK-NEXT: mov s2, v1.s[1]
-; CHECK-NEXT: fcvtzs x8, s1
-; CHECK-NEXT: fcvtzs x11, s3
-; CHECK-NEXT: fmov d0, x9
-; CHECK-NEXT: fcvtzs x10, s2
-; CHECK-NEXT: fmov d1, x8
-; CHECK-NEXT: mov v0.d[1], x11
-; CHECK-NEXT: mov v1.d[1], x10
+; CHECK-NEXT: fcvtl2 v1.2d, v0.4s
+; CHECK-NEXT: fcvtl v0.2d, v0.2s
+; CHECK-NEXT: fcvtzs v1.2d, v1.2d
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d
; CHECK-NEXT: ret
%x = call <4 x i64> @llvm.fptosi.sat.v4f32.v4i64(<4 x float> %f)
ret <4 x i64> %x
diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
index c94db3484994c..16e04070b6543 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
@@ -707,12 +707,8 @@ define <2 x i50> @test_unsigned_v2f32_v2i50(<2 x float> %f) {
define <2 x i64> @test_unsigned_v2f32_v2i64(<2 x float> %f) {
; CHECK-LABEL: test_unsigned_v2f32_v2i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov s1, v0.s[1]
-; CHECK-NEXT: fcvtzu x8, s0
-; CHECK-NEXT: fcvtzu x9, s1
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: mov v0.d[1], x9
+; CHECK-NEXT: fcvtl v0.2d, v0.2s
+; CHECK-NEXT: fcvtzu v0.2d, v0.2d
; CHECK-NEXT: ret
%x = call <2 x i64> @llvm.fptoui.sat.v2f32.v2i64(<2 x float> %f)
ret <2 x i64> %x
@@ -927,17 +923,10 @@ define <4 x i50> @test_unsigned_v4f32_v4i50(<4 x float> %f) {
define <4 x i64> @test_unsigned_v4f32_v4i64(<4 x float> %f) {
; CHECK-LABEL: test_unsigned_v4f32_v4i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: mov s3, v0.s[1]
-; CHECK-NEXT: fcvtzu x9, s0
-; CHECK-NEXT: mov s2, v1.s[1]
-; CHECK-NEXT: fcvtzu x8, s1
-; CHECK-NEXT: fcvtzu x11, s3
-; CHECK-NEXT: fmov d0, x9
-; CHECK-NEXT: fcvtzu x10, s2
-; CHECK-NEXT: fmov d1, x8
-; CHECK-NEXT: mov v0.d[1], x11
-; CHECK-NEXT: mov v1.d[1], x10
+; CHECK-NEXT: fcvtl2 v1.2d, v0.4s
+; CHECK-NEXT: fcvtl v0.2d, v0.2s
+; CHECK-NEXT: fcvtzu v1.2d, v1.2d
+; CHECK-NEXT: fcvtzu v0.2d, v0.2d
; CHECK-NEXT: ret
%x = call <4 x i64> @llvm.fptoui.sat.v4f32.v4i64(<4 x float> %f)
ret <4 x i64> %x
More information about the llvm-commits
mailing list